Skip to content
This repository has been archived by the owner on Dec 3, 2020. It is now read-only.

Commit

Permalink
Fix #181: Drop scores of product data within shopping carts.
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Kelly committed Oct 25, 2018
1 parent 9813ba8 commit 414a6af
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/extraction/fathom/coefficients.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@
"isNearbyImageXAxisPriceCoeff": 5,
"isNearbyImageYAxisTitleCoeff": 5,
"largerFontSizeCoeff": 7,
"largerImageCoeff": 2
"largerImageCoeff": 2,
"isChildOfCartClassedNodeCoeff": 0.8
}
28 changes: 28 additions & 0 deletions src/extraction/fathom/ruleset_factory.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export default class RulesetFactory {
this.isNearbyImageYAxisTitleCoeff,
this.largerFontSizeCoeff,
this.largerImageCoeff,
this.isChildOfCartClassedNodeCoeff,
] = coefficients;
}

Expand Down Expand Up @@ -169,6 +170,27 @@ export default class RulesetFactory {
return DEFAULT_SCORE;
}

/**
* Score based on whether the node has a parent element with a class name
* starting with the word "cart". Helps avoid products in shopping carts.
*/
isChildOfCartClassedNode(fnode) {
let cartParentCount = 0;
let element = fnode.element;
for (let k = 0; (k < 8) && element && element.className; k++) {
if (element.className.includes('cart')) {
cartParentCount++;
}
element = element.parentNode;
}

if (cartParentCount > 0) {
return cartParentCount * this.isChildOfCartClassedNodeCoeff;
}

return DEFAULT_SCORE;
}

/**
* Scores fnode whose innerText matches a priceish RegExp pattern
*/
Expand Down Expand Up @@ -279,6 +301,8 @@ export default class RulesetFactory {
rule(type('imageish'), score(fnode => this.isAboveTheFold(fnode, this.isAboveTheFoldImageCoeff))),
// better score for larger images
rule(type('imageish'), score(this.largerImage.bind(this))),
// worse score for being inside a shopping cart
rule(type('imageish'), score(this.isChildOfCartClassedNode.bind(this))),
// return image element(s) with max score
rule(type('imageish').max(), out('image')),

Expand All @@ -289,6 +313,8 @@ export default class RulesetFactory {
rule(dom('h1').when(this.isEligibleTitle.bind(this)), type('titleish')),
// better score based on y-axis proximity to max scoring image element
rule(type('titleish'), score(this.isNearbyImageYAxisTitle.bind(this))),
// worse score for being inside a shopping cart
rule(type('titleish'), score(this.isChildOfCartClassedNode.bind(this))),
// return title element(s) with max score
rule(type('titleish').max(), out('title')),

Expand All @@ -311,6 +337,8 @@ export default class RulesetFactory {
rule(type('priceish'), score(this.isNearbyImageXAxisPrice.bind(this))),
// check if innerText has a priceish pattern
rule(type('priceish'), score(this.hasPriceishPattern.bind(this))),
// worse score for being inside a shopping cart
rule(type('priceish'), score(this.isChildOfCartClassedNode.bind(this))),
// return price element(s) with max score
rule(type('priceish').max(), out('price')),
);
Expand Down

0 comments on commit 414a6af

Please sign in to comment.