diff --git a/.gitignore b/.gitignore index a26de97..11393fe 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ node_modules web-ext-artifacts build gecko.log +.DS_Store diff --git a/src/fathom_coefficients.json b/src/fathom_coefficients.json index d54851e..a90f91a 100644 --- a/src/fathom_coefficients.json +++ b/src/fathom_coefficients.json @@ -1,9 +1,11 @@ { - "largerImageCoeff": 3, - "largerFontSizeCoeff": 1, - "hasDollarSignCoeff": 3, - "hasTitleInIDCoeff": 10, - "hasTitleInClassNameCoeff": 5, - "isHiddenCoeff": -100, - "isHeaderElementCoeff": 10 + "largerImageCoeff": 2, + "largerFontSizeCoeff": 7, + "hasDollarSignCoeff": 8, + "hasPriceInIDCoeff": 17, + "hasPriceInClassNameCoeff": 2, + "isAboveTheFoldPriceCoeff": 33, + "isAboveTheFoldImageCoeff": 13, + "isNearbyImageXAxisCoeff": 5, + "hasPriceishPatternCoeff": 15 } diff --git a/src/fathom_extraction.js b/src/fathom_extraction.js index 65790d2..b804b44 100644 --- a/src/fathom_extraction.js +++ b/src/fathom_extraction.js @@ -15,10 +15,12 @@ import { largerImageCoeff, largerFontSizeCoeff, hasDollarSignCoeff, - hasTitleInIDCoeff, - hasTitleInClassNameCoeff, - isHiddenCoeff, - isHeaderElementCoeff, + hasPriceInIDCoeff, + hasPriceInClassNameCoeff, + isAboveTheFoldPriceCoeff, + isAboveTheFoldImageCoeff, + isNearbyImageXAxisCoeff, + hasPriceishPatternCoeff, } from 'commerce/fathom_coefficients.json'; const PRODUCT_FEATURES = ['title', 'price', 'image']; @@ -36,10 +38,12 @@ function runRuleset(doc) { largerImageCoeff, largerFontSizeCoeff, hasDollarSignCoeff, - hasTitleInIDCoeff, - hasTitleInClassNameCoeff, - isHiddenCoeff, - isHeaderElementCoeff, + hasPriceInIDCoeff, + hasPriceInClassNameCoeff, + isAboveTheFoldPriceCoeff, + isAboveTheFoldImageCoeff, + isNearbyImageXAxisCoeff, + hasPriceishPatternCoeff, ]).against(doc).get(`${feature}`); fnodesList = fnodesList.filter(fnode => fnode.scoreFor(`${feature}ish`) >= SCORE_THRESHOLD); // It is possible for multiple elements to have the same highest score. diff --git a/src/fathom_ruleset.js b/src/fathom_ruleset.js index 39ef04e..bfcd0f9 100644 --- a/src/fathom_ruleset.js +++ b/src/fathom_ruleset.js @@ -3,10 +3,17 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ import {dom, out, rule, ruleset, score, type} from 'fathom-web'; +import {ancestors} from 'fathom-web/utils'; // for training: utilsForFrontend const DEFAULT_BODY_FONT_SIZE = 14; const DEFAULT_SCORE = 1; +const SCORE_THRESHOLD = 4; +const TOP_BUFFER = 150; const VIEWPORT_HEIGHT = window.innerHeight; +const VIEWPORT_WIDTH = window.innerWidth; +// Taken from: https://github.com/mozilla/fathom-trainees/blob/master/src/trainees.js +const ZEROISH = 0.08; +const ONEISH = 0.9; /** * Rulesets to train. @@ -40,17 +47,20 @@ trainees.set( * out-rule of interest are the same. A multi-out ruleset will not work without * commenting out all but one `out` and setting the ruleset name to that `out`. */ - 'product', // 'product' for production and 'title', 'image' or 'price' for training + 'product', // Ruleset name: 'product' for production and 'title', 'image' or 'price' for training { - coeffs: [3, 1, 3, 10, 5, -100, 10], // Input rule coefficients in order here + // For training only: input rule coefficients in order here + coeffs: [2, 7, 8, 17, 2, 33, 13, 5, 15], rulesetMaker([ - coeffLargerImage, - coeffLargerFontSize, - coeffHasDollarSign, - coeffHasTitleInID, - coeffHasTitleInClassName, - coeffIsHidden, - coeffIsHeaderElement, + largerImageCoeff, + largerFontSizeCoeff, + hasDollarSignCoeff, + hasPriceInIDCoeff, + hasPriceInClassNameCoeff, + isAboveTheFoldPriceCoeff, + isAboveTheFoldImageCoeff, + isNearbyImageXAxisCoeff, + hasPriceishPatternCoeff, ]) { /** * Scores fnode in direct proportion to its size @@ -61,11 +71,11 @@ trainees.set( if (area === 0) { return DEFAULT_SCORE; } - return area * coeffLargerImage; + return area * largerImageCoeff; } /** - * Scores fnode in direct proportion to its font size + * Scores fnode in proportion to its font size */ function largerFontSize(fnode) { const sizeWithUnits = window.getComputedStyle(fnode.element).fontSize; @@ -73,7 +83,7 @@ trainees.set( if (size) { // normalize the multiplier by the default font size const sizeMultiplier = parseInt(size, 10) / DEFAULT_BODY_FONT_SIZE; - return (sizeMultiplier * coeffLargerFontSize); + return (sizeMultiplier * largerFontSizeCoeff); } return DEFAULT_SCORE; } @@ -83,82 +93,214 @@ trainees.set( */ function hasDollarSign(fnode) { if (fnode.element.innerText.includes('$')) { - return coeffHasDollarSign; + return hasDollarSignCoeff; } return DEFAULT_SCORE; } /** - * Scores fnode with "title" in its id + * Scores fnode with 'price' in its id or its parent's id */ - function hasTitleInID(fnode) { - const id = fnode.element.id; - if (id.includes('title') || id.includes('Title')) { - return coeffHasTitleInID; + function hasPriceInID(fnode) { + const element = fnode.element; + const parentElement = element.parentElement; + const ID = element.id; + const parentID = parentElement.id; + if (ID.includes('price') || ID.includes('Price')) { + return hasPriceInIDCoeff; + } + if (parentID.includes('price') || parentID.includes('Price')) { + return 0.75 * hasPriceInIDCoeff; } return DEFAULT_SCORE; } /** - * Scores fnode with "title" in a class name + * Scores fnode with 'price' in its class name or its parent's class name */ - function hasTitleInClassName(fnode) { - const className = fnode.element.className; - if (className.includes('title') || className.includes('Title')) { - return coeffHasTitleInClassName; + function hasPriceInClassName(fnode) { + const element = fnode.element; + const parentElement = element.parentElement; + const className = element.className; + const parentClassName = parentElement.className; + if (className.includes('price') || className.includes('Price')) { + return hasPriceInClassNameCoeff; + } + if (parentClassName.includes('price') || parentClassName.includes('Price')) { + return 0.75 * hasPriceInClassNameCoeff; } return DEFAULT_SCORE; } /** - * Scores fnode that is hidden + * Checks if fnode is visible + */ + function isVisible(fnode) { + const element = fnode.element; + for (const ancestor of ancestors(element)) { + const style = getComputedStyle(ancestor); + if (style.visibility === 'hidden' + || style.display === 'none' + || style.opacity === '0' + || style.width === '0' + || style.height === '0') { + return false; + } + } + return true; + } + + /** + * Scale a number to the range [ZEROISH, ONEISH]. + * + * Taken from: https://github.com/mozilla/fathom-trainees + * + * For a rising trapezoid, the result is ZEROISH until the input + * reaches zeroAt, then increases linearly until oneAt, at which it + * becomes ONEISH. To make a falling trapezoid, where the result is + * ONEISH to the left and ZEROISH to the right, use a zeroAt greater + * than oneAt. + */ + function trapezoid(number, zeroAt, oneAt) { + const isRising = zeroAt < oneAt; + if (isRising) { + if (number <= zeroAt) { + return ZEROISH; + } + if (number >= oneAt) { + return ONEISH; + } + } else { + if (number >= zeroAt) { + return ZEROISH; + } + if (number <= oneAt) { + return ONEISH; + } + } + const slope = (ONEISH - ZEROISH) / (oneAt - zeroAt); + return slope * (number - zeroAt) + ZEROISH; + } + + /** + * Scores fnode by its vertical location relative to the fold + */ + function isAboveTheFold(fnode, featureCoeff) { + const domRect = fnode.element.getBoundingClientRect(); + // Use a falling trapezoid to score the element; + // result is ONEISH until the input reaches VIEWPORT_HEIGHT, then decreases + // linearly until VIEWPORT_HEIGHT * 2, where it becomes ZEROISH. + return trapezoid(domRect.top, VIEWPORT_HEIGHT * 2, VIEWPORT_HEIGHT) * featureCoeff; + } + + /** + * Checks to see if fnode is eligible for scoring + * Note: This is a compound method, because `.when` chaining these methods onto + * a `dom` rule does not currently work. + */ + function isEligible(fnode, featureType) { + if (featureType === 'priceish') { + return ( + isVisible(fnode) + && removeRedundantAncestors(fnode) + && isNearbyImageYAxis(fnode) + ); + } + if (featureType === 'titleish') { + return ( + isVisible(fnode) + /** + * Don't removeRedundantAncestors, because