diff --git a/src/config.js b/src/config.js index 0baa43f..3f9c3be 100644 --- a/src/config.js +++ b/src/config.js @@ -16,3 +16,6 @@ export const PRICE_CHECK_TIMEOUT_INTERVAL = 1000 * 60 * 15; // 15 minutes /** Delay before removing iframes created during price checks */ export const IFRAME_TIMEOUT = 1000 * 60; // 1 minute + +/** Minimum score an element needs to be considered the "correct" feature element extracted by Fathom */ +export const SCORE_THRESHOLD = 4; diff --git a/src/fathom_coefficients.json b/src/fathom_coefficients.json index a90f91a..53d5d07 100644 --- a/src/fathom_coefficients.json +++ b/src/fathom_coefficients.json @@ -6,6 +6,7 @@ "hasPriceInClassNameCoeff": 2, "isAboveTheFoldPriceCoeff": 33, "isAboveTheFoldImageCoeff": 13, - "isNearbyImageXAxisCoeff": 5, + "isNearbyImageXAxisPriceCoeff": 5, + "isNearbyImageYAxisTitleCoeff": 5, "hasPriceishPatternCoeff": 15 } diff --git a/src/fathom_extraction.js b/src/fathom_extraction.js index b804b44..1af29b5 100644 --- a/src/fathom_extraction.js +++ b/src/fathom_extraction.js @@ -19,32 +19,36 @@ import { hasPriceInClassNameCoeff, isAboveTheFoldPriceCoeff, isAboveTheFoldImageCoeff, - isNearbyImageXAxisCoeff, + isNearbyImageXAxisPriceCoeff, + isNearbyImageYAxisTitleCoeff, hasPriceishPatternCoeff, } from 'commerce/fathom_coefficients.json'; +import {SCORE_THRESHOLD} from 'commerce/config'; const PRODUCT_FEATURES = ['title', 'price', 'image']; -const SCORE_THRESHOLD = 4; +const {rulesetMaker} = productRuleset.get('product'); +const rulesetWithCoeffs = rulesetMaker([ + largerImageCoeff, + largerFontSizeCoeff, + hasDollarSignCoeff, + hasPriceInIDCoeff, + hasPriceInClassNameCoeff, + isAboveTheFoldPriceCoeff, + isAboveTheFoldImageCoeff, + isNearbyImageXAxisPriceCoeff, + isNearbyImageYAxisTitleCoeff, + hasPriceishPatternCoeff, +]); /** * Extracts the highest scoring element above a score threshold * contained in a page's HTML document. 
*/ function runRuleset(doc) { + const rulesetOutput = rulesetWithCoeffs.against(doc); const extractedElements = {}; - const rules = productRuleset.get('product').rulesetMaker; for (const feature of PRODUCT_FEATURES) { - let fnodesList = rules([ - largerImageCoeff, - largerFontSizeCoeff, - hasDollarSignCoeff, - hasPriceInIDCoeff, - hasPriceInClassNameCoeff, - isAboveTheFoldPriceCoeff, - isAboveTheFoldImageCoeff, - isNearbyImageXAxisCoeff, - hasPriceishPatternCoeff, - ]).against(doc).get(`${feature}`); + let fnodesList = rulesetOutput.get(feature); fnodesList = fnodesList.filter(fnode => fnode.scoreFor(`${feature}ish`) >= SCORE_THRESHOLD); // It is possible for multiple elements to have the same highest score. if (fnodesList.length >= 1) { @@ -69,10 +73,12 @@ export default function extractProduct(doc) { const extractedElements = runRuleset(doc); if (hasAllFeatures(extractedElements)) { for (const feature of PRODUCT_FEATURES) { - extractedProduct[feature] = (feature === 'image' - ? extractedElements[feature].src - : extractedElements[feature].innerText - ); + // An image is represented by its src; title and price by their innerText. + if (feature === 'image') { + extractedProduct[feature] = extractedElements[feature].src; + } else { + extractedProduct[feature] = extractedElements[feature].innerText; + } } } return hasAllFeatures(extractedProduct) ? extractedProduct : null; diff --git a/src/fathom_ruleset.js b/src/fathom_ruleset.js index bfcd0f9..f263abe 100644 --- a/src/fathom_ruleset.js +++ b/src/fathom_ruleset.js @@ -3,14 +3,14 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ import {dom, out, rule, ruleset, score, type} from 'fathom-web'; -import {ancestors} from 'fathom-web/utils'; // for training: utilsForFrontend +// For training, replace 'utils' with 'utilsForFrontend'. 
The mozilla/fathom-trainees +// add-on currently imports Fathom as a submodule +import {ancestors} from 'fathom-web/utils'; +import {SCORE_THRESHOLD} from 'commerce/config'; const DEFAULT_BODY_FONT_SIZE = 14; const DEFAULT_SCORE = 1; -const SCORE_THRESHOLD = 4; const TOP_BUFFER = 150; -const VIEWPORT_HEIGHT = window.innerHeight; -const VIEWPORT_WIDTH = window.innerWidth; // Taken from: https://github.com/mozilla/fathom-trainees/blob/master/src/trainees.js const ZEROISH = 0.08; const ONEISH = 0.9; @@ -50,7 +50,7 @@ trainees.set( 'product', // Ruleset name: 'product' for production and 'title', 'image' or 'price' for training { // For training only: input rule coefficients in order here - coeffs: [2, 7, 8, 17, 2, 33, 13, 5, 15], + coeffs: [2, 7, 8, 17, 2, 33, 13, 5, 5, 15], rulesetMaker([ largerImageCoeff, largerFontSizeCoeff, @@ -59,7 +59,8 @@ trainees.set( hasPriceInClassNameCoeff, isAboveTheFoldPriceCoeff, isAboveTheFoldImageCoeff, - isNearbyImageXAxisCoeff, + isNearbyImageXAxisPriceCoeff, + isNearbyImageYAxisTitleCoeff, hasPriceishPatternCoeff, ]) { /** @@ -78,14 +79,10 @@ trainees.set( * Scores fnode in proportion to its font size */ function largerFontSize(fnode) { - const sizeWithUnits = window.getComputedStyle(fnode.element).fontSize; - const size = sizeWithUnits.replace('px', ''); - if (size) { - // normalize the multiplier by the default font size - const sizeMultiplier = parseInt(size, 10) / DEFAULT_BODY_FONT_SIZE; - return (sizeMultiplier * largerFontSizeCoeff); - } - return DEFAULT_SCORE; + const size = window.getComputedStyle(fnode.element).fontSize; + // parseFloat stops at the unit suffix (e.g. 'px'); normalize by the default font size + const sizeMultiplier = parseFloat(size) / DEFAULT_BODY_FONT_SIZE; + return sizeMultiplier * largerFontSizeCoeff; } /** @@ -102,14 +99,12 @@ trainees.set( * Scores fnode with 'price' in its id or its parent's id */ function hasPriceInID(fnode) { - const element = fnode.element; - const parentElement = element.parentElement; - const ID = element.id; 
- const parentID = parentElement.id; - if (ID.includes('price') || ID.includes('Price')) { + const id = fnode.element.id; + const parentID = fnode.element.parentElement.id; + if (id.toLowerCase().includes('price')) { return hasPriceInIDCoeff; } - if (parentID.includes('price') || parentID.includes('Price')) { + if (parentID.toLowerCase().includes('price')) { return 0.75 * hasPriceInIDCoeff; } return DEFAULT_SCORE; @@ -119,31 +114,28 @@ trainees.set( * Scores fnode with 'price' in its class name or its parent's class name */ function hasPriceInClassName(fnode) { - const element = fnode.element; - const parentElement = element.parentElement; - const className = element.className; - const parentClassName = parentElement.className; - if (className.includes('price') || className.includes('Price')) { + const className = fnode.element.className; + const parentClassName = fnode.element.parentElement.className; + if (className.toLowerCase().includes('price')) { return hasPriceInClassNameCoeff; } - if (parentClassName.includes('price') || parentClassName.includes('Price')) { + if (parentClassName.toLowerCase().includes('price')) { return 0.75 * hasPriceInClassNameCoeff; } return DEFAULT_SCORE; } - /** - * Checks if fnode is visible - */ function isVisible(fnode) { - const element = fnode.element; - for (const ancestor of ancestors(element)) { + for (const ancestor of ancestors(fnode.element)) { const style = getComputedStyle(ancestor); - if (style.visibility === 'hidden' + const isElementHidden = ( + style.visibility === 'hidden' || style.display === 'none' || style.opacity === '0' || style.width === '0' - || style.height === '0') { + || style.height === '0' + ); + if (isElementHidden) { return false; } } @@ -151,79 +143,66 @@ trainees.set( } /** - * Scale a number to the range [ZEROISH, ONEISH]. 
- * - * Taken from: https://github.com/mozilla/fathom-trainees - * - * For a rising trapezoid, the result is ZEROISH until the input - * reaches zeroAt, then increases linearly until oneAt, at which it - * becomes ONEISH. To make a falling trapezoid, where the result is - * ONEISH to the left and ZEROISH to the right, use a zeroAt greater - * than oneAt. + * Scores fnode by its vertical location relative to the fold */ - function trapezoid(number, zeroAt, oneAt) { - const isRising = zeroAt < oneAt; - if (isRising) { - if (number <= zeroAt) { - return ZEROISH; - } - if (number >= oneAt) { - return ONEISH; - } - } else { - if (number >= zeroAt) { - return ZEROISH; - } - if (number <= oneAt) { - return ONEISH; - } + function isAboveTheFold(fnode, featureCoeff) { + const viewportHeight = window.innerHeight; + const top = fnode.element.getBoundingClientRect().top; + const upperHeightLimit = viewportHeight * 2; + // Use a falling trapezoid function to score the element + // Taken from: https://github.com/mozilla/fathom-trainees + if (top >= upperHeightLimit) { + return ZEROISH * featureCoeff; + } + if (top <= viewportHeight) { + return ONEISH * featureCoeff; } - const slope = (ONEISH - ZEROISH) / (oneAt - zeroAt); - return slope * (number - zeroAt) + ZEROISH; + // slope = deltaY / deltaX + const slope = (ONEISH - ZEROISH) / (viewportHeight - upperHeightLimit); + // Point-slope form: y = m * (x - x0) + y0, through (upperHeightLimit, ZEROISH) + return (slope * (top - upperHeightLimit) + ZEROISH) * featureCoeff; } /** - * Scores fnode by its vertical location relative to the fold + * Checks to see if a 'priceish' fnode is eligible for scoring + * Note: This is a compound method, because `.when` chaining these methods + * onto a `dom` rule does not currently work. i.e. 
+ * `rule(dom('span, h2') + * .when(isVisible) + * .when(hasDifferentInnerTextThanChildren) + * .when(isNearbyImageYAxisPrice), + * type('priceish')),` + * ...is replaced with: + * `rule(dom('span, h2').when(isEligiblePrice), type('priceish')),` */ - function isAboveTheFold(fnode, featureCoeff) { - const domRect = fnode.element.getBoundingClientRect(); - // Use a falling trapezoid to score the element; - // result is ONEISH until the input reaches VIEWPORT_HEIGHT, then decreases - // linearly until VIEWPORT_HEIGHT * 2, where it becomes ZEROISH. - return trapezoid(domRect.top, VIEWPORT_HEIGHT * 2, VIEWPORT_HEIGHT) * featureCoeff; + function isEligiblePrice(fnode) { + return ( + isVisible(fnode) + && hasDifferentInnerTextThanChildren(fnode) + && isNearbyImageYAxisPrice(fnode) + ); } /** - * Checks to see if fnode is eligible for scoring - * Note: This is a compound method, because `.when` chaining these methods onto - * a `dom` rule does not currently work. + * Checks to see if a 'titleish' fnode is eligible for scoring */ - function isEligible(fnode, featureType) { - if (featureType === 'priceish') { - return ( - isVisible(fnode) - && removeRedundantAncestors(fnode) - && isNearbyImageYAxis(fnode) - ); - } - if (featureType === 'titleish') { - return ( - isVisible(fnode) - /** - * Don't removeRedundantAncestors, because