diff --git a/README.md b/README.md index 4fc09a6..7c802c8 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ After this, you can run `pipenv run test` to run the automated test suite. - `src/background` contains the background scripts that trigger UI elements (such as the page action) and periodically check for price updates. - `src/browser_action` contains the toolbar popup for managing the list of currently-tracked products. +- `src/extraction` contains the content scripts that extract product information from product web pages. - `src/page_action` contains the URL bar popup for viewing and tracking the product in the current tab. - `src/state` contains the Redux-based code for managing global extension state. - `src/tests` contains the automated test suite. diff --git a/src/extraction/fathom_default_coefficients.json b/src/extraction/fathom_default_coefficients.json index 53d5d07..7a70565 100644 --- a/src/extraction/fathom_default_coefficients.json +++ b/src/extraction/fathom_default_coefficients.json @@ -1,12 +1,12 @@ { - "largerImageCoeff": 2, - "largerFontSizeCoeff": 7, "hasDollarSignCoeff": 8, - "hasPriceInIDCoeff": 17, "hasPriceInClassNameCoeff": 2, - "isAboveTheFoldPriceCoeff": 33, + "hasPriceInIDCoeff": 17, + "hasPriceishPatternCoeff": 15, "isAboveTheFoldImageCoeff": 13, + "isAboveTheFoldPriceCoeff": 33, "isNearbyImageXAxisPriceCoeff": 5, "isNearbyImageYAxisTitleCoeff": 5, - "hasPriceishPatternCoeff": 15 + "largerFontSizeCoeff": 7, + "largerImageCoeff": 2 } diff --git a/src/extraction/fathom_extraction.js b/src/extraction/fathom_extraction.js index 0b59e84..40328ae 100644 --- a/src/extraction/fathom_extraction.js +++ b/src/extraction/fathom_extraction.js @@ -15,8 +15,8 @@ import RulesetFactory from 'commerce/extraction/ruleset_factory'; import {SCORE_THRESHOLD} from 'commerce/config'; const PRODUCT_FEATURES = ['title', 'price', 'image']; -// Array of numbers corresponding to the coefficients -const coefficients = Object.values(defaultCoefficients); 
+// Array of coefficient values, ordered by their sorted key names +const coefficients = RulesetFactory.getCoeffsInOrder(defaultCoefficients); // For production, we don't need to generate a new ruleset factory // and ruleset every time we run Fathom, since the coefficients are static. const rulesetFactory = new RulesetFactory(coefficients); @@ -28,8 +28,9 @@ const rules = rulesetFactory.makeRuleset(); */ function runRuleset(doc) { const extractedElements = {}; + const results = rules.against(doc); for (const feature of PRODUCT_FEATURES) { - let fnodesList = rules.against(doc).get(feature); + let fnodesList = results.get(feature); fnodesList = fnodesList.filter(fnode => fnode.scoreFor(`${feature}ish`) >= SCORE_THRESHOLD); // It is possible for multiple elements to have the same highest score. if (fnodesList.length >= 1) { @@ -56,8 +57,9 @@ export default function extractProduct(doc) { for (const feature of PRODUCT_FEATURES) { if (feature === 'image') { extractedProduct[feature] = extractedElements[feature].src; + } else { + extractedProduct[feature] = extractedElements[feature].innerText; } - extractedProduct[feature] = extractedElements[feature].innerText; } } return hasAllFeatures(extractedProduct) ? 
extractedProduct : null; diff --git a/src/extraction/ruleset_factory.js b/src/extraction/ruleset_factory.js index 7e4744f..5f6834d 100644 --- a/src/extraction/ruleset_factory.js +++ b/src/extraction/ruleset_factory.js @@ -31,16 +31,16 @@ export default class RulesetFactory { */ constructor(coefficients) { [ - this.largerImageCoeff, - this.largerFontSizeCoeff, this.hasDollarSignCoeff, - this.hasPriceInIDCoeff, this.hasPriceInClassNameCoeff, - this.isAboveTheFoldPriceCoeff, + this.hasPriceInIDCoeff, + this.hasPriceishPatternCoeff, this.isAboveTheFoldImageCoeff, + this.isAboveTheFoldPriceCoeff, this.isNearbyImageXAxisPriceCoeff, this.isNearbyImageYAxisTitleCoeff, - this.hasPriceishPatternCoeff, + this.largerFontSizeCoeff, + this.largerImageCoeff, ] = coefficients; } @@ -113,15 +113,20 @@ export default class RulesetFactory { const viewportHeight = window.innerHeight; const top = fnode.element.getBoundingClientRect().top; const upperHeightLimit = viewportHeight * 2; - // Use a falling trapezoid function to score the element - // Taken from: https://github.com/mozilla/fathom-trainees + + // If the node is below the fold by more than a viewport's length, + // return a low score. if (top >= upperHeightLimit) { return ZEROISH * featureCoeff; } + + // If the node is above the fold, return a high score. if (top <= viewportHeight) { return ONEISH * featureCoeff; } - // slope = deltaY / deltaX + + // Otherwise, scale the score linearly between the fold and a viewport's + // length below it. 
const slope = (ONEISH - ZEROISH) / (viewportHeight - upperHeightLimit); return (slope * (top - upperHeightLimit) + ZEROISH) * featureCoeff; } @@ -132,7 +137,7 @@ export default class RulesetFactory { isNearbyImageXAxisPrice(fnode) { const viewportWidth = window.innerWidth; const eleDOMRect = fnode.element.getBoundingClientRect(); - const imageElement = fnode._ruleset.get('image')[0].element; // eslint-disable-line no-underscore-dangle + const imageElement = this.getHighestScoringImage(fnode); const imageDOMRect = imageElement.getBoundingClientRect(); const deltaRight = eleDOMRect.left - imageDOMRect.right; const deltaLeft = imageDOMRect.left - eleDOMRect.right; @@ -157,7 +162,7 @@ export default class RulesetFactory { isNearbyImageYAxisTitle(fnode) { const viewportHeight = window.innerHeight; const DOMRect = fnode.element.getBoundingClientRect(); - const imageElement = fnode._ruleset.get('image')[0].element; // eslint-disable-line no-underscore-dangle + const imageElement = this.getHighestScoringImage(fnode); const imageDOMRect = imageElement.getBoundingClientRect(); // Some titles (like on Ebay) are above the image, so include a top buffer const isEleTopNearby = DOMRect.top >= (imageDOMRect.top - TOP_BUFFER); @@ -240,7 +245,7 @@ export default class RulesetFactory { isNearbyImageYAxisPrice(fnode) { const element = fnode.element; const DOMRect = element.getBoundingClientRect(); - const imageElement = fnode._ruleset.get('image')[0].element; // eslint-disable-line no-underscore-dangle + const imageElement = this.getHighestScoringImage(fnode); const imageDOMRect = imageElement.getBoundingClientRect(); if (DOMRect.top >= (imageDOMRect.top - TOP_BUFFER) && DOMRect.bottom <= imageDOMRect.bottom) { @@ -319,4 +324,22 @@ export default class RulesetFactory { rule(type('priceish').max(), out('price')), ); } + + /** + * Takes in a coefficients object and returns an array of its values, + * ordered by sorted key name. 
+ */ + static getCoeffsInOrder(coeffsObj) { + const coeffsKeys = Object.keys(coeffsObj); + coeffsKeys.sort(); // default sort: ascending UTF-16 code unit order + const coeffs = []; + for (const key of coeffsKeys) { + coeffs.push(coeffsObj[key]); + } + return coeffs; + } + + getHighestScoringImage(fnode) { + return fnode._ruleset.get('image')[0].element; // eslint-disable-line no-underscore-dangle + } } diff --git a/src/trainees.js b/src/extraction/trainees.js similarity index 61% rename from src/trainees.js rename to src/extraction/trainees.js index a8ce8fb..4254085 100644 --- a/src/trainees.js +++ b/src/extraction/trainees.js @@ -2,11 +2,13 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* eslint-disable import/no-unresolved */ +// This file is moved up a level to the ./src folder for training, so the import paths below are relative to ./src import defaultCoefficients from './extraction/fathom_default_coefficients.json'; import RulesetFactory from './extraction/ruleset_factory'; -// Array of numbers corresponding to the coefficients -const coeffs = Object.values(defaultCoefficients); +// Array of coefficient values, ordered by their sorted key names +const coeffs = RulesetFactory.getCoeffsInOrder(defaultCoefficients); /** * Rulesets to train using Fathom. @@ -41,59 +43,33 @@ const coeffs = Object.values(defaultCoefficients); * the freeze-dried library Fathom uses). */ +function rulesetMaker(coefficients) { + // The coefficients are updated over time during training, so create a new factory for + // each iteration + const rulesetFactory = new RulesetFactory(coefficients); + return rulesetFactory.makeRuleset(); +} + const trainees = new Map([ [ - /** - * A ruleset that finds the main product image on a product page. 
- */ - 'image', // Ruleset name + 'image', { coeffs, - /** - * @param {Array.number} coefficients - */ - rulesetMaker(coefficients) { - // The coefficients are updated over time during training, so create a new factory for - // each iteration - const rulesetFactory = new RulesetFactory(coefficients); - return rulesetFactory.makeRuleset(); // The ruleset - }, + rulesetMaker, }, ], [ - /** - * A ruleset that finds the main product title on a product page. - */ - 'title', // Ruleset name + 'title', { coeffs, - /** - * @param {Array.number} coefficients - */ - rulesetMaker(coefficients) { - // The coefficients are updated over time during training, so create a new factory for - // each iteration - const rulesetFactory = new RulesetFactory(coefficients); - return rulesetFactory.makeRuleset(); // The ruleset - }, + rulesetMaker, }, ], [ - /** - * A ruleset that finds the main product price on a product page. - */ - 'price', // Ruleset name + 'price', { coeffs, - /** - * @param {Array.number} coefficients - */ - rulesetMaker(coefficients) { - // The coefficients are updated over time during training, so create a new factory for - // each iteration - const rulesetFactory = new RulesetFactory(coefficients); - return rulesetFactory.makeRuleset(); // The ruleset - }, + rulesetMaker, }, ], ]);