Skip to content
This repository has been archived by the owner on Dec 3, 2020. It is now read-only.

Commit

Permalink
Fix #36: Incorporate more feedback from Osmose.
Browse files Browse the repository at this point in the history
  • Loading branch information
biancadanforth committed Aug 24, 2018
1 parent 4b1d448 commit fdef992
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 61 deletions.
10 changes: 5 additions & 5 deletions src/extraction/fathom_default_coefficients.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"largerImageCoeff": 2,
"largerFontSizeCoeff": 7,
"hasDollarSignCoeff": 8,
"hasPriceInIDCoeff": 17,
"hasPriceInClassNameCoeff": 2,
"isAboveTheFoldPriceCoeff": 33,
"hasPriceInIDCoeff": 17,
"hasPriceishPatternCoeff": 15,
"isAboveTheFoldImageCoeff": 13,
"isAboveTheFoldPriceCoeff": 33,
"isNearbyImageXAxisPriceCoeff": 5,
"isNearbyImageYAxisTitleCoeff": 5,
"hasPriceishPatternCoeff": 15
"largerFontSizeCoeff": 7,
"largerImageCoeff": 2
}
10 changes: 6 additions & 4 deletions src/extraction/fathom_extraction.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ import RulesetFactory from 'commerce/extraction/ruleset_factory';
import {SCORE_THRESHOLD} from 'commerce/config';

const PRODUCT_FEATURES = ['title', 'price', 'image'];
// Array of numbers corresponding to the coefficients
const coefficients = Object.values(defaultCoefficients);
// Array of coefficient values, sorted by coefficient name (see RulesetFactory.getCoeffsInOrder)
const coefficients = RulesetFactory.getCoeffsInOrder(defaultCoefficients);
// For production, we don't need to generate a new ruleset factory
// and ruleset every time we run Fathom, since the coefficients are static.
const rulesetFactory = new RulesetFactory(coefficients);
Expand All @@ -28,8 +28,9 @@ const rules = rulesetFactory.makeRuleset();
*/
function runRuleset(doc) {
const extractedElements = {};
const results = rules.against(doc);
for (const feature of PRODUCT_FEATURES) {
let fnodesList = rules.against(doc).get(feature);
let fnodesList = results.get(feature);
fnodesList = fnodesList.filter(fnode => fnode.scoreFor(`${feature}ish`) >= SCORE_THRESHOLD);
// It is possible for multiple elements to have the same highest score.
if (fnodesList.length >= 1) {
Expand All @@ -56,8 +57,9 @@ export default function extractProduct(doc) {
for (const feature of PRODUCT_FEATURES) {
if (feature === 'image') {
extractedProduct[feature] = extractedElements[feature].src;
} else {
extractedProduct[feature] = extractedElements[feature].innerText;
}
extractedProduct[feature] = extractedElements[feature].innerText;
}
}
return hasAllFeatures(extractedProduct) ? extractedProduct : null;
Expand Down
45 changes: 34 additions & 11 deletions src/extraction/ruleset_factory.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,16 @@ export default class RulesetFactory {
*/
constructor(coefficients) {
[
this.largerImageCoeff,
this.largerFontSizeCoeff,
this.hasDollarSignCoeff,
this.hasPriceInIDCoeff,
this.hasPriceInClassNameCoeff,
this.isAboveTheFoldPriceCoeff,
this.hasPriceInIDCoeff,
this.hasPriceishPatternCoeff,
this.isAboveTheFoldImageCoeff,
this.isAboveTheFoldPriceCoeff,
this.isNearbyImageXAxisPriceCoeff,
this.isNearbyImageYAxisTitleCoeff,
this.hasPriceishPatternCoeff,
this.largerFontSizeCoeff,
this.largerImageCoeff,
] = coefficients;
}

Expand Down Expand Up @@ -113,15 +113,20 @@ export default class RulesetFactory {
const viewportHeight = window.innerHeight;
const top = fnode.element.getBoundingClientRect().top;
const upperHeightLimit = viewportHeight * 2;
// Use a falling trapezoid function to score the element
// Taken from: https://github.com/mozilla/fathom-trainees

// If the node is below the fold by more than a viewport's length,
// return a low score.
if (top >= upperHeightLimit) {
return ZEROISH * featureCoeff;
}

// If the node is above the fold, return a high score.
if (top <= viewportHeight) {
return ONEISH * featureCoeff;
}
// slope = deltaY / deltaX

// Otherwise, scale the score linearly between the fold and a viewport's
// length below it.
const slope = (ONEISH - ZEROISH) / (viewportHeight - upperHeightLimit);
return (slope * (top - upperHeightLimit) + ZEROISH) * featureCoeff;
}
Expand All @@ -132,7 +137,7 @@ export default class RulesetFactory {
isNearbyImageXAxisPrice(fnode) {
const viewportWidth = window.innerWidth;
const eleDOMRect = fnode.element.getBoundingClientRect();
const imageElement = fnode._ruleset.get('image')[0].element; // eslint-disable-line no-underscore-dangle
const imageElement = this.getHighestScoringImage(fnode);
const imageDOMRect = imageElement.getBoundingClientRect();
const deltaRight = eleDOMRect.left - imageDOMRect.right;
const deltaLeft = imageDOMRect.left - eleDOMRect.right;
Expand All @@ -157,7 +162,7 @@ export default class RulesetFactory {
isNearbyImageYAxisTitle(fnode) {
const viewportHeight = window.innerHeight;
const DOMRect = fnode.element.getBoundingClientRect();
const imageElement = fnode._ruleset.get('image')[0].element; // eslint-disable-line no-underscore-dangle
const imageElement = this.getHighestScoringImage(fnode);
const imageDOMRect = imageElement.getBoundingClientRect();
// Some titles (like on Ebay) are above the image, so include a top buffer
const isEleTopNearby = DOMRect.top >= (imageDOMRect.top - TOP_BUFFER);
Expand Down Expand Up @@ -240,7 +245,7 @@ export default class RulesetFactory {
isNearbyImageYAxisPrice(fnode) {
const element = fnode.element;
const DOMRect = element.getBoundingClientRect();
const imageElement = fnode._ruleset.get('image')[0].element; // eslint-disable-line no-underscore-dangle
const imageElement = this.getHighestScoringImage(fnode);
const imageDOMRect = imageElement.getBoundingClientRect();
if (DOMRect.top >= (imageDOMRect.top - TOP_BUFFER)
&& DOMRect.bottom <= imageDOMRect.bottom) {
Expand Down Expand Up @@ -319,4 +324,22 @@ export default class RulesetFactory {
rule(type('priceish').max(), out('price')),
);
}

/**
* Takes in a coefficients object and returns a coefficients array in the
* same order.
*/
static getCoeffsInOrder(coeffsObj) {
const coeffsKeys = Object.keys(coeffsObj);
coeffsKeys.sort(); // sort keys in string Unicode order
const coeffs = [];
for (const key of coeffsKeys) {
coeffs.push(coeffsObj[key]);
}
return coeffs;
}

getHighestScoringImage(fnode) {
return fnode._ruleset.get('image')[0].element; // eslint-disable-line no-underscore-dangle
}
}
58 changes: 17 additions & 41 deletions src/trainees.js → src/extraction/trainees.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* eslint-disable import/no-unresolved */
// This file is moved up a level to the ./src folder for training
import defaultCoefficients from './extraction/fathom_default_coefficients.json';
import RulesetFactory from './extraction/ruleset_factory';

// Array of numbers corresponding to the coefficients
const coeffs = Object.values(defaultCoefficients);
// Array of coefficient values, sorted by coefficient name (see RulesetFactory.getCoeffsInOrder)
const coeffs = RulesetFactory.getCoeffsInOrder(defaultCoefficients);

/**
* Rulesets to train using Fathom.
Expand Down Expand Up @@ -41,59 +43,33 @@ const coeffs = Object.values(defaultCoefficients);
* the freeze-dried library Fathom uses).
*/

/**
 * Builds a ruleset from the given coefficients.
 *
 * @param {Array.number} coefficients Coefficients handed to the RulesetFactory
 * @returns {Object} The result of RulesetFactory#makeRuleset
 */
function rulesetMaker(coefficients) {
  // Training updates the coefficients over time, so a fresh factory is
  // constructed on every call instead of reusing one.
  return new RulesetFactory(coefficients).makeRuleset();
}

const trainees = new Map([
[
/**
* A ruleset that finds the main product image on a product page.
*/
'image', // Ruleset name
'image',
{
coeffs,
/**
* @param {Array.number} coefficients
*/
rulesetMaker(coefficients) {
// The coefficients are updated over time during training, so create a new factory for
// each iteration
const rulesetFactory = new RulesetFactory(coefficients);
return rulesetFactory.makeRuleset(); // The ruleset
},
rulesetMaker,
},
],
[
/**
* A ruleset that finds the main product title on a product page.
*/
'title', // Ruleset name
'title',
{
coeffs,
/**
* @param {Array.number} coefficients
*/
rulesetMaker(coefficients) {
// The coefficients are updated over time during training, so create a new factory for
// each iteration
const rulesetFactory = new RulesetFactory(coefficients);
return rulesetFactory.makeRuleset(); // The ruleset
},
rulesetMaker,
},
],
[
/**
* A ruleset that finds the main product price on a product page.
*/
'price', // Ruleset name
'price',
{
coeffs,
/**
* @param {Array.number} coefficients
*/
rulesetMaker(coefficients) {
// The coefficients are updated over time during training, so create a new factory for
// each iteration
const rulesetFactory = new RulesetFactory(coefficients);
return rulesetFactory.makeRuleset(); // The ruleset
},
rulesetMaker,
},
],
]);
Expand Down

0 comments on commit fdef992

Please sign in to comment.