Skip to content
This repository has been archived by the owner on Dec 3, 2020. It is now read-only.

Commit

Permalink
#36: Incorporate feedback from Osmose and erikrose.
Browse files Browse the repository at this point in the history
  • Loading branch information
biancadanforth committed Aug 20, 2018
1 parent 5b0aba8 commit 117fcdd
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 131 deletions.
11 changes: 11 additions & 0 deletions src/config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
* Config values that are shared between files or otherwise useful to have in
* a separate file.
*/

// Minimum score (inclusive) that a candidate element must reach to be
// considered the "correct" feature element extracted by Fathom.
export const SCORE_THRESHOLD = 4; // eslint-disable-line import/prefer-default-export
3 changes: 2 additions & 1 deletion src/fathom_coefficients.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"hasPriceInClassNameCoeff": 2,
"isAboveTheFoldPriceCoeff": 33,
"isAboveTheFoldImageCoeff": 13,
"isNearbyImageXAxisCoeff": 5,
"isNearbyImageXAxisPriceCoeff": 5,
"isNearbyImageYAxisTitleCoeff": 5,
"hasPriceishPatternCoeff": 15
}
40 changes: 22 additions & 18 deletions src/fathom_extraction.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,32 +19,36 @@ import {
hasPriceInClassNameCoeff,
isAboveTheFoldPriceCoeff,
isAboveTheFoldImageCoeff,
isNearbyImageXAxisCoeff,
isNearbyImageXAxisPriceCoeff,
isNearbyImageYAxisTitleCoeff,
hasPriceishPatternCoeff,
} from 'commerce/fathom_coefficients.json';
import {SCORE_THRESHOLD} from 'commerce/config';

const PRODUCT_FEATURES = ['title', 'price', 'image'];
const SCORE_THRESHOLD = 4;
// Build the ruleset once at module scope so that runRuleset can reuse it for
// every document it is called with.
const {rulesetMaker} = productRuleset.get('product');
// NOTE(review): the coefficients are passed as a plain array, so they are
// presumably matched by position -- confirm the order against rulesetMaker's
// parameter list before adding, removing, or reordering entries.
const rulesetWithCoeffs = rulesetMaker([
largerImageCoeff,
largerFontSizeCoeff,
hasDollarSignCoeff,
hasPriceInIDCoeff,
hasPriceInClassNameCoeff,
isAboveTheFoldPriceCoeff,
isAboveTheFoldImageCoeff,
isNearbyImageXAxisPriceCoeff,
isNearbyImageYAxisTitleCoeff,
hasPriceishPatternCoeff,
]);

/**
* For each product feature, extracts the highest scoring element in a page's
* HTML document whose score is at or above the score threshold.
*/
function runRuleset(doc) {
const rulesetOutput = rulesetWithCoeffs.against(doc);
const extractedElements = {};
const rules = productRuleset.get('product').rulesetMaker;
for (const feature of PRODUCT_FEATURES) {
let fnodesList = rules([
largerImageCoeff,
largerFontSizeCoeff,
hasDollarSignCoeff,
hasPriceInIDCoeff,
hasPriceInClassNameCoeff,
isAboveTheFoldPriceCoeff,
isAboveTheFoldImageCoeff,
isNearbyImageXAxisCoeff,
hasPriceishPatternCoeff,
]).against(doc).get(`${feature}`);
let fnodesList = rulesetOutput.get(feature);
fnodesList = fnodesList.filter(fnode => fnode.scoreFor(`${feature}ish`) >= SCORE_THRESHOLD);
// It is possible for multiple elements to have the same highest score.
if (fnodesList.length >= 1) {
Expand All @@ -69,10 +73,10 @@ export default function extractProduct(doc) {
const extractedElements = runRuleset(doc);
if (hasAllFeatures(extractedElements)) {
for (const feature of PRODUCT_FEATURES) {
extractedProduct[feature] = (feature === 'image'
? extractedElements[feature].src
: extractedElements[feature].innerText
);
// An image is represented by its URL (`src`); the other features use the
// element's rendered text. The `else` is required: an unconditional
// `innerText` assignment after the `if` would also run for 'image' and
// overwrite the `src` value that had just been stored.
if (feature === 'image') {
  extractedProduct[feature] = extractedElements[feature].src;
} else {
  extractedProduct[feature] = extractedElements[feature].innerText;
}
}
}
return hasAllFeatures(extractedProduct) ? extractedProduct : null;
Expand Down
Loading

0 comments on commit 117fcdd

Please sign in to comment.