This repository has been archived by the owner on Dec 3, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #45 from mozilla/36-fathom-rules
Fix #36: Add initial Fathom rules with 98.7% average training accuracy
- Loading branch information
Showing
12 changed files
with
502 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,4 @@ node_modules | |
web-ext-artifacts | ||
build | ||
gecko.log | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{ | ||
"hasDollarSignCoeff": 8, | ||
"hasPriceInClassNameCoeff": 2, | ||
"hasPriceInIDCoeff": 17, | ||
"hasPriceishPatternCoeff": 15, | ||
"isAboveTheFoldImageCoeff": 13, | ||
"isAboveTheFoldPriceCoeff": 33, | ||
"isNearbyImageXAxisPriceCoeff": 5, | ||
"isNearbyImageYAxisTitleCoeff": 5, | ||
"largerFontSizeCoeff": 7, | ||
"largerImageCoeff": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | ||
|
||
/** | ||
* Uses Fathom to extract a product from its product page, | ||
* where a 'product' is defined by the bundle of features that | ||
* makes it identifiable. | ||
* | ||
* Features: title, image, price | ||
*/ | ||
|
||
import defaultCoefficients from 'commerce/extraction/fathom_default_coefficients.json'; | ||
import RulesetFactory from 'commerce/extraction/ruleset_factory'; | ||
|
||
// Names of the features that together make up an extracted product.
const PRODUCT_FEATURES = ['title', 'price', 'image'];
// An element must score at least this much for its feature to count as extracted.
const SCORE_THRESHOLD = 4;
// The default coefficients, flattened into the ordered array RulesetFactory takes.
const coefficients = RulesetFactory.getCoeffsInOrder(defaultCoefficients);
// The coefficients are static in production, so the factory and its ruleset
// are built once at module load instead of on every Fathom run.
const rulesetFactory = new RulesetFactory(coefficients);
const rules = rulesetFactory.makeRuleset();
|
||
/**
 * Runs the module-level ruleset against a document and picks, for each
 * product feature, the first candidate whose score reaches SCORE_THRESHOLD.
 *
 * @param {Document} doc - The page's HTML document to run the ruleset against.
 * @returns {Object} Map from feature name to the chosen DOM element. A feature
 *   with no candidate at or above the threshold has no key in the result.
 */
function runRuleset(doc) {
  const boundResults = rules.against(doc);
  const elementsByFeature = {};
  PRODUCT_FEATURES.forEach((feature) => {
    const scoreType = `${feature}ish`;
    const qualifying = boundResults
      .get(feature)
      .filter(fnode => fnode.scoreFor(scoreType) >= SCORE_THRESHOLD);
    // Several elements can tie for the top score; the first one wins.
    const [best] = qualifying;
    if (best !== undefined) {
      elementsByFeature[feature] = best.element;
    }
  });
  return elementsByFeature;
}
|
||
/**
 * Returns true if every key in PRODUCT_FEATURES has a truthy value on obj.
 *
 * @param {?Object} obj - Object keyed by feature name. A null or undefined
 *   value is treated as having no features.
 * @returns {boolean} True only when every feature key maps to a truthy value.
 */
function hasAllFeatures(obj) {
  // A missing object cannot hold any feature; return false instead of throwing.
  if (obj == null) {
    return false;
  }
  // Single pass with short-circuiting; no intermediate array of values.
  return PRODUCT_FEATURES.every(key => Boolean(obj[key]));
}
|
||
/**
 * Runs the product-feature ruleset against the given document and converts
 * the extracted elements into plain product data.
 *
 * @param {Document} doc - The document to extract a product from.
 * @returns {?Object} An object with a value for every key in PRODUCT_FEATURES
 *   (the element's `src` for 'image', its `innerText` for the others), or null
 *   when any feature's element or value is missing or falsy.
 */
export default function extractProduct(doc) {
  const elements = runRuleset(doc);
  // Without an element for every feature there is no product to report.
  if (!hasAllFeatures(elements)) {
    return null;
  }

  const product = {};
  PRODUCT_FEATURES.forEach((feature) => {
    const element = elements[feature];
    product[feature] = feature === 'image' ? element.src : element.innerText;
  });

  // An element can still yield an empty value (e.g. no text), so re-check.
  if (!hasAllFeatures(product)) {
    return null;
  }
  return product;
}
File renamed without changes.
Oops, something went wrong.