Skip to content
This repository has been archived by the owner on Dec 3, 2020. It is now read-only.

Commit

Permalink
Incorporate Osmose's feedback and rebase.
Browse files Browse the repository at this point in the history
  • Loading branch information
biancadanforth committed Aug 2, 2018
1 parent e3f5f9a commit d364866
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 131 deletions.
2 changes: 1 addition & 1 deletion src/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
browser.runtime.onConnect.addListener((port) => {
port.onMessage.addListener((message) => {
if (message.from === 'content' && message.subject === 'ready') {
console.log(message.data); // eslint-disable-line no-console
console.log(message.extractedProduct); // eslint-disable-line no-console
}
});
port.postMessage({
Expand Down
3 changes: 3 additions & 0 deletions src/fathom_coefficients.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"hasDivWithPriceClass": 1
}
75 changes: 34 additions & 41 deletions src/fathom_ruleset.js
Original file line number Diff line number Diff line change
@@ -1,61 +1,54 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
/** This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
* Using fathom to extract a product from its product page,
* Using Fathom to extract a product from its product page,
* where a 'product' is defined by the bundle of features that
* makes it identifiable.
*
* Features: Title, Image, Price
*
* Note that this page is defined in manifest.json to run at "document_idle"
* which is after all DOM content has been loaded.
*
* Features: title, image, price
*/

import {dom, out, rule, ruleset, score, type} from 'fathom-web';
import fathomCoeffs from 'commerce/fathom_coefficients.json';

// Per-feature tuning table. For each product feature, `routine` is the function
// that accepts the tuning coefficients and `coeffs` lists the values passed to it.
const tuningRoutines = {
// Price: tunedPriceFnodes takes one coefficient (coeffHasDivWithPriceClass).
price: {routine: tunedPriceFnodes, coeffs: [2]},
};

/*
* Remove dollar sign, strip whitespace, strip words (anything not numeric
* or a price symbol), and remove trailing zeros
/**
* Checks to see if an element is a <div> with a class of "price".
* Returns an integer corresponding to the coefficient to use for
* scoring an element with this rule.
*/
function formatPrice(priceString) {
const formattedPriceStr = priceString.replace('$', '').replace(/([\s]|[^0-9$.-])/g, '');
return parseFloat(formattedPriceStr.substr(formattedPriceStr.indexOf('$') + 1));
/**
 * Scoring callback for price candidates.
 *
 * @param {object} fnode Fathom node; the candidate element is `fnode.element`.
 * @returns {number} The `hasDivWithPriceClass` coefficient loaded from
 *   fathom_coefficients.json when the element carries the "price" class,
 *   otherwise 1.
 */
function hasDivWithPriceClass(fnode) {
  const {classList} = fnode.element;
  const isPriceClassed = classList.contains('price');
  return isPriceClassed ? fathomCoeffs.hasDivWithPriceClass : 1;
}

/*
* Ruleset for product prices
/**
* Ruleset for product features. Each feature has its own type.
*/
function tunedPriceFnodes(coeffHasDivWithPriceClass = 1) {
// Scores a candidate element: returns the coefficient handed to the enclosing
// tuning routine when the element has the "price" class, otherwise 1.
function hasDivWithPriceClass(fnode) {
  const matchesPriceClass = fnode.element.classList.contains('price');
  if (!matchesPriceClass) {
    return 1;
  }
  return coeffHasDivWithPriceClass;
}

const rules = ruleset(
// get all elements that could contain the price
rule(dom('div'), type('priceish')),
const rules = ruleset(
// get all elements that could contain the price
rule(dom('div'), type('priceish')),

// check class names to see if they contain 'price'
rule(type('priceish'), score(hasDivWithPriceClass)),
// check class names to see if they contain 'price'
rule(type('priceish'), score(hasDivWithPriceClass)),

// return price element with max score
rule(type('priceish').max(), out('product-price')),
);
// return price element with max score
rule(type('priceish').max(), out('product-price')),
);

function tuningRoutine(doc) {
return rules.against(doc).get('product-price');
/**
* Extracts the highest scoring element for a given feature contained
* in a page's HTML document.
*/
export default function runTuningRoutine(doc) {
const fnodesList = rules.against(doc).get('product-price');
// It is possible for multiple elements to have the same highest score.
const elementsList = fnodesList.map(fnode => fnode.element);
if (elementsList.length === 1) {
return elementsList[0];
}

return tuningRoutine;
return null;
}

export {tuningRoutines, formatPrice};
187 changes: 98 additions & 89 deletions src/product_info.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import {tuningRoutines, formatPrice} from 'commerce/fathom_ruleset';
/**
* Note that this script is configured in manifest.json to run at "document_idle",
* which is after all DOM content has been loaded.
*/

import runTuningRoutine from 'commerce/fathom_ruleset';
import {retry} from 'commerce/utils';
import extractionData from './product_extraction_data.json';
import extractionData from 'commerce/product_extraction_data.json';

const OPEN_GRAPH_PROPERTY_VALUES = {
title: 'og:title',
Expand Down Expand Up @@ -50,102 +55,106 @@ async function openBackgroundPort() {
}
}());

/**
 * Builds the product payload for the current page and posts it to the
 * background script as a 'ready' message from 'content'.
 *
 * The price from the ruleset run is preferred; when that value is falsy,
 * the price from extractData() is used instead. The message is always posted.
 *
 * @param {object} port Connection to the background script; must provide postMessage().
 */
async function getProductInfo(port) {
  const rulesetPrice = runRuleset('price', tuningRoutines.price.coeffs);
  const price = rulesetPrice || extractData().price;
  const {URL: url} = window.document;
  port.postMessage({
    from: 'content',
    subject: 'ready',
    data: {price, url},
  });
}

/**
* Returns any extraction data found for the vendor based on the URL
* for the page.
*/
function getProductAttributeInfo() {
const hostname = new URL(window.location.href).host;
for (const [vendor, attributeInfo] of Object.entries(extractionData)) {
if (hostname.includes(vendor)) {
return attributeInfo;
const fallbackExtraction = {
/**
* Returns any extraction data found for the vendor based on the URL
* for the page.
*/
getProductAttributeInfo() {
const hostname = new URL(window.location.href).host;
for (const [vendor, attributeInfo] of Object.entries(extractionData)) {
if (hostname.includes(vendor)) {
return attributeInfo;
}
}
}
return null;
}
return null;
},

/**
 * Reads a single value off an element using the named property or attribute.
 *
 * @param {HTMLElement} element Element to read from.
 * @param {string} extractionProperty One of 'content' (read via getAttribute),
 *   'innerText' or 'src' (read as element properties).
 * @returns {*} Whatever the attribute lookup or property access yields.
 * @throws {Error} If extractionProperty is not one of the supported names.
 */
function extractValueFromElement(element, extractionProperty) {
  if (extractionProperty === 'content') {
    return element.getAttribute('content');
  }
  if (extractionProperty === 'innerText' || extractionProperty === 'src') {
    return element[extractionProperty];
  }
  throw new Error(`Unrecognized extraction property or attribute '${extractionProperty}'.`);
}
/**
* Extracts and returns the string value for a given element property or attribute.
*
* @param {HTMLElement} element
* @param {string} extractionProperty
*/
extractValueFromElement(element, extractionProperty) {
switch (extractionProperty) {
case 'content':
return element.getAttribute('content');
case 'innerText':
return element.innerText;
case 'src':
return element.src;
default:
throw new Error(`Unrecognized extraction property or attribute '${extractionProperty}'.`);
}
},

/**
* Returns any product information available on the page from CSS
* selectors if they exist, otherwise from Open Graph <meta> tags.
*/
function extractData() {
const data = {};
const attributeInfo = getProductAttributeInfo();
if (attributeInfo) {
for (const [productAttribute, extractor] of Object.entries(attributeInfo)) {
const {selectors, extractUsing} = extractor;
for (const selector of selectors) {
const element = document.querySelector(selector);
if (element) {
data[productAttribute] = extractValueFromElement(element, extractUsing);
if (data[productAttribute]) {
break;
} else {
throw new Error(`Element found did not return a valid product ${productAttribute}.`);
/**
* Returns any product information available on the page from CSS
* selectors if they exist, otherwise from Open Graph <meta> tags.
*/
extractProduct() {
const data = {};
const attributeInfo = this.getProductAttributeInfo();
if (attributeInfo) {
for (const [productAttribute, extractor] of Object.entries(attributeInfo)) {
const {selectors, extractUsing} = extractor;
for (const selector of selectors) {
const element = document.querySelector(selector);
if (element) {
data[productAttribute] = this.extractValueFromElement(element, extractUsing);
if (data[productAttribute]) {
break;
} else {
throw new Error(`Element found did not return a valid product ${productAttribute}.`);
}
} else if (selector === selectors[selectors.length - 1]) {
// None of the selectors matched an element on the page
throw new Error(`No elements found with vendor data for product ${productAttribute}.`);
}
} else if (selector === selectors[selectors.length - 1]) {
// None of the selectors matched an element on the page
throw new Error(`No elements found with vendor data for product ${productAttribute}.`);
}
}
} else {
for (const [key, value] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) {
const metaEle = document.querySelector(`meta[property='${value}']`);
if (metaEle) {
data[key] = metaEle.getAttribute('content');
}
}
}
} else {
for (const [key, value] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) {
const metaEle = document.querySelector(`meta[property='${value}']`);
if (metaEle) {
data[key] = metaEle.getAttribute('content');
data.url = window.document.URL;
return data;
},
};

const fathomExtraction = {
/*
* Run the ruleset for the product features against the current window document
*/
extractProduct() {
const priceEle = runTuningRoutine(window.document);
if (priceEle) {
const price = (priceEle.tagName !== 'META') ? priceEle.textContent : priceEle.getAttribute('content');
if (price) {
return {
price,
url: window.document.URL,
};
}
}
}
return data;
}
return null;
},
};

/*
* Run the ruleset for a feature against the current window document
* @param {string} feature title/image/price
* @param {object} array of tuning coeffs if any
/**
* Checks to see if any product information for the page was found,
* and if so, sends it to the background script via the port.
*/
function runRuleset(feature, coeffs = []) {
const tuningRoutine = tuningRoutines[feature].routine;
let gotText;
if (feature === 'price') {
// strip whitespace, dollar sign, words, and trailing zeros when comparing price
gotText = tuningRoutine(...coeffs)(window.document).map(fnode => fnode.element)[0];
gotText = formatPrice((gotText.tagName !== 'META') ? gotText.textContent : gotText.getAttribute('content'));
}
return gotText;
/**
 * Extracts product information for the current page and posts it to the
 * background script as a 'ready' message from 'content'.
 *
 * Fathom-based extraction is tried first; the fallback extraction runs only
 * when the Fathom result is falsy. The message is always posted.
 *
 * @param {object} port Connection to the background script; must provide postMessage().
 */
async function getProductInfo(port) {
  let extractedProduct = fathomExtraction.extractProduct();
  if (!extractedProduct) {
    extractedProduct = fallbackExtraction.extractProduct();
  }
  port.postMessage({from: 'content', subject: 'ready', extractedProduct});
}

0 comments on commit d364866

Please sign in to comment.