Skip to content
This repository has been archived by the owner on Dec 3, 2020. It is now read-only.

Commit

Permalink
Move Fathom and fallback extraction to separate modules.
Browse files Browse the repository at this point in the history
  • Loading branch information
biancadanforth committed Aug 2, 2018
1 parent 0980f10 commit a99d3ba
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 105 deletions.
88 changes: 88 additions & 0 deletions src/fallback_extraction.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
* Uses CSS selectors, or failing that, Open Graph <meta> tags to extract
* a product from its product page, where a 'product' is defined by the bundle
* of features that makes it identifiable.
*
* Features: title, image, price
*/

import extractionData from 'commerce/product_extraction_data.json';

/**
 * Maps each product feature name (the key written onto the extracted product)
 * to the Open Graph `property` value used to look up its <meta> tag, i.e. the
 * value interpolated into the `meta[property='...']` selector.
 */
const OPEN_GRAPH_PROPERTY_VALUES = {
title: 'og:title',
image: 'og:image',
price: 'og:price:amount',
};

/**
 * Looks up the extraction data for the vendor whose key appears in the
 * current page's host.
 *
 * The host is taken from `window.location.href`; the first entry of
 * `extractionData` whose key is a substring of that host wins.
 *
 * @returns {Object|null} The matching vendor's attribute info, or null when
 *   no vendor key is contained in the host.
 */
function getProductAttributeInfo() {
  const {host} = new URL(window.location.href);
  const matchingEntry = Object.entries(extractionData)
    .find(([vendor]) => host.includes(vendor));
  return matchingEntry ? matchingEntry[1] : null;
}

/**
 * Reads one value off an element, using the property or attribute named by
 * `extractionProperty`.
 *
 * @param {HTMLElement} element Element to read from.
 * @param {string} extractionProperty One of 'content' (read via
 *   getAttribute), 'innerText' or 'src' (read as properties).
 * @returns {*} Whatever the element exposes for that property or attribute.
 * @throws {Error} If `extractionProperty` is not one of the supported names.
 */
function extractValueFromElement(element, extractionProperty) {
  if (extractionProperty === 'content') {
    return element.getAttribute('content');
  }
  if (extractionProperty === 'innerText') {
    return element.innerText;
  }
  if (extractionProperty === 'src') {
    return element.src;
  }
  throw new Error(`Unrecognized extraction property or attribute '${extractionProperty}'.`);
}

/**
 * Extracts a single product attribute using a vendor's ordered list of CSS
 * selectors.
 *
 * Selectors are tried in order and the first element that yields a truthy
 * value wins. An element that matches but yields an empty value no longer
 * aborts the search; the remaining selectors are still tried.
 *
 * @param {string} productAttribute Name of the attribute, used in error text.
 * @param {{selectors: string[], extractUsing: string}} extractor Vendor data
 *   for this attribute.
 * @returns {*} The first truthy value extracted.
 * @throws {Error} If no selector matched an element, or if every matched
 *   element yielded an empty value.
 */
function extractAttributeWithSelectors(productAttribute, {selectors, extractUsing}) {
  let foundElement = false;
  for (const selector of selectors) {
    const element = document.querySelector(selector);
    if (element) {
      foundElement = true;
      const value = extractValueFromElement(element, extractUsing);
      if (value) {
        return value;
      }
    }
  }
  // Decide which failure to report only after the whole list is exhausted,
  // rather than by comparing the current selector string to the last one.
  if (foundElement) {
    throw new Error(`Element found did not return a valid product ${productAttribute}.`);
  }
  throw new Error(`No elements found with vendor data for product ${productAttribute}.`);
}

/**
 * Returns any product information available on the page from CSS
 * selectors if vendor data exists for the page, otherwise from Open Graph
 * <meta> tags.
 *
 * @returns {Object} Extracted product features keyed by feature name. With
 *   vendor data every listed attribute is present; with Open Graph only the
 *   features whose <meta> tag exists are present.
 * @throws {Error} If vendor data exists but one of its attributes cannot be
 *   extracted from the page.
 */
export default function extractProduct() {
  const data = {};
  const attributeInfo = getProductAttributeInfo();
  if (attributeInfo) {
    for (const [productAttribute, extractor] of Object.entries(attributeInfo)) {
      data[productAttribute] = extractAttributeWithSelectors(productAttribute, extractor);
    }
    return data;
  }

  for (const [key, value] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) {
    const metaEle = document.querySelector(`meta[property='${value}']`);
    if (metaEle) {
      data[key] = metaEle.getAttribute('content');
    }
  }
  return data;
}
26 changes: 21 additions & 5 deletions src/fathom_ruleset.js → src/fathom_extraction.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
/** This Source Code Form is subject to the terms of the Mozilla Public
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
* Using Fathom to extract a product from its product page,
* Uses Fathom to extract a product from its product page,
* where a 'product' is defined by the bundle of features that
* makes it identifiable.
*
Expand Down Expand Up @@ -42,10 +42,10 @@ const rules = ruleset(
);

/**
* Extracts the highest scoring element above a score threshold for a
* given feature contained in a page's HTML document.
* Extracts the highest scoring element above a score threshold
* contained in a page's HTML document.
*/
export default function runTuningRoutine(doc) {
function runRuleset(doc) {
let fnodesList = rules.against(doc).get('product-price');
fnodesList = fnodesList.filter(fnode => fnode.scoreFor('priceish') >= SCORE_THRESHOLD);
// It is possible for multiple elements to have the same highest score.
Expand All @@ -54,3 +54,19 @@ export default function runTuningRoutine(doc) {
}
return null;
}

/*
 * Runs the ruleset against the given document and builds a product from the
 * price element it returns.
 *
 * The price is read from the `content` attribute when the element is a
 * <meta> tag, and from its text content otherwise. Returns `{price}` when a
 * non-empty price was read, or null when no element was returned or the
 * price was empty.
 */
export default function extractProduct(doc) {
  const priceEle = runRuleset(doc);
  if (!priceEle) {
    return null;
  }

  let price;
  if (priceEle.tagName === 'META') {
    price = priceEle.getAttribute('content');
  } else {
    price = priceEle.textContent;
  }

  return price ? {price} : null;
}
1 change: 1 addition & 0 deletions src/product_extraction_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"price": {
"selectors": [
"#priceblock_ourprice",
"#priceblock_dealprice",
".display-price",
".offer-price"
],
Expand Down
105 changes: 5 additions & 100 deletions src/product_info.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,9 @@
* which is after all DOM content has been loaded.
*/

import runTuningRoutine from 'commerce/fathom_ruleset';
import extractProductWithFathom from 'commerce/fathom_extraction';
import extractProductWithFallback from 'commerce/fallback_extraction';
import {retry} from 'commerce/utils';
import extractionData from 'commerce/product_extraction_data.json';

/**
 * Maps each product feature name to the Open Graph `property` value used to
 * find its <meta> tag via a `meta[property='...']` selector.
 */
const OPEN_GRAPH_PROPERTY_VALUES = {
title: 'og:title',
image: 'og:image',
price: 'og:price:amount',
};

/**
* Open a Port to the background script and wait for the background script to
Expand Down Expand Up @@ -55,103 +49,14 @@ async function openBackgroundPort() {
}
}());

/**
 * Extraction that uses vendor-specific CSS selectors, or failing that, Open
 * Graph <meta> tags.
 */
const fallbackExtraction = {
  /**
   * Returns any extraction data found for the vendor based on the URL
   * for the page.
   *
   * @returns {Object|null} Attribute info of the first vendor whose key is
   *   contained in the page's host, or null if none matches.
   */
  getProductAttributeInfo() {
    const hostname = new URL(window.location.href).host;
    for (const [vendor, attributeInfo] of Object.entries(extractionData)) {
      if (hostname.includes(vendor)) {
        return attributeInfo;
      }
    }
    return null;
  },

  /**
   * Extracts and returns the value for a given element property or attribute.
   *
   * @param {HTMLElement} element
   * @param {string} extractionProperty One of 'content', 'innerText', 'src'.
   * @throws {Error} If `extractionProperty` is not a supported name.
   */
  extractValueFromElement(element, extractionProperty) {
    switch (extractionProperty) {
      case 'content':
        return element.getAttribute('content');
      case 'innerText':
        return element.innerText;
      case 'src':
        return element.src;
      default:
        throw new Error(`Unrecognized extraction property or attribute '${extractionProperty}'.`);
    }
  },

  /**
   * Extracts a single product attribute using a vendor's ordered selectors.
   *
   * Selectors are tried in order and the first element that yields a truthy
   * value wins. An element that matches but yields an empty value does not
   * abort the search; the remaining selectors are still tried.
   *
   * @param {string} productAttribute Name of the attribute, used in errors.
   * @param {{selectors: string[], extractUsing: string}} extractor
   * @returns {*} The first truthy value extracted.
   * @throws {Error} If no selector matched an element, or if every matched
   *   element yielded an empty value.
   */
  extractAttributeWithSelectors(productAttribute, {selectors, extractUsing}) {
    let foundElement = false;
    for (const selector of selectors) {
      const element = document.querySelector(selector);
      if (element) {
        foundElement = true;
        const value = this.extractValueFromElement(element, extractUsing);
        if (value) {
          return value;
        }
      }
    }
    // Pick the failure to report only once the whole list is exhausted,
    // rather than by comparing the current selector string to the last one.
    if (foundElement) {
      throw new Error(`Element found did not return a valid product ${productAttribute}.`);
    }
    throw new Error(`No elements found with vendor data for product ${productAttribute}.`);
  },

  /**
   * Returns any product information available on the page from CSS
   * selectors if vendor data exists, otherwise from Open Graph <meta> tags.
   * The page URL is always added as `url`.
   *
   * @returns {Object} Extracted product features plus `url`.
   * @throws {Error} If vendor data exists but one of its attributes cannot
   *   be extracted from the page.
   */
  extractProduct() {
    const data = {};
    const attributeInfo = this.getProductAttributeInfo();
    if (attributeInfo) {
      for (const [productAttribute, extractor] of Object.entries(attributeInfo)) {
        data[productAttribute] = this.extractAttributeWithSelectors(productAttribute, extractor);
      }
    } else {
      for (const [key, value] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) {
        const metaEle = document.querySelector(`meta[property='${value}']`);
        if (metaEle) {
          data[key] = metaEle.getAttribute('content');
        }
      }
    }
    data.url = window.document.URL;
    return data;
  },
};

const fathomExtraction = {
  /*
   * Runs the tuning routine against the current window document and builds a
   * product from the price element it returns.
   *
   * The price is read from the `content` attribute for a <meta> element and
   * from the text content otherwise. Returns `{price, url}` when a non-empty
   * price was read, or null otherwise.
   */
  extractProduct() {
    const priceEle = runTuningRoutine(window.document);
    if (!priceEle) {
      return null;
    }

    let price;
    if (priceEle.tagName === 'META') {
      price = priceEle.getAttribute('content');
    } else {
      price = priceEle.textContent;
    }
    if (!price) {
      return null;
    }

    return {price, url: window.document.URL};
  },
};

/**
* Checks to see if any product information for the page was found,
* and if so, sends it to the background script via the port.
*/
async function getProductInfo(port) {
const extractedProduct = fathomExtraction.extractProduct() || fallbackExtraction.extractProduct();
const extractedProduct = (extractProductWithFathom(window.document)
|| extractProductWithFallback());
extractedProduct.url = window.document.URL;
port.postMessage({
from: 'content',
subject: 'ready',
Expand Down

0 comments on commit a99d3ba

Please sign in to comment.