Skip to content
This repository has been archived by the owner on Dec 3, 2020. It is now read-only.

Commit

Permalink
Incorporate more feedback from Osmose
Browse files Browse the repository at this point in the history
* Log a warning instead of throwing an error and halting execution if fallback extraction for a supported site fails in various ways.
* Restructure 'fallbackExtractionData' in 'fallback_extraction_selectors.js', so it is an array of objects with keys 'domains' and 'features'.
* Change the 'parsePrice' utility function to accept an array of strings, so that it receives the same type of argument regardless of whether the extraction info comes from Fathom or from fallback extraction.
* Remove unused utility methods from './src/utils.js' and './src/extraction/utils.js'.
  • Loading branch information
biancadanforth committed Sep 28, 2018
1 parent 740c53c commit 9e546b7
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 247 deletions.
15 changes: 8 additions & 7 deletions src/extraction/fallback_extraction.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@ const OPEN_GRAPH_PROPERTY_VALUES = {
*/
function getFeatureInfo() {
const hostname = new URL(window.location.href).host;
for (const [vendorDomainsStr, featureInfo] of Object.entries(extractionData)) {
const vendorDomains = vendorDomainsStr.split('_');
for (const domain of vendorDomains) {
for (const siteInfo of extractionData) {
for (const domain of siteInfo.domains) {
if (hostname.includes(domain)) {
return featureInfo;
return siteInfo.features;
}
}
}
Expand All @@ -44,11 +43,13 @@ function findValue(extractors) {
if (value) {
return value;
}
throw new Error('Element found did not return a valid value for the product feature.');
// eslint-disable-next-line no-console
console.warn('Element found did not return a valid value for the product feature.');
}
}
// None of the selectors matched an element on the page
throw new Error('No elements found with vendor data for the product feature.');
// eslint-disable-next-line no-console
console.warn('No elements found with vendor data for the product feature.');
return null;
}

/**
Expand Down
196 changes: 108 additions & 88 deletions src/extraction/fallback_extraction_selectors.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import {getPriceInSubunits} from 'commerce/extraction/utils';
import {parsePrice} from 'commerce/extraction/utils';

function inUnits(fn) {
return element => getPriceInSubunits(fn(element));
return (element) => {
const priceString = fn(element);
return parsePrice([priceString]);
};
}

function fromProperty(property) {
Expand All @@ -18,98 +21,115 @@ function fromAttribute(attribute) {


/**
* CSS selector data by site (represented by a string of acceptable hostnames), where each
* selector is paired with a method that extracts the value from the element returned by
* that selector.
* CSS selector data by site, where each selector is paired with a method that
* extracts the value from the element returned by that selector.
*/
const fallbackExtractionData = {
'amazon.com_www.amazon.com_smile.amazon.com': {
title: [
['#productTitle', fromProperty('innerText')],
['.product-title', fromProperty('innerText')],
],
price: [
['#priceblock_dealprice', inUnits(fromProperty('innerText'))],
['#priceblock_ourprice', inUnits(fromProperty('innerText'))],
['#price_inside_buybox', inUnits(fromProperty('innerText'))],
['#buybox .a-color-price', inUnits(fromProperty('innerText'))],
['input[name="displayedPrice"]', inUnits(fromAttribute('value'))],
['.a-size-large.a-color-price.guild_priceblock_ourprice', inUnits(fromProperty('innerText'))],
['.a-color-price.a-size-medium.a-align-bottom', inUnits(fromProperty('innerText'))],
['.display-price', inUnits(fromProperty('innerText'))],
['.offer-price', inUnits(fromProperty('innerText'))],
],
image: [
['#landingImage', fromProperty('src')],
['#imgBlkFront', fromProperty('src')],
['#ebooksImgBlkFront', fromProperty('src')],
],
const fallbackExtractionData = [
{
domains: ['amazon.com', 'www.amazon.com', 'smile.amazon.com'],
features: {
title: [
['#productTitle', fromProperty('innerText')],
['.product-title', fromProperty('innerText')],
],
price: [
['#priceblock_dealprice', inUnits(fromProperty('innerText'))],
['#priceblock_ourprice', inUnits(fromProperty('innerText'))],
['#price_inside_buybox', inUnits(fromProperty('innerText'))],
['#buybox .a-color-price', inUnits(fromProperty('innerText'))],
['input[name="displayedPrice"]', inUnits(fromAttribute('value'))],
['.a-size-large.a-color-price.guild_priceblock_ourprice', inUnits(fromProperty('innerText'))],
['.a-color-price.a-size-medium.a-align-bottom', inUnits(fromProperty('innerText'))],
['.display-price', inUnits(fromProperty('innerText'))],
['.offer-price', inUnits(fromProperty('innerText'))],
],
image: [
['#landingImage', fromProperty('src')],
['#imgBlkFront', fromProperty('src')],
['#ebooksImgBlkFront', fromProperty('src')],
],
},
},
'bestbuy.com_www.bestbuy.com': {
title: [
['.sku-title h1', fromProperty('innerText')],
],
price: [
['.priceView-hero-price.priceView-purchase-price', inUnits(fromProperty('innerText'))],
],
image: [
['img.primary-image', fromProperty('src')],
],
{
domains: ['bestbuy.com', 'www.bestbuy.com'],
features: {
title: [
['.sku-title h1', fromProperty('innerText')],
],
price: [
['.priceView-hero-price.priceView-purchase-price', inUnits(fromProperty('innerText'))],
],
image: [
['img.primary-image', fromProperty('src')],
],
},
},
'ebay.com_www.ebay.com': {
title: [
['#itemTitle', fromProperty('innerText')],
['.product-title', fromProperty('innerText')],
],
price: [
['#prcIsum', inUnits(fromProperty('innerText'))],
['#orgPrc', inUnits(fromProperty('innerText'))],
['#mm-saleDscPrc', inUnits(fromProperty('innerText'))],
['.display-price', inUnits(fromProperty('innerText'))],
],
image: [
['#icImg', fromProperty('src')],
['.vi-image-gallery__image.vi-image-gallery__image--absolute-center', fromProperty('src')],
],
{
domains: ['ebay.com', 'www.ebay.com'],
features: {
title: [
['#itemTitle', fromProperty('innerText')],
['.product-title', fromProperty('innerText')],
],
price: [
['#prcIsum', inUnits(fromProperty('innerText'))],
['#orgPrc', inUnits(fromProperty('innerText'))],
['#mm-saleDscPrc', inUnits(fromProperty('innerText'))],
['.display-price', inUnits(fromProperty('innerText'))],
],
image: [
['#icImg', fromProperty('src')],
['.vi-image-gallery__image.vi-image-gallery__image--absolute-center', fromProperty('src')],
],
},
},
'homedepot.com_www.homedepot.com': {
title: [
['h1.product-title__title', fromProperty('innerText')],
],
price: [
['#ajaxPrice', inUnits(fromAttribute('content'))],
['#ajaxPriceAlt', inUnits(fromProperty('innerText'))],
],
image: [
['#mainImage', fromProperty('src')],
],
{
domains: ['homedepot.com', 'www.homedepot.com'],
features: {
title: [
['h1.product-title__title', fromProperty('innerText')],
],
price: [
['#ajaxPrice', inUnits(fromAttribute('content'))],
['#ajaxPriceAlt', inUnits(fromProperty('innerText'))],
],
image: [
['#mainImage', fromProperty('src')],
],
},
},
'walmart.com_www.walmart.com': {
title: [
['h1.prod-ProductTitle', fromAttribute('content')],
['h1.prod-ProductTitle', fromProperty('innerText')],
],
price: [
['.PriceRange.prod-PriceHero', inUnits(fromProperty('innerText'))],
['.price-group', inUnits(fromAttribute('aria-label'))],
['.price-group', inUnits(fromProperty('innerText'))],
],
image: [
['.prod-hero-image-image', fromProperty('src')],
['.prod-hero-image-carousel-image', fromProperty('src')],
],
{
domains: ['walmart.com', 'www.walmart.com'],
features: {
title: [
['h1.prod-ProductTitle', fromAttribute('content')],
['h1.prod-ProductTitle', fromProperty('innerText')],
],
price: [
['.PriceRange.prod-PriceHero', inUnits(fromProperty('innerText'))],
['.price-group', inUnits(fromAttribute('aria-label'))],
['.price-group', inUnits(fromProperty('innerText'))],
],
image: [
['.prod-hero-image-image', fromProperty('src')],
['.prod-hero-image-carousel-image', fromProperty('src')],
],
},
},
'mkelly.me_www.mkelly.me': {
title: [
['#title', fromProperty('innerText')],
],
price: [
['#price', inUnits(fromProperty('innerText'))],
],
image: [
['img', fromProperty('src')],
],
{
domains: ['mkelly.me', 'www.mkelly.me'],
features: {
title: [
['#title', fromProperty('innerText')],
],
price: [
['#price', inUnits(fromProperty('innerText'))],
],
image: [
['img', fromProperty('src')],
],
},
},
};
];

export default fallbackExtractionData;
5 changes: 3 additions & 2 deletions src/extraction/fathom_extraction.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import defaultCoefficients from 'commerce/extraction/fathom_default_coefficients.json';
import RulesetFactory from 'commerce/extraction/ruleset_factory';
import {getPriceInSubunits} from 'commerce/extraction/utils';
import {parsePrice} from 'commerce/extraction/utils';

// Minimum score to be considered the "correct" feature element extracted by Fathom
const SCORE_THRESHOLD = 4;
Expand Down Expand Up @@ -42,7 +42,8 @@ const PRODUCT_FEATURES = {
price: {
...FEATURE_DEFAULTS,
getValueFromElement(element) {
return getPriceInSubunits(element);
const tokens = Array.from(element.childNodes).map(node => node.textContent);
return parsePrice(tokens);
},
},
};
Expand Down
69 changes: 17 additions & 52 deletions src/extraction/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,25 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
* Converts a price element (from Fathom extraction) or string (from fallback extraction) into
* a numerical price value in subunits (like cents); e.g. <span>$10.00</span> and "$10.00" both
* return 1000. If string parsing fails, returns NaN.
* @param {HTMLElement|string} price
* Converts an array of price tokens into a numerical price value in subunits.
* E.g. ["$10.00"] and ["$", "10", "00", "/each"] both return 1000.
* If string parsing fails, returns NaN.
* @param {Array.<String>} tokens The price token strings extracted from the page
* @returns {Number} the price in subunits
*/
export function getPriceInSubunits(price) {
let priceUnits = [];
if (typeof price === 'string') {
priceUnits = getPriceUnitsFromStr(price);
} else {
priceUnits = getPriceUnitsFromArr(Array.from(price.childNodes));
}
export function parsePrice(tokens) {
const priceUnits = (
tokens
// Split tokens by $ and . to get the numbers between them
.flatMap(token => token.split(/[.$]/))
// Filter out any tokens that do not contain a digit
.filter(token => /\d/g.test(token))
// Remove any non-digit characters for each token in the list
.map(token => token.replace(/\D/g, ''))
// Convert price token strings to integers
.map(token => parseInt(token, 10))
);

// Convert units and subunits to a single integer value in subunits
switch (priceUnits.length) {
case 1:
Expand All @@ -26,44 +32,3 @@ export function getPriceInSubunits(price) {
return NaN;
}
}

/**
* Extracts price units from textContent from text and/or DOM nodes
* @param {Array} Array of DOM nodes
* @returns {Array.Number}
*/
function getPriceUnitsFromArr(arr) {
return cleanPriceTokens(arr.flatMap(token => splitString(token.textContent)));
}

/**
* Extracts price units from a string
* @param {String}
* @returns {Array.Number}
*/
function getPriceUnitsFromStr(str) {
return cleanPriceTokens(splitString(str));
}

/**
* Filters and cleans string tokens
* @param {Array.String}
* @returns {Array.Number}
*/
function cleanPriceTokens(tokens) {
// Filter out any tokens that do not contain a digit
const priceTokens = tokens.filter(token => /\d/g.test(token));

// Remove any non-digit characters for each token in the list
const cleanedPriceTokens = priceTokens.map(token => token.replace(/\D/g, ''));

// Convert price token strings to integers
return cleanedPriceTokens.map(token => parseInt(token, 10));
}

/**
* Separates a string into an array of substrings using '$' and '.' as separators
*/
function splitString(str) {
return str.split(/[.$]/);
}
Loading

0 comments on commit 9e546b7

Please sign in to comment.