Skip to content
This repository has been archived by the owner on Dec 3, 2020. It is now read-only.

Commit

Permalink
Incorporate Osmose's feedback
Browse files Browse the repository at this point in the history
* Replace top level keys in CSS selector data (in ‘fallback_extraction_selectors.js’) with a string composed of supported domains/subdomains.
  * Update ‘getFeatureInfo’ to split the string into a list of domains/subdomains before checking for a hostname match.
* Break out part of ‘extractProduct’ in ‘fallback_extraction.js’ for better readability.
* Rename some internal variables for more consistency/clarity.
  • Loading branch information
biancadanforth committed Sep 21, 2018
1 parent d0aece3 commit dc0ca41
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 32 deletions.
51 changes: 27 additions & 24 deletions src/extraction/fallback_extraction.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,33 @@ const OPEN_GRAPH_PROPERTY_VALUES = {
* for the page.
*/
function getFeatureInfo() {
  // Use `hostname` rather than `host`: `host` also carries the port
  // (e.g. "www.amazon.com:8443"), which would defeat the comparisons below.
  const {hostname} = new URL(window.location.href);
  for (const [vendorDomainsStr, featureInfo] of Object.entries(extractionData)) {
    // Each key is a '_'-separated list of supported domains/subdomains.
    const vendorDomains = vendorDomainsStr.split('_');
    for (const domain of vendorDomains) {
      // Match on label boundaries only: either the exact hostname or a true
      // subdomain of it. A plain substring test would also accept look-alike
      // hosts such as "notamazon.com" or "amazon.com.evil.example".
      if (hostname === domain || hostname.endsWith(`.${domain}`)) {
        return featureInfo;
      }
    }
  }
  // No supported vendor matches the current page.
  return null;
}

/**
 * Returns the first truthy value produced by an ordered list of extractors.
 *
 * Each extractor is a `[selector, extractionMethod]` pair: the selector is
 * looked up with `document.querySelector`, and the matched element (if any)
 * is passed to `extractionMethod`.
 *
 * @param {Array} extractors Ordered `[selector, extractionMethod]` pairs.
 * @returns {*} The first truthy value returned by an extraction method.
 * @throws {Error} If at least one selector matched an element but no
 *   extraction method returned a truthy value, or if no selector matched
 *   any element at all.
 */
function findValue(extractors) {
  let foundElement = false;
  for (const [selector, extractionMethod] of extractors) {
    const element = document.querySelector(selector);
    if (element) {
      foundElement = true;
      const value = extractionMethod(element);
      if (value) {
        return value;
      }
      // Don't give up yet: a later extractor may still succeed, e.g. the same
      // selector paired with a different extraction method.
    }
  }
  if (foundElement) {
    throw new Error('Element found did not return a valid value for the product feature.');
  }
  // None of the selectors matched an element on the page
  throw new Error('No elements found with vendor data for the product feature.');
}

/**
* Returns any product information available on the page from CSS
* selectors if they exist, otherwise from Open Graph <meta> tags.
Expand All @@ -42,28 +59,14 @@ export default function extractProduct() {
const extractedProduct = {};
const featureInfo = getFeatureInfo();
if (featureInfo) {
for (const [feature, routines] of Object.entries(featureInfo)) {
for (const routine of routines) {
const [selector, extractionMethod] = routine;
const element = document.querySelector(selector);
if (element) {
extractedProduct[feature] = extractionMethod(element);
if (extractedProduct[feature]) {
break;
} else {
throw new Error(`Element found did not return a valid product ${feature}.`);
}
} else if (routine === routines[routines.length - 1]) {
// None of the selectors matched an element on the page
throw new Error(`No elements found with vendor data for product ${feature}.`);
}
}
for (const [feature, extractors] of Object.entries(featureInfo)) {
extractedProduct[feature] = findValue(extractors);
}
} else {
for (const [key, value] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) {
const metaEle = document.querySelector(`meta[property='${value}']`);
for (const [feature, propertyValue] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) {
const metaEle = document.querySelector(`meta[property='${propertyValue}']`);
if (metaEle) {
extractedProduct[key] = metaEle.getAttribute('content');
extractedProduct[feature] = metaEle.getAttribute('content');
}
}
}
Expand Down
17 changes: 9 additions & 8 deletions src/extraction/fallback_extraction_selectors.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ function fromAttribute(attribute) {


/**
* CSS selector data by site (represented by a regular expression), where each selector is paired
* with a method that extracts the value from the element returned by that selector.
* CSS selector data by site (represented by an underscore-separated string of acceptable
* hostnames, e.g. 'example.com_www.example.com'), where each selector is paired with a
* method that extracts the value from the element returned by that selector.
*/
const fallbackExtractionData = {
'^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}amazon\\.com': {
'amazon.com_www.amazon.com_smile.amazon.com': {
title: [
['#productTitle', fromProperty('innerText')],
['.product-title', fromProperty('innerText')],
Expand All @@ -44,7 +45,7 @@ const fallbackExtractionData = {
['#ebooksImgBlkFront', fromProperty('src')],
],
},
'^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}bestbuy\\.com': {
'bestbuy.com_www.bestbuy.com': {
title: [
['.sku-title h1', fromProperty('innerText')],
],
Expand All @@ -55,7 +56,7 @@ const fallbackExtractionData = {
['img.primary-image', fromProperty('src')],
],
},
'^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}ebay\\.com': {
'ebay.com_www.ebay.com': {
title: [
['#itemTitle', fromProperty('innerText')],
['.product-title', fromProperty('innerText')],
Expand All @@ -71,7 +72,7 @@ const fallbackExtractionData = {
['.vi-image-gallery__image.vi-image-gallery__image--absolute-center', fromProperty('src')],
],
},
'^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}homedepot\\.com': {
'homedepot.com_www.homedepot.com': {
title: [
['h1.product-title__title', fromProperty('innerText')],
],
Expand All @@ -83,7 +84,7 @@ const fallbackExtractionData = {
['#mainImage', fromProperty('src')],
],
},
'^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}walmart\\.com': {
'walmart.com_www.walmart.com': {
title: [
['h1.prod-ProductTitle', fromAttribute('content')],
['h1.prod-ProductTitle', fromProperty('innerText')],
Expand All @@ -98,7 +99,7 @@ const fallbackExtractionData = {
['.prod-hero-image-carousel-image', fromProperty('src')],
],
},
'www.mkelly.me': {
'mkelly.me_www.mkelly.me': {
title: [
['#title', fromProperty('innerText')],
],
Expand Down

0 comments on commit dc0ca41

Please sign in to comment.