From f95a90c6495fa11c2901cb301782e2e851df4913 Mon Sep 17 00:00:00 2001 From: Bianca Danforth Date: Thu, 13 Sep 2018 14:47:11 -0700 Subject: [PATCH 1/4] Fix #84: Improve fallback extraction Improve fallback extraction by CSS selectors: * Update selectors for the top 5 sites. Add Home Depot and Best Buy. * Rename selectors JSON file to be more descriptive (was 'product_extraction_data.json', now 'fallback_extraction_selectors.json'). * Represent supported sites in 'fallback_extraction_selectors.json' as regular expression strings so that fallback extraction works for any subdomain of the site (e.g. 'smile.amazon.com'). * Represent CSS selectors by tuples in 'fallback_extraction_selectors.json', so that each selector can specify which attribute or property to read for that selector. * Clean price strings from fallback extraction using the same methods as used by Fathom extraction (PR #111); consolidate and move shared methods to 'utils.js'. --- src/extraction/fallback_extraction.js | 44 +++------ .../fallback_extraction_selectors.json | 88 +++++++++++++++++ src/extraction/product_extraction_data.json | 99 ------------------- src/utils.js | 98 ++++++++++++++++++ 4 files changed, 202 insertions(+), 127 deletions(-) create mode 100644 src/extraction/fallback_extraction_selectors.json delete mode 100644 src/extraction/product_extraction_data.json diff --git a/src/extraction/fallback_extraction.js b/src/extraction/fallback_extraction.js index 5b3535f..3d5bbc1 100644 --- a/src/extraction/fallback_extraction.js +++ b/src/extraction/fallback_extraction.js @@ -10,7 +10,9 @@ * Features: title, image, price */ -import extractionData from 'commerce/extraction/product_extraction_data.json'; +import extractionData from 'commerce/extraction/fallback_extraction_selectors.json'; +import {getPriceString, extractValueFromElement} from 'commerce/utils'; + const OPEN_GRAPH_PROPERTY_VALUES = { title: 'og:title', @@ -23,34 +25,16 @@ const OPEN_GRAPH_PROPERTY_VALUES = { * for the page. 
*/ function getProductAttributeInfo() { - const hostname = new URL(window.location.href).host; - for (const [vendor, attributeInfo] of Object.entries(extractionData)) { - if (hostname.includes(vendor)) { + const url = window.location.href; + for (const [regExpStr, attributeInfo] of Object.entries(extractionData)) { + const regExp = new RegExp(regExpStr); + if (regExp.test(url)) { return attributeInfo; } } return null; } -/** - * Extracts and returns the string value for a given element property or attribute. - * - * @param {HTMLElement} element - * @param {string} extractionProperty - */ -function extractValueFromElement(element, extractionProperty) { - switch (extractionProperty) { - case 'content': - return element.getAttribute('content'); - case 'innerText': - return element.innerText; - case 'src': - return element.src; - default: - throw new Error(`Unrecognized extraction property or attribute '${extractionProperty}'.`); - } -} - /** * Returns any product information available on the page from CSS * selectors if they exist, otherwise from Open Graph tags. 
@@ -59,18 +43,22 @@ export default function extractProduct() { const data = {}; const attributeInfo = getProductAttributeInfo(); if (attributeInfo) { - for (const [productAttribute, extractor] of Object.entries(attributeInfo)) { - const {selectors, extractUsing} = extractor; - for (const selector of selectors) { + for (const [productAttribute, tuples] of Object.entries(attributeInfo)) { + for (const tuple of tuples) { + const [selector, extractUsing] = tuple; const element = document.querySelector(selector); if (element) { - data[productAttribute] = extractValueFromElement(element, extractUsing); + if (productAttribute === 'price') { + data[productAttribute] = getPriceString(element, extractUsing); + } else { + data[productAttribute] = extractValueFromElement(element, extractUsing); + } if (data[productAttribute]) { break; } else { throw new Error(`Element found did not return a valid product ${productAttribute}.`); } - } else if (selector === selectors[selectors.length - 1]) { + } else if (tuple === tuples[tuples.length - 1]) { // None of the selectors matched an element on the page throw new Error(`No elements found with vendor data for product ${productAttribute}.`); } diff --git a/src/extraction/fallback_extraction_selectors.json b/src/extraction/fallback_extraction_selectors.json new file mode 100644 index 0000000..c5362c0 --- /dev/null +++ b/src/extraction/fallback_extraction_selectors.json @@ -0,0 +1,88 @@ +{ + "^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}amazon\\.com": { + "title": [ + ["#productTitle", "innerText"], + [".product-title", "innerText"] + ], + "price": [ + ["#priceblock_dealprice", "innerText"], + ["#priceblock_ourprice", "innerText"], + ["#price_inside_buybox", "innerText"], + ["#buybox .a-color-price", "innerText"], + ["input[name='displayedPrice']", "value"], + [".a-size-large.a-color-price.guild_priceblock_ourprice", "innerText"], + [".a-color-price.a-size-medium.a-align-bottom", "innerText"], + [".display-price", "innerText"], [".offer-price", 
"innerText"] + ], + "image": [ + ["#landingImage", "src"], + ["#imgBlkFront", "src"], + ["#ebooksImgBlkFront", "src"] + ] + }, + "^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}bestbuy\\.com": { + "title": [ + [".sku-title h1", "innerText"] + ], + "price": [ + [".priceView-hero-price.priceView-purchase-price", "innerText"] + ], + "image": [ + ["img.primary-image", "src"] + ] + }, + "^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}ebay\\.com": { + "title": [ + ["#itemTitle", "innerText"], + [".product-title", "innerText"] + ], + "price": [ + ["#prcIsum", "innerText"], + ["#orgPrc", "innerText"], + ["#mm-saleDscPrc", "innerText"], + [".display-price", "innerText"] + ], + "image": [ + ["#icImg", "src"], + [".vi-image-gallery__image.vi-image-gallery__image--absolute-center", "src"] + ] + }, + "^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}homedepot\\.com": { + "title": [ + ["h1.product-title__title", "innerText"] + ], + "price": [ + ["#ajaxPrice", "content"], + ["#ajaxPriceAlt", "innerText"] + ], + "image": [ + ["#mainImage", "src"] + ] + }, + "^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}walmart\\.com": { + "title": [ + ["h1.prod-ProductTitle", "content"], + ["h1.prod-ProductTitle", "innerText"] + ], + "price": [ + [".PriceRange.prod-PriceHero", "innerText"], + [".price-group", "aria-label"], + [".price-group", "innerText"] + ], + "image": [ + [".prod-hero-image-image", "src"], + [".prod-hero-image-carousel-image", "src"] + ] + }, + "www.mkelly.me": { + "title": [ + ["#title", "innerText"] + ], + "price": [ + ["#price", "innerText"] + ], + "image": [ + ["img", "src"] + ] + } +} diff --git a/src/extraction/product_extraction_data.json b/src/extraction/product_extraction_data.json deleted file mode 100644 index 77f745f..0000000 --- a/src/extraction/product_extraction_data.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "www.aliexpress.com": { - "title": { - "selectors": [".product-name"], - "extractUsing": "innerText" - }, - "price": { - "selectors": [ - "#j-sku-discount-price", - "#j-sku-price" - ], - "extractUsing": 
"innerText" - }, - "image": { - "selectors": [".ui-image-viewer-thumb-frame > img"], - "extractUsing": "src" - } - }, - "www.amazon.com": { - "title": { - "selectors": [ - "#productTitle", - ".product-title" - ], - "extractUsing": "innerText" - }, - "price": { - "selectors": [ - "#priceblock_ourprice", - "#priceblock_dealprice", - ".display-price", - ".offer-price" - ], - "extractUsing": "innerText" - }, - "image": { - "selectors": [ - "#landingImage", - "#imgBlkFront" - ], - "extractUsing": "src" - } - }, - "www.ebay.com": { - "title": { - "selectors": [ - "#itemTitle", - ".product-title" - ], - "extractUsing": "innerText" - }, - "price": { - "selectors": [ - ".display-price", - "#prcIsum", - "#orgPrc" - ], - "extractUsing": "innerText" - }, - "image": { - "selectors": [ - "#icImg", - ".vi-image-gallery__image.vi-image-gallery__image--absolute-center" - ], - "extractUsing": "src" - } - }, - "www.walmart.com": { - "title": { - "selectors": [".prod-ProductTitle"], - "extractUsing": "innerText" - }, - "price": { - "selectors": [".price-group"], - "extractUsing": "innerText" - }, - "image": { - "selectors": [ - ".prod-hero-image-image", - ".prod-hero-image-carousel-image" - ], - "extractUsing": "src" - } - }, - "www.mkelly.me": { - "title": { - "selectors": ["#title"], - "extractUsing": "innerText" - }, - "price": { - "selectors": ["#price"], - "extractUsing": "innerText" - }, - "image": { - "selectors": ["img"], - "extractUsing": "src" - } - } -} diff --git a/src/utils.js b/src/utils.js index f8c8469..9d05f34 100644 --- a/src/utils.js +++ b/src/utils.js @@ -51,3 +51,101 @@ export async function retry(callback, maxRetries = 5, delayFactor = 2, initialDe export function validatePropType(value, propType) { return checkPropTypes({value: propType}, {value}, 'prop', 'Validation'); } + +/** + * Returns true if the string contains a number. + */ +function hasNumber(string) { + return /\d/.test(string); +} + +/** + * Returns true if the string contains a dollar sign. 
+ */ +function hasDollarSign(string) { + return /\$/.test(string); +} + +/** + * Get the main and sub unit elements for the product price. + * + * @returns {Object} A string:element object with 'mainUnit' and 'subUnit' keys. + */ +export function getPriceUnitElements(element) { + let isMainUnit = true; + const priceElements = {}; + // Loop through children: first element containing a digit is main unit, + // second is subunit. + for (const priceSubEle of element.children) { + if (hasNumber(priceSubEle.innerText)) { + if (isMainUnit) { + priceElements.mainUnit = priceSubEle; + isMainUnit = false; + } else { + priceElements.subUnit = priceSubEle; + } + } + } + return priceElements; +} + +/** + * Reformats price string to be of form "$NX.XX". + */ +export function cleanPriceString(priceStr) { + // Remove any commas + let cleanedPriceStr = priceStr.replace(/,/g, ''); + // Add a '$' at the beginning if not present; common for strings pulled from element attributes + if (!hasDollarSign) { + cleanedPriceStr = cleanedPriceStr.replace(/^/, '$'); + } + // Remove any characters preceding the '$' and following the '.XX' + cleanedPriceStr = cleanedPriceStr.substring(cleanedPriceStr.indexOf('$')); + cleanedPriceStr = cleanedPriceStr.substring(0, cleanedPriceStr.indexOf('.') + 3); + return cleanedPriceStr; +} + +/** + * Checks if a price object has subunits and returns a price string. + * + * @param {HTMLElement} - The element containing the price + * @param {String} extractUsing - The property/attribute to use to get the product price + */ +export function getPriceString(element, extractUsing) { + if (element.children.length > 0) { + const priceObj = getPriceUnitElements(element); + // Check for subunits e.g. dollars and cents. + if ('mainUnit' in priceObj) { + const mainUnitStr = priceObj.mainUnit.innerText; + // If no subunits, then main units contain subunits + const subUnitStr = priceObj.subUnit ? 
`.${priceObj.subUnit.innerText}` : ''; + const priceStr = `${mainUnitStr}${subUnitStr}`; + return cleanPriceString(hasDollarSign(priceStr) ? priceStr : `$${priceStr}`); + } + } + const priceStr = extractValueFromElement(element, extractUsing); + return cleanPriceString(priceStr); +} + +/** + * Extracts and returns the string value for a given element property or attribute. + * + * @param {HTMLElement} element + * @param {String} extractUsing - The property/attribute to use to get the product price + */ +export function extractValueFromElement(element, extractUsing) { + switch (extractUsing) { + case 'content': + return element.getAttribute('content'); + case 'innerText': + return element.innerText; + case 'src': + return element.src; + case 'value': + return element.getAttribute('value'); + case 'aria-label': + return element.getAttribute('aria-label'); + default: + throw new Error(`Unrecognized extraction property or attribute '${extractUsing}'.`); + } +} From 106fca2cce81def3fa4453502ab684333d6b7274 Mon Sep 17 00:00:00 2001 From: Bianca Danforth Date: Fri, 21 Sep 2018 11:08:05 -0700 Subject: [PATCH 2/4] Update with changes from dependency: PR#111 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Replaces ‘fallback_extraction_selectors.json’ with a JS version, which allows the tuples associating a CSS selector with how to extract information to point to a site-specific extraction method that returns the desired value when executed. * Renamed some variables in various functions in ‘fallback_extraction.js’ so that it more closely matches its sister functions in ‘fathom_extraction.js’ for improved readability. * Modified ‘getPriceInSubunits’ function in './src/extraction/utils.js' to take in an HTML element (from Fathom extraction) OR a string (from fallback extraction). * Refactored some of the supporting functions. 
--- src/extraction/fallback_extraction.js | 39 +++--- .../fallback_extraction_selectors.js | 114 ++++++++++++++++++ .../fallback_extraction_selectors.json | 88 -------------- src/extraction/utils.js | 51 ++++++-- 4 files changed, 170 insertions(+), 122 deletions(-) create mode 100644 src/extraction/fallback_extraction_selectors.js delete mode 100644 src/extraction/fallback_extraction_selectors.json diff --git a/src/extraction/fallback_extraction.js b/src/extraction/fallback_extraction.js index 3d5bbc1..2a510a2 100644 --- a/src/extraction/fallback_extraction.js +++ b/src/extraction/fallback_extraction.js @@ -10,8 +10,7 @@ * Features: title, image, price */ -import extractionData from 'commerce/extraction/fallback_extraction_selectors.json'; -import {getPriceString, extractValueFromElement} from 'commerce/utils'; +import extractionData from 'commerce/extraction/fallback_extraction_selectors'; const OPEN_GRAPH_PROPERTY_VALUES = { @@ -24,12 +23,12 @@ const OPEN_GRAPH_PROPERTY_VALUES = { * Returns any extraction data found for the vendor based on the URL * for the page. */ -function getProductAttributeInfo() { +function getFeatureInfo() { const url = window.location.href; - for (const [regExpStr, attributeInfo] of Object.entries(extractionData)) { + for (const [regExpStr, featureInfo] of Object.entries(extractionData)) { const regExp = new RegExp(regExpStr); if (regExp.test(url)) { - return attributeInfo; + return featureInfo; } } return null; @@ -40,27 +39,23 @@ function getProductAttributeInfo() { * selectors if they exist, otherwise from Open Graph tags. 
*/ export default function extractProduct() { - const data = {}; - const attributeInfo = getProductAttributeInfo(); - if (attributeInfo) { - for (const [productAttribute, tuples] of Object.entries(attributeInfo)) { - for (const tuple of tuples) { - const [selector, extractUsing] = tuple; + const extractedProduct = {}; + const featureInfo = getFeatureInfo(); + if (featureInfo) { + for (const [feature, routines] of Object.entries(featureInfo)) { + for (const routine of routines) { + const [selector, extractionMethod] = routine; const element = document.querySelector(selector); if (element) { - if (productAttribute === 'price') { - data[productAttribute] = getPriceString(element, extractUsing); - } else { - data[productAttribute] = extractValueFromElement(element, extractUsing); - } - if (data[productAttribute]) { + extractedProduct[feature] = extractionMethod(element); + if (extractedProduct[feature]) { break; } else { - throw new Error(`Element found did not return a valid product ${productAttribute}.`); + throw new Error(`Element found did not return a valid product ${feature}.`); } - } else if (tuple === tuples[tuples.length - 1]) { + } else if (routine === routines[routines.length - 1]) { // None of the selectors matched an element on the page - throw new Error(`No elements found with vendor data for product ${productAttribute}.`); + throw new Error(`No elements found with vendor data for product ${feature}.`); } } } @@ -68,9 +63,9 @@ export default function extractProduct() { for (const [key, value] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) { const metaEle = document.querySelector(`meta[property='${value}']`); if (metaEle) { - data[key] = metaEle.getAttribute('content'); + extractedProduct[key] = metaEle.getAttribute('content'); } } } - return data; + return extractedProduct; } diff --git a/src/extraction/fallback_extraction_selectors.js b/src/extraction/fallback_extraction_selectors.js new file mode 100644 index 0000000..35fe17a --- /dev/null +++ 
b/src/extraction/fallback_extraction_selectors.js @@ -0,0 +1,114 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +import {getPriceInSubunits} from 'commerce/extraction/utils'; + +function inUnits(fn) { + return element => getPriceInSubunits(fn(element)); +} + +function fromProperty(property) { + return (element => element[property]); +} + +function fromAttribute(attribute) { + return (element => element.getAttribute(attribute)); +} + + +/** + * CSS selector data by site (represented by a regular expression), where each selector is paired + * with a method that extracts the value from the element returned by that selector. + */ +const fallbackExtractionData = { + '^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}amazon\\.com': { + title: [ + ['#productTitle', fromProperty('innerText')], + ['.product-title', fromProperty('innerText')], + ], + price: [ + ['#priceblock_dealprice', inUnits(fromProperty('innerText'))], + ['#priceblock_ourprice', inUnits(fromProperty('innerText'))], + ['#price_inside_buybox', inUnits(fromProperty('innerText'))], + ['#buybox .a-color-price', inUnits(fromProperty('innerText'))], + ['input[name="displayedPrice"]', inUnits(fromAttribute('value'))], + ['.a-size-large.a-color-price.guild_priceblock_ourprice', inUnits(fromProperty('innerText'))], + ['.a-color-price.a-size-medium.a-align-bottom', inUnits(fromProperty('innerText'))], + ['.display-price', inUnits(fromProperty('innerText'))], + ['.offer-price', inUnits(fromProperty('innerText'))], + ], + image: [ + ['#landingImage', fromProperty('src')], + ['#imgBlkFront', fromProperty('src')], + ['#ebooksImgBlkFront', fromProperty('src')], + ], + }, + '^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}bestbuy\\.com': { + title: [ + ['.sku-title h1', fromProperty('innerText')], + ], + price: [ + ['.priceView-hero-price.priceView-purchase-price', 
inUnits(fromProperty('innerText'))], + ], + image: [ + ['img.primary-image', fromProperty('src')], + ], + }, + '^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}ebay\\.com': { + title: [ + ['#itemTitle', fromProperty('innerText')], + ['.product-title', fromProperty('innerText')], + ], + price: [ + ['#prcIsum', inUnits(fromProperty('innerText'))], + ['#orgPrc', inUnits(fromProperty('innerText'))], + ['#mm-saleDscPrc', inUnits(fromProperty('innerText'))], + ['.display-price', inUnits(fromProperty('innerText'))], + ], + image: [ + ['#icImg', fromProperty('src')], + ['.vi-image-gallery__image.vi-image-gallery__image--absolute-center', fromProperty('src')], + ], + }, + '^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}homedepot\\.com': { + title: [ + ['h1.product-title__title', fromProperty('innerText')], + ], + price: [ + ['#ajaxPrice', inUnits(fromAttribute('content'))], + ['#ajaxPriceAlt', inUnits(fromProperty('innerText'))], + ], + image: [ + ['#mainImage', fromProperty('src')], + ], + }, + '^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}walmart\\.com': { + title: [ + ['h1.prod-ProductTitle', fromAttribute('content')], + ['h1.prod-ProductTitle', fromProperty('innerText')], + ], + price: [ + ['.PriceRange.prod-PriceHero', inUnits(fromProperty('innerText'))], + ['.price-group', inUnits(fromAttribute('aria-label'))], + ['.price-group', inUnits(fromProperty('innerText'))], + ], + image: [ + ['.prod-hero-image-image', fromProperty('src')], + ['.prod-hero-image-carousel-image', fromProperty('src')], + ], + }, + 'www.mkelly.me': { + title: [ + ['#title', fromProperty('innerText')], + ], + price: [ + ['#price', inUnits(fromProperty('innerText'))], + ], + image: [ + ['img', fromProperty('src')], + ], + }, +}; + +export default fallbackExtractionData; diff --git a/src/extraction/fallback_extraction_selectors.json b/src/extraction/fallback_extraction_selectors.json deleted file mode 100644 index c5362c0..0000000 --- a/src/extraction/fallback_extraction_selectors.json +++ /dev/null @@ -1,88 +0,0 @@ -{ - 
"^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}amazon\\.com": { - "title": [ - ["#productTitle", "innerText"], - [".product-title", "innerText"] - ], - "price": [ - ["#priceblock_dealprice", "innerText"], - ["#priceblock_ourprice", "innerText"], - ["#price_inside_buybox", "innerText"], - ["#buybox .a-color-price", "innerText"], - ["input[name='displayedPrice']", "value"], - [".a-size-large.a-color-price.guild_priceblock_ourprice", "innerText"], - [".a-color-price.a-size-medium.a-align-bottom", "innerText"], - [".display-price", "innerText"], [".offer-price", "innerText"] - ], - "image": [ - ["#landingImage", "src"], - ["#imgBlkFront", "src"], - ["#ebooksImgBlkFront", "src"] - ] - }, - "^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}bestbuy\\.com": { - "title": [ - [".sku-title h1", "innerText"] - ], - "price": [ - [".priceView-hero-price.priceView-purchase-price", "innerText"] - ], - "image": [ - ["img.primary-image", "src"] - ] - }, - "^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}ebay\\.com": { - "title": [ - ["#itemTitle", "innerText"], - [".product-title", "innerText"] - ], - "price": [ - ["#prcIsum", "innerText"], - ["#orgPrc", "innerText"], - ["#mm-saleDscPrc", "innerText"], - [".display-price", "innerText"] - ], - "image": [ - ["#icImg", "src"], - [".vi-image-gallery__image.vi-image-gallery__image--absolute-center", "src"] - ] - }, - "^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}homedepot\\.com": { - "title": [ - ["h1.product-title__title", "innerText"] - ], - "price": [ - ["#ajaxPrice", "content"], - ["#ajaxPriceAlt", "innerText"] - ], - "image": [ - ["#mainImage", "src"] - ] - }, - "^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}walmart\\.com": { - "title": [ - ["h1.prod-ProductTitle", "content"], - ["h1.prod-ProductTitle", "innerText"] - ], - "price": [ - [".PriceRange.prod-PriceHero", "innerText"], - [".price-group", "aria-label"], - [".price-group", "innerText"] - ], - "image": [ - [".prod-hero-image-image", "src"], - [".prod-hero-image-carousel-image", "src"] - ] - }, - "www.mkelly.me": { - "title": [ - 
["#title", "innerText"] - ], - "price": [ - ["#price", "innerText"] - ], - "image": [ - ["img", "src"] - ] - } -} diff --git a/src/extraction/utils.js b/src/extraction/utils.js index ff07896..9ae3e12 100644 --- a/src/extraction/utils.js +++ b/src/extraction/utils.js @@ -3,13 +3,19 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /** - * Converts a price element into a numerical price value in subunits (like cents). - * e.g. $10.00 returns 1000. If string parsing fails, returns NaN. - * @param {HTMLElement} priceEle + * Converts a price element (from Fathom extraction) or string (from fallback extraction) into + * a numerical price value in subunits (like cents); e.g. $10.00 and "$10.00" both + * return 1000. If string parsing fails, returns NaN. + * @param {HTMLElement|string} price * @returns {Number} the price in subunits */ -export function getPriceInSubunits(priceEle) { - const priceUnits = getPriceUnits(priceEle.childNodes); +export function getPriceInSubunits(price) { + let priceUnits = []; + if (typeof price === 'string') { + priceUnits = getPriceUnitsFromStr(price); + } else { + priceUnits = getPriceUnitsFromArr(Array.from(price.childNodes)); + } // Convert units and subunits to a single integer value in subunits switch (priceUnits.length) { case 1: @@ -22,17 +28,31 @@ export function getPriceInSubunits(priceEle) { } /** - * Extracts price units by filtering and cleaning textContent from text and DOM nodes - * @param {Array.NodeList} nodes + * Extracts price units from textContent from text and/or DOM nodes + * @param {Array} Array of DOM nodes * @returns {Array.Number} */ -function getPriceUnits(nodes) { - const nodesArr = Array.from(nodes); - // Separate token strings in a list into substrings using '$' and '.' 
as separators - const allTokens = nodesArr.flatMap(token => token.textContent.split(/[.$]/)); +function getPriceUnitsFromArr(arr) { + return cleanPriceTokens(arr.flatMap(token => splitString(token.textContent))); +} + +/** + * Extracts price units from a string + * @param {String} + * @returns {Array.Number} + */ +function getPriceUnitsFromStr(str) { + return cleanPriceTokens(splitString(str)); +} +/** + * Filters and cleans string tokens + * @param {Array.String} + * @returns {Array.Number} + */ +function cleanPriceTokens(tokens) { // Filter out any tokens that do not contain a digit - const priceTokens = allTokens.filter(token => /\d/g.test(token)); + const priceTokens = tokens.filter(token => /\d/g.test(token)); // Remove any non-digit characters for each token in the list const cleanedPriceTokens = priceTokens.map(token => token.replace(/\D/g, '')); @@ -40,3 +60,10 @@ function getPriceUnits(nodes) { // Convert price token strings to integers return cleanedPriceTokens.map(token => parseInt(token, 10)); } + +/** + * Separates a string into an array of substrings using '$' and '.' as separators + */ +function splitString(str) { + return str.split(/[.$]/); +} From 740c53c08abedf7bf69459ad587dcc20df01d7f1 Mon Sep 17 00:00:00 2001 From: Bianca Danforth Date: Fri, 21 Sep 2018 13:24:47 -0700 Subject: [PATCH 3/4] Incorporate Osmose's feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Replace top level keys in CSS selector data (in ‘fallback_extraction_selectors.js’) with a string composed of supported domains/subdomains. * Update ‘getFeatureInfo’ to split the string into a list of domains/subdomains before checking for a hostname match. * Break out part of ‘extractProduct’ in ‘fallback_extraction.js’ for better readability. * Rename some internal variables for more consistency/clarity.
--- src/extraction/fallback_extraction.js | 51 ++++++++++--------- .../fallback_extraction_selectors.js | 17 ++++--- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/src/extraction/fallback_extraction.js b/src/extraction/fallback_extraction.js index 2a510a2..4257558 100644 --- a/src/extraction/fallback_extraction.js +++ b/src/extraction/fallback_extraction.js @@ -24,16 +24,33 @@ const OPEN_GRAPH_PROPERTY_VALUES = { * for the page. */ function getFeatureInfo() { - const url = window.location.href; - for (const [regExpStr, featureInfo] of Object.entries(extractionData)) { - const regExp = new RegExp(regExpStr); - if (regExp.test(url)) { - return featureInfo; + const hostname = new URL(window.location.href).host; + for (const [vendorDomainsStr, featureInfo] of Object.entries(extractionData)) { + const vendorDomains = vendorDomainsStr.split('_'); + for (const domain of vendorDomains) { + if (hostname.includes(domain)) { + return featureInfo; + } } } return null; } +function findValue(extractors) { + for (const [selector, extractionMethod] of extractors) { + const element = document.querySelector(selector); + if (element) { + const value = extractionMethod(element); + if (value) { + return value; + } + throw new Error('Element found did not return a valid value for the product feature.'); + } + } + // None of the selectors matched an element on the page + throw new Error('No elements found with vendor data for the product feature.'); +} + /** * Returns any product information available on the page from CSS * selectors if they exist, otherwise from Open Graph tags. 
@@ -42,28 +59,14 @@ export default function extractProduct() { const extractedProduct = {}; const featureInfo = getFeatureInfo(); if (featureInfo) { - for (const [feature, routines] of Object.entries(featureInfo)) { - for (const routine of routines) { - const [selector, extractionMethod] = routine; - const element = document.querySelector(selector); - if (element) { - extractedProduct[feature] = extractionMethod(element); - if (extractedProduct[feature]) { - break; - } else { - throw new Error(`Element found did not return a valid product ${feature}.`); - } - } else if (routine === routines[routines.length - 1]) { - // None of the selectors matched an element on the page - throw new Error(`No elements found with vendor data for product ${feature}.`); - } - } + for (const [feature, extractors] of Object.entries(featureInfo)) { + extractedProduct[feature] = findValue(extractors); } } else { - for (const [key, value] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) { - const metaEle = document.querySelector(`meta[property='${value}']`); + for (const [feature, propertyValue] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) { + const metaEle = document.querySelector(`meta[property='${propertyValue}']`); if (metaEle) { - extractedProduct[key] = metaEle.getAttribute('content'); + extractedProduct[feature] = metaEle.getAttribute('content'); } } } diff --git a/src/extraction/fallback_extraction_selectors.js b/src/extraction/fallback_extraction_selectors.js index 35fe17a..90eb797 100644 --- a/src/extraction/fallback_extraction_selectors.js +++ b/src/extraction/fallback_extraction_selectors.js @@ -18,11 +18,12 @@ function fromAttribute(attribute) { /** - * CSS selector data by site (represented by a regular expression), where each selector is paired - * with a method that extracts the value from the element returned by that selector. 
+ * CSS selector data by site (represented by a string of acceptable hostnames), where each + * selector is paired with a method that extracts the value from the element returned by + * that selector. */ const fallbackExtractionData = { - '^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}amazon\\.com': { + 'amazon.com_www.amazon.com_smile.amazon.com': { title: [ ['#productTitle', fromProperty('innerText')], ['.product-title', fromProperty('innerText')], @@ -44,7 +45,7 @@ const fallbackExtractionData = { ['#ebooksImgBlkFront', fromProperty('src')], ], }, - '^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}bestbuy\\.com': { + 'bestbuy.com_www.bestbuy.com': { title: [ ['.sku-title h1', fromProperty('innerText')], ], @@ -55,7 +56,7 @@ const fallbackExtractionData = { ['img.primary-image', fromProperty('src')], ], }, - '^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}ebay\\.com': { + 'ebay.com_www.ebay.com': { title: [ ['#itemTitle', fromProperty('innerText')], ['.product-title', fromProperty('innerText')], @@ -71,7 +72,7 @@ const fallbackExtractionData = { ['.vi-image-gallery__image.vi-image-gallery__image--absolute-center', fromProperty('src')], ], }, - '^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}homedepot\\.com': { + 'homedepot.com_www.homedepot.com': { title: [ ['h1.product-title__title', fromProperty('innerText')], ], @@ -83,7 +84,7 @@ const fallbackExtractionData = { ['#mainImage', fromProperty('src')], ], }, - '^https?:\\/\\/([a-zA-Z0-9]+\\.){0,}walmart\\.com': { + 'walmart.com_www.walmart.com': { title: [ ['h1.prod-ProductTitle', fromAttribute('content')], ['h1.prod-ProductTitle', fromProperty('innerText')], @@ -98,7 +99,7 @@ const fallbackExtractionData = { ['.prod-hero-image-carousel-image', fromProperty('src')], ], }, - 'www.mkelly.me': { + 'mkelly.me_www.mkelly.me': { title: [ ['#title', fromProperty('innerText')], ], From 9e546b78f01db76dbbc390c5be46c940877cada6 Mon Sep 17 00:00:00 2001 From: Bianca Danforth Date: Fri, 28 Sep 2018 11:05:11 -0700 Subject: [PATCH 4/4] Incorporate more feedback from Osmose * 
Log a warning instead of throwing an error and halting execution if fallback extraction for a supported site fails in various ways. * Restructure 'fallbackExtractionData' in 'fallback_extraction_selectors.js', so it is an array of objects with keys 'domains' and 'features'. * Format 'parsePrice' utility function's input argument into an array of strings so that it takes in the same type of argument regardless of where the extraction info is coming from (Fathom or fallback). * Remove unused utility methods from './src/utils.js' and './src/extraction/utils.js'. --- src/extraction/fallback_extraction.js | 15 +- .../fallback_extraction_selectors.js | 196 ++++++++++-------- src/extraction/fathom_extraction.js | 5 +- src/extraction/utils.js | 69 ++---- src/utils.js | 98 --------- 5 files changed, 136 insertions(+), 247 deletions(-) diff --git a/src/extraction/fallback_extraction.js b/src/extraction/fallback_extraction.js index 4257558..76706ca 100644 --- a/src/extraction/fallback_extraction.js +++ b/src/extraction/fallback_extraction.js @@ -25,11 +25,10 @@ const OPEN_GRAPH_PROPERTY_VALUES = { */ function getFeatureInfo() { const hostname = new URL(window.location.href).host; - for (const [vendorDomainsStr, featureInfo] of Object.entries(extractionData)) { - const vendorDomains = vendorDomainsStr.split('_'); - for (const domain of vendorDomains) { + for (const siteInfo of extractionData) { + for (const domain of siteInfo.domains) { if (hostname.includes(domain)) { - return featureInfo; + return siteInfo.features; } } } @@ -44,11 +43,13 @@ function findValue(extractors) { if (value) { return value; } - throw new Error('Element found did not return a valid value for the product feature.'); + // eslint-disable-next-line no-console + console.warn('Element found did not return a valid value for the product feature.'); } } - // None of the selectors matched an element on the page - throw new Error('No elements found with vendor data for the product feature.'); + // 
eslint-disable-next-line no-console + console.warn('No elements found with vendor data for the product feature.'); + return null; } /** diff --git a/src/extraction/fallback_extraction_selectors.js b/src/extraction/fallback_extraction_selectors.js index 90eb797..7d434dc 100644 --- a/src/extraction/fallback_extraction_selectors.js +++ b/src/extraction/fallback_extraction_selectors.js @@ -2,10 +2,13 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -import {getPriceInSubunits} from 'commerce/extraction/utils'; +import {parsePrice} from 'commerce/extraction/utils'; function inUnits(fn) { - return element => getPriceInSubunits(fn(element)); + return (element) => { + const priceString = fn(element); + return parsePrice([priceString]); + }; } function fromProperty(property) { @@ -18,98 +21,115 @@ function fromAttribute(attribute) { /** - * CSS selector data by site (represented by a string of acceptable hostnames), where each - * selector is paired with a method that extracts the value from the element returned by - * that selector. + * CSS selector data by site, where each selector is paired with a method that + * extracts the value from the element returned by that selector. 
*/ -const fallbackExtractionData = { - 'amazon.com_www.amazon.com_smile.amazon.com': { - title: [ - ['#productTitle', fromProperty('innerText')], - ['.product-title', fromProperty('innerText')], - ], - price: [ - ['#priceblock_dealprice', inUnits(fromProperty('innerText'))], - ['#priceblock_ourprice', inUnits(fromProperty('innerText'))], - ['#price_inside_buybox', inUnits(fromProperty('innerText'))], - ['#buybox .a-color-price', inUnits(fromProperty('innerText'))], - ['input[name="displayedPrice"]', inUnits(fromAttribute('value'))], - ['.a-size-large.a-color-price.guild_priceblock_ourprice', inUnits(fromProperty('innerText'))], - ['.a-color-price.a-size-medium.a-align-bottom', inUnits(fromProperty('innerText'))], - ['.display-price', inUnits(fromProperty('innerText'))], - ['.offer-price', inUnits(fromProperty('innerText'))], - ], - image: [ - ['#landingImage', fromProperty('src')], - ['#imgBlkFront', fromProperty('src')], - ['#ebooksImgBlkFront', fromProperty('src')], - ], +const fallbackExtractionData = [ + { + domains: ['amazon.com', 'www.amazon.com', 'smile.amazon.com'], + features: { + title: [ + ['#productTitle', fromProperty('innerText')], + ['.product-title', fromProperty('innerText')], + ], + price: [ + ['#priceblock_dealprice', inUnits(fromProperty('innerText'))], + ['#priceblock_ourprice', inUnits(fromProperty('innerText'))], + ['#price_inside_buybox', inUnits(fromProperty('innerText'))], + ['#buybox .a-color-price', inUnits(fromProperty('innerText'))], + ['input[name="displayedPrice"]', inUnits(fromAttribute('value'))], + ['.a-size-large.a-color-price.guild_priceblock_ourprice', inUnits(fromProperty('innerText'))], + ['.a-color-price.a-size-medium.a-align-bottom', inUnits(fromProperty('innerText'))], + ['.display-price', inUnits(fromProperty('innerText'))], + ['.offer-price', inUnits(fromProperty('innerText'))], + ], + image: [ + ['#landingImage', fromProperty('src')], + ['#imgBlkFront', fromProperty('src')], + ['#ebooksImgBlkFront', 
fromProperty('src')], + ], + }, }, - 'bestbuy.com_www.bestbuy.com': { - title: [ - ['.sku-title h1', fromProperty('innerText')], - ], - price: [ - ['.priceView-hero-price.priceView-purchase-price', inUnits(fromProperty('innerText'))], - ], - image: [ - ['img.primary-image', fromProperty('src')], - ], + { + domains: ['bestbuy.com', 'www.bestbuy.com'], + features: { + title: [ + ['.sku-title h1', fromProperty('innerText')], + ], + price: [ + ['.priceView-hero-price.priceView-purchase-price', inUnits(fromProperty('innerText'))], + ], + image: [ + ['img.primary-image', fromProperty('src')], + ], + }, }, - 'ebay.com_www.ebay.com': { - title: [ - ['#itemTitle', fromProperty('innerText')], - ['.product-title', fromProperty('innerText')], - ], - price: [ - ['#prcIsum', inUnits(fromProperty('innerText'))], - ['#orgPrc', inUnits(fromProperty('innerText'))], - ['#mm-saleDscPrc', inUnits(fromProperty('innerText'))], - ['.display-price', inUnits(fromProperty('innerText'))], - ], - image: [ - ['#icImg', fromProperty('src')], - ['.vi-image-gallery__image.vi-image-gallery__image--absolute-center', fromProperty('src')], - ], + { + domains: ['ebay.com', 'www.ebay.com'], + features: { + title: [ + ['#itemTitle', fromProperty('innerText')], + ['.product-title', fromProperty('innerText')], + ], + price: [ + ['#prcIsum', inUnits(fromProperty('innerText'))], + ['#orgPrc', inUnits(fromProperty('innerText'))], + ['#mm-saleDscPrc', inUnits(fromProperty('innerText'))], + ['.display-price', inUnits(fromProperty('innerText'))], + ], + image: [ + ['#icImg', fromProperty('src')], + ['.vi-image-gallery__image.vi-image-gallery__image--absolute-center', fromProperty('src')], + ], + }, }, - 'homedepot.com_www.homedepot.com': { - title: [ - ['h1.product-title__title', fromProperty('innerText')], - ], - price: [ - ['#ajaxPrice', inUnits(fromAttribute('content'))], - ['#ajaxPriceAlt', inUnits(fromProperty('innerText'))], - ], - image: [ - ['#mainImage', fromProperty('src')], - ], + { + domains: 
['homedepot.com', 'www.homedepot.com'], + features: { + title: [ + ['h1.product-title__title', fromProperty('innerText')], + ], + price: [ + ['#ajaxPrice', inUnits(fromAttribute('content'))], + ['#ajaxPriceAlt', inUnits(fromProperty('innerText'))], + ], + image: [ + ['#mainImage', fromProperty('src')], + ], + }, }, - 'walmart.com_www.walmart.com': { - title: [ - ['h1.prod-ProductTitle', fromAttribute('content')], - ['h1.prod-ProductTitle', fromProperty('innerText')], - ], - price: [ - ['.PriceRange.prod-PriceHero', inUnits(fromProperty('innerText'))], - ['.price-group', inUnits(fromAttribute('aria-label'))], - ['.price-group', inUnits(fromProperty('innerText'))], - ], - image: [ - ['.prod-hero-image-image', fromProperty('src')], - ['.prod-hero-image-carousel-image', fromProperty('src')], - ], + { + domains: ['walmart.com', 'www.walmart.com'], + features: { + title: [ + ['h1.prod-ProductTitle', fromAttribute('content')], + ['h1.prod-ProductTitle', fromProperty('innerText')], + ], + price: [ + ['.PriceRange.prod-PriceHero', inUnits(fromProperty('innerText'))], + ['.price-group', inUnits(fromAttribute('aria-label'))], + ['.price-group', inUnits(fromProperty('innerText'))], + ], + image: [ + ['.prod-hero-image-image', fromProperty('src')], + ['.prod-hero-image-carousel-image', fromProperty('src')], + ], + }, }, - 'mkelly.me_www.mkelly.me': { - title: [ - ['#title', fromProperty('innerText')], - ], - price: [ - ['#price', inUnits(fromProperty('innerText'))], - ], - image: [ - ['img', fromProperty('src')], - ], + { + domains: ['mkelly.me', 'www.mkelly.me'], + features: { + title: [ + ['#title', fromProperty('innerText')], + ], + price: [ + ['#price', inUnits(fromProperty('innerText'))], + ], + image: [ + ['img', fromProperty('src')], + ], + }, }, -}; +]; export default fallbackExtractionData; diff --git a/src/extraction/fathom_extraction.js b/src/extraction/fathom_extraction.js index 8ff6141..66f70f4 100644 --- a/src/extraction/fathom_extraction.js +++ 
b/src/extraction/fathom_extraction.js @@ -12,7 +12,7 @@ import defaultCoefficients from 'commerce/extraction/fathom_default_coefficients.json'; import RulesetFactory from 'commerce/extraction/ruleset_factory'; -import {getPriceInSubunits} from 'commerce/extraction/utils'; +import {parsePrice} from 'commerce/extraction/utils'; // Minimum score to be considered the "correct" feature element extracted by Fathom const SCORE_THRESHOLD = 4; @@ -42,7 +42,8 @@ const PRODUCT_FEATURES = { price: { ...FEATURE_DEFAULTS, getValueFromElement(element) { - return getPriceInSubunits(element); + const tokens = Array.from(element.childNodes).map(node => node.textContent); + return parsePrice(tokens); }, }, }; diff --git a/src/extraction/utils.js b/src/extraction/utils.js index 9ae3e12..8adb5e7 100644 --- a/src/extraction/utils.js +++ b/src/extraction/utils.js @@ -3,19 +3,25 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /** - * Converts a price element (from Fathom extraction) or string (from fallback extraction) into - * a numerical price value in subunits (like cents); e.g. $10.00 and "$10.00" both - * return 1000. If string parsing fails, returns NaN. - * @param {HTMLElement|string} price + * Converts an array of price tokens into a numerical price value in subunits. + * E.g. ["$10.00"] and ["$", "10", "00", "/each"] both return 1000. + * If string parsing fails, returns NaN. + * @param {Array.String} The price token strings extracted from the page * @returns {Number} the price in subunits */ -export function getPriceInSubunits(price) { - let priceUnits = []; - if (typeof price === 'string') { - priceUnits = getPriceUnitsFromStr(price); - } else { - priceUnits = getPriceUnitsFromArr(Array.from(price.childNodes)); - } +export function parsePrice(tokens) { + const priceUnits = ( + tokens + // Split tokens by $ and . 
to get the numbers between them + .flatMap(token => token.split(/[.$]/)) + // Filter out any tokens that do not contain a digit + .filter(token => /\d/g.test(token)) + // Remove any non-digit characters for each token in the list + .map(token => token.replace(/\D/g, '')) + // Convert price token strings to integers + .map(token => parseInt(token, 10)) + ); + // Convert units and subunits to a single integer value in subunits switch (priceUnits.length) { case 1: @@ -26,44 +32,3 @@ export function getPriceInSubunits(price) { return NaN; } } - -/** - * Extracts price units from textContent from text and/or DOM nodes - * @param {Array} Array of DOM nodes - * @returns {Array.Number} - */ -function getPriceUnitsFromArr(arr) { - return cleanPriceTokens(arr.flatMap(token => splitString(token.textContent))); -} - -/** - * Extracts price units from a string - * @param {String} - * @returns {Array.Number} - */ -function getPriceUnitsFromStr(str) { - return cleanPriceTokens(splitString(str)); -} - -/** - * Filters and cleans string tokens - * @param {Array.String} - * @returns {Array.Number} - */ -function cleanPriceTokens(tokens) { - // Filter out any tokens that do not contain a digit - const priceTokens = tokens.filter(token => /\d/g.test(token)); - - // Remove any non-digit characters for each token in the list - const cleanedPriceTokens = priceTokens.map(token => token.replace(/\D/g, '')); - - // Convert price token strings to integers - return cleanedPriceTokens.map(token => parseInt(token, 10)); -} - -/** - * Separates a string into an array of substrings using '$' and '.' 
as separators - */ -function splitString(str) { - return str.split(/[.$]/); -} diff --git a/src/utils.js b/src/utils.js index 9d05f34..f8c8469 100644 --- a/src/utils.js +++ b/src/utils.js @@ -51,101 +51,3 @@ export async function retry(callback, maxRetries = 5, delayFactor = 2, initialDe export function validatePropType(value, propType) { return checkPropTypes({value: propType}, {value}, 'prop', 'Validation'); } - -/** - * Returns true if the string contains a number. - */ -function hasNumber(string) { - return /\d/.test(string); -} - -/** - * Returns true if the string contains a dollar sign. - */ -function hasDollarSign(string) { - return /\$/.test(string); -} - -/** - * Get the main and sub unit elements for the product price. - * - * @returns {Object} A string:element object with 'mainUnit' and 'subUnit' keys. - */ -export function getPriceUnitElements(element) { - let isMainUnit = true; - const priceElements = {}; - // Loop through children: first element containing a digit is main unit, - // second is subunit. - for (const priceSubEle of element.children) { - if (hasNumber(priceSubEle.innerText)) { - if (isMainUnit) { - priceElements.mainUnit = priceSubEle; - isMainUnit = false; - } else { - priceElements.subUnit = priceSubEle; - } - } - } - return priceElements; -} - -/** - * Reformats price string to be of form "$NX.XX". - */ -export function cleanPriceString(priceStr) { - // Remove any commas - let cleanedPriceStr = priceStr.replace(/,/g, ''); - // Add a '$' at the beginning if not present; common for strings pulled from element attributes - if (!hasDollarSign) { - cleanedPriceStr = cleanedPriceStr.replace(/^/, '$'); - } - // Remove any characters preceding the '$' and following the '.XX' - cleanedPriceStr = cleanedPriceStr.substring(cleanedPriceStr.indexOf('$')); - cleanedPriceStr = cleanedPriceStr.substring(0, cleanedPriceStr.indexOf('.') + 3); - return cleanedPriceStr; -} - -/** - * Checks if a price object has subunits and returns a price string. 
- * - * @param {HTMLElement} - The element containing the price - * @param {String} extractUsing - The property/attribute to use to get the product price - */ -export function getPriceString(element, extractUsing) { - if (element.children.length > 0) { - const priceObj = getPriceUnitElements(element); - // Check for subunits e.g. dollars and cents. - if ('mainUnit' in priceObj) { - const mainUnitStr = priceObj.mainUnit.innerText; - // If no subunits, then main units contain subunits - const subUnitStr = priceObj.subUnit ? `.${priceObj.subUnit.innerText}` : ''; - const priceStr = `${mainUnitStr}${subUnitStr}`; - return cleanPriceString(hasDollarSign(priceStr) ? priceStr : `$${priceStr}`); - } - } - const priceStr = extractValueFromElement(element, extractUsing); - return cleanPriceString(priceStr); -} - -/** - * Extracts and returns the string value for a given element property or attribute. - * - * @param {HTMLElement} element - * @param {String} extractUsing - The property/attribute to use to get the product price - */ -export function extractValueFromElement(element, extractUsing) { - switch (extractUsing) { - case 'content': - return element.getAttribute('content'); - case 'innerText': - return element.innerText; - case 'src': - return element.src; - case 'value': - return element.getAttribute('value'); - case 'aria-label': - return element.getAttribute('aria-label'); - default: - throw new Error(`Unrecognized extraction property or attribute '${extractUsing}'.`); - } -}