From 0bca7d982a28862e66c296e1509a2890dbccf9e3 Mon Sep 17 00:00:00 2001 From: Bianca Danforth Date: Thu, 1 Nov 2018 11:02:17 -0700 Subject: [PATCH] Fix #158: Add 'attempt_extraction' and 'complete_extraction' probes * Update 'method' extra key for 'complete_extraction' event to have values of 'fathom', 'css_selectors', 'open_graph' or 'none' to distinguish between the two fallback extraction methods to Fathom: CSS Selectors or Open Graph attributes. * Note: Since none of our five supported sites use Open Graph attributes currently for product information, we should not expect to see any successful extraction using that method for the MVP. * Important caveats for making conclusions using these probes as-is: * The coverage values that these probes will suggest will not be very accurate initially until we ensure we are extracting on only **product pages** for a supported site, rather than any page on the site (see Issues #225 and #181). * Successful extraction does not mean that the information extracted for the product is correct. It only means that _a_ value was extracted for each product feature (i.e. title, image and price) on the page. --- docs/METRICS.md | 2 +- src/extraction/index.js | 42 ++++++++++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/docs/METRICS.md b/docs/METRICS.md index f241d6a..c038c08 100644 --- a/docs/METRICS.md +++ b/docs/METRICS.md @@ -140,7 +140,7 @@ Below is a sample ping for the `badge_toolbar_button` and `visit_supported_site` - `'badge_type'`: Indicates what, if any, badge was present on the browserAction toolbar button. One of 'add', 'price_alert', or 'none'. A value of 'unknown' is possible if the badge text is unrecognized. - `'extraction_id'`: A unique identifier to associate an extraction attempt to an extraction completion event for a given page. - `'is_bg_update'`: 'true' if the extraction is associated with a background price check; otherwise 'false'. -- `method`: The extraction method that was successful, if any. One of: 'fathom', 'fallback' or 'neither'. A value of 'neither' means that extraction failed. +- `method`: The extraction method that was successful, if any. One of: 'fathom', 'css_selectors', 'open_graph' or 'none'. A value of 'none' means that all extraction methods failed. - `'price'`: The price of the product in subunits (e.g. a $10.00 product would have a value of `'1000'`). For the MVP, the units here are always cents (USD currency only). - `'price_alert'`: 'true' if the product has an active Price Alert; otherwise 'false'. - `'price_last_high'`: The last high price of the product in subunits (e.g. a $10.00 product would have a value of `'1000'`). For the MVP, the units here are always cents (USD currency only). diff --git a/src/extraction/index.js b/src/extraction/index.js index f5dbff5..dc64165 100644 --- a/src/extraction/index.js +++ b/src/extraction/index.js @@ -8,9 +8,11 @@ * been parsed but before all resources have been loaded. */ +import uuidv4 from 'uuid/v4'; + import config from 'commerce/config/content'; import extractProductWithFathom from 'commerce/extraction/fathom'; -import extractProductWithFallback from 'commerce/extraction/selector'; +import extractProductWithCSSSelectors from 'commerce/extraction/selector'; import extractProductWithOpenGraph from 'commerce/extraction/open_graph'; import {shouldExtract} from 'commerce/privacy'; import recordEvent from 'commerce/telemetry/content'; @@ -20,25 +22,39 @@ import recordEvent from 'commerce/telemetry/content'; * return either a valid ExtractedProduct, or null if a valid product could not * be found. */ -const EXTRACTION_METHODS = [ - extractProductWithFathom, - extractProductWithFallback, - extractProductWithOpenGraph, -]; +const EXTRACTION_METHODS = { + fathom: extractProductWithFathom, + css_selectors: extractProductWithCSSSelectors, + open_graph: extractProductWithOpenGraph, +}; /** * Perform product extraction, trying each method from EXTRACTION_METHODS in * order until one of them returns a truthy result. * @return {ExtractedProduct|null} */ -function extractProduct() { - for (const extract of EXTRACTION_METHODS) { +function extractProduct(isBackgroundUpdate) { + const baseExtra = { + extraction_id: uuidv4(), + is_bg_update: isBackgroundUpdate, + }; + recordEvent('attempt_extraction', 'product_page', null, { + ...baseExtra, + }); + for (const [method, extract] of Object.entries(EXTRACTION_METHODS)) { const extractedProduct = extract(window.document); if (extractedProduct) { + recordEvent('complete_extraction', 'product_page', null, { + ...baseExtra, + method, + }); return extractedProduct; } } - + recordEvent('complete_extraction', 'product_page', null, { + ...baseExtra, + method: 'none', + }); return null; } @@ -58,8 +74,8 @@ async function sendProductToBackground(extractedProduct, sendTelemetry) { * Checks to see if any product information for the page was found, * and if so, sends it to the background script. */ -async function attemptExtraction() { - const extractedProduct = extractProduct(); +async function attemptExtraction(isBackgroundUpdate) { + const extractedProduct = extractProduct(isBackgroundUpdate); if (extractedProduct) { await sendProductToBackground(extractedProduct); } @@ -105,9 +121,9 @@ async function attemptExtraction() { } // Extract immediately, and again if the readyState changes. - let extractedProduct = await attemptExtraction(); + let extractedProduct = await attemptExtraction(isBackgroundUpdate); document.addEventListener('readystatechange', async () => { - extractedProduct = await attemptExtraction(); + extractedProduct = await attemptExtraction(isBackgroundUpdate); }); // Messy workaround for bug 1493470: Resend product info to the background