-
Notifications
You must be signed in to change notification settings - Fork 15
Fix #29: Attempt extraction after parsing is finished, before loading. #143
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
/* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | ||
|
||
/** | ||
* Content script injected into tabs to attempt extracting information about a | ||
* product from the webpage. Set to run at "document_end" after the page has | ||
* been parsed but before all resources have been loaded. | ||
*/ | ||
|
||
import config from 'commerce/config/content'; | ||
import extractProductWithFathom from 'commerce/extraction/fathom'; | ||
import extractProductWithFallback from 'commerce/extraction/selector'; | ||
import extractProductWithOpenGraph from 'commerce/extraction/open_graph'; | ||
|
||
/** | ||
* Extraction methods are given the document object for the page, and must | ||
* return either a valid ExtractedProduct, or null if a valid product could not | ||
* be found. | ||
* | ||
* Order matters: extractProduct() tries these in array order and keeps the | ||
* first truthy result, so earlier entries take priority over later ones. | ||
*/ | ||
const EXTRACTION_METHODS = [ | ||
extractProductWithFathom, | ||
extractProductWithFallback, | ||
extractProductWithOpenGraph, | ||
]; | ||
|
||
/**
 * Run the extractors listed in EXTRACTION_METHODS one after another against
 * the current page's document, stopping at the first truthy result.
 * @return {ExtractedProduct|null} The first product found, or null when no
 *   method produced a truthy result.
 */
function extractProduct() {
  let product = null;

  // `some` stops iterating as soon as the callback returns true, which gives
  // the same short-circuit behavior as returning from inside a loop.
  EXTRACTION_METHODS.some((method) => {
    const candidate = method(window.document);
    if (candidate) {
      product = candidate;
    }
    return Boolean(candidate);
  });

  return product;
}
|
||
/**
 * Run extraction on the current page and, when a product is found, send it
 * to the background script together with the page URL and an ISO-8601
 * timestamp of when the extraction happened.
 */
async function attemptExtraction() {
  const product = extractProduct();
  if (!product) {
    // Nothing was extracted, so there is nothing to report.
    return;
  }

  const message = {
    from: 'content',
    subject: 'ready',
    extractedProduct: {
      ...product,
      url: document.location.href,
      date: new Date().toISOString(),
    },
  };
  await browser.runtime.sendMessage(message);
}
|
||
// Entry point for the content script: return early for frames or sites that | ||
// should not be processed, otherwise attempt extraction right away and again | ||
// on every readystatechange event. | ||
(async function main() { | ||
// If we're in an iframe, don't bother extracting a product EXCEPT if we were | ||
// started by the background script for a price check. | ||
const isInIframe = window !== window.top; | ||
const isBackgroundUpdate = window.location.hash === '#moz-commerce-background'; | ||
if (isInIframe && !isBackgroundUpdate) { | ||
return; | ||
} | ||
|
||
// Only perform extraction on allowlisted sites. Background updates get a | ||
// pass; we don't want to accidentally freeze updates for products that are | ||
// being tracked no matter what. | ||
const url = new URL(document.location.href); | ||
const allowList = await config.get('extractionAllowlist'); | ||
// An allowlist consisting of the single entry "*" allows every host. | ||
const allowAll = allowList.length === 1 && allowList[0] === '*'; | ||
if (!allowAll && !isBackgroundUpdate && !allowList.includes(url.host)) { | ||
return; | ||
} | ||
|
||
// Extract immediately, and again if the readyState changes. | ||
// NOTE(review): neither call to attemptExtraction() is awaited, so a rejection | ||
// from it is not handled inside main. | ||
attemptExtraction(); | ||
document.addEventListener('readystatechange', () => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We probably only want to add this listener and attempt extraction again if the first attempt failed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The idea was to perform a second extraction in case JavaScript has modified the page and changed the product info, to ensure we get the correct product info every time. I'm not sure how common this is, though, or how performing two extractions affects the user's experience. I think we should keep it, but can be convinced otherwise. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah hmm... so see if we can get something at all to put in the popup sooner, and then update with the second round of extraction... Perhaps we could add a scalar probe that increments if the results of the first to the second extraction are different and compare that to the number of extraction attempts? Might be a wishlist probe. What do you think about that idea @Osmose ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I wouldn't bother adding a probe like that initially. |
||
attemptExtraction(); | ||
}); | ||
}()); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
/* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | ||
|
||
/** | ||
* Product extraction via Open Graph tags. | ||
*/ | ||
|
||
// Maps each extracted product field to the `property` attribute value of the | ||
// Open Graph <meta> tag it is read from. extractProduct() in this file treats | ||
// every entry as required and returns null if any of the tags is missing. | ||
const OPEN_GRAPH_PROPERTY_VALUES = { | ||
title: 'og:title', | ||
image: 'og:image', | ||
price: 'og:price:amount', | ||
}; | ||
|
||
/** | ||
* Returns any product information available on the page from Open Graph <meta> | ||
* tags. | ||
* | ||
* Reads from the global `document`; the function declares no parameters, so | ||
* any argument a caller passes is ignored. | ||
* @return {Object|null} An object with `title`, `image`, and `price` taken from | ||
* each tag's `content` attribute, or null if any of the tags is missing. | ||
*/ | ||
export default function extractProduct() { | ||
const extractedProduct = {}; | ||
for (const [feature, propertyValue] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) { | ||
const metaEle = document.querySelector(`meta[property='${propertyValue}']`); | ||
|
||
// Fail early if any required tags aren't found. | ||
if (!metaEle) { | ||
return null; | ||
} | ||
|
||
// NOTE(review): getAttribute() yields a string (or null when the attribute is | ||
// absent), so values such as price are stored as-is, without conversion or | ||
// validation. Confirm this matches what downstream consumers expect. | ||
extractedProduct[feature] = metaEle.getAttribute('content'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's a bug here, since the propType for |
||
} | ||
|
||
return extractedProduct; | ||
} |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fallback extraction does not currently return `null` if it doesn’t find anything, so this would be a good time to change that before adding this existence check. (My bad)
We do also check for correct data types via `propTypes` further downstream, but it'd be nice to be consistent between the two extraction methods.