This repository has been archived by the owner on Dec 3, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #143 from mozilla/robust-extraction
Fix #29: Attempt extraction after parsing is finished, before loading.
- Loading branch information
Showing
11 changed files
with
146 additions
and
97 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
/* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | ||
|
||
/** | ||
* Content script injected into tabs to attempt extracting information about a | ||
* product from the webpage. Set to run at "document_end" after the page has | ||
* been parsed but before all resources have been loaded. | ||
*/ | ||
|
||
import config from 'commerce/config/content'; | ||
import extractProductWithFathom from 'commerce/extraction/fathom'; | ||
import extractProductWithFallback from 'commerce/extraction/selector'; | ||
import extractProductWithOpenGraph from 'commerce/extraction/open_graph'; | ||
|
||
/**
 * Product extractors, listed in the order in which they are tried. Each one
 * is handed the page's document object and must return a valid
 * ExtractedProduct, or null when no valid product could be found.
 */
const EXTRACTION_METHODS = [
  extractProductWithFathom,
  extractProductWithFallback,
  extractProductWithOpenGraph,
];
|
||
/**
 * Run the extractors from EXTRACTION_METHODS one after another against the
 * current page's document and stop at the first truthy result.
 * @return {ExtractedProduct|null} The first truthy extraction result, or null
 *   when every extractor came back empty.
 */
function extractProduct() {
  for (let index = 0; index < EXTRACTION_METHODS.length; index += 1) {
    // Pull the function out first so it is invoked as a plain call.
    const method = EXTRACTION_METHODS[index];
    const candidate = method(window.document);
    if (!candidate) {
      continue;
    }
    return candidate;
  }

  return null;
}
|
||
/**
 * Try to extract a product from the current page. When one is found, send it
 * to the background script, tagged with the page URL and the current time as
 * an ISO 8601 string. Does nothing when no product is found.
 */
async function attemptExtraction() {
  const product = extractProduct();
  if (!product) {
    return;
  }

  const message = {
    from: 'content',
    subject: 'ready',
    extractedProduct: {
      ...product,
      url: document.location.href,
      date: (new Date()).toISOString(),
    },
  };
  await browser.runtime.sendMessage(message);
}
|
||
/**
 * Entry point of the content script. Decides whether this frame should be
 * inspected and, if so, attempts extraction now and again on every
 * readyState change. Failures are logged instead of being left as unhandled
 * promise rejections.
 */
(async function main() {
  // If we're in an iframe, don't bother extracting a product EXCEPT if we were
  // started by the background script for a price check.
  const isInIframe = window !== window.top;
  const isBackgroundUpdate = window.location.hash === '#moz-commerce-background';
  if (isInIframe && !isBackgroundUpdate) {
    return;
  }

  // Only perform extraction on allowlisted sites. Background updates get a
  // pass; we don't want to accidentally freeze updates for products that are
  // being tracked no matter what.
  const url = new URL(document.location.href);
  const allowList = await config.get('extractionAllowlist');
  const allowAll = allowList.length === 1 && allowList[0] === '*';
  if (!allowAll && !isBackgroundUpdate && !allowList.includes(url.host)) {
    return;
  }

  // attemptExtraction() is async and nothing awaits it, so attach a rejection
  // handler here; otherwise a failed sendMessage() is an unhandled rejection.
  const runExtraction = () => {
    attemptExtraction().catch((error) => {
      console.error('Product extraction failed:', error);
    });
  };

  // Extract immediately, and again if the readyState changes.
  runExtraction();
  document.addEventListener('readystatechange', runExtraction);
}()).catch((error) => {
  // Covers failures before extraction starts (e.g. the config lookup).
  console.error('Product extraction content script failed to start:', error);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
/* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | ||
|
||
/** | ||
* Product extraction via Open Graph tags. | ||
*/ | ||
|
||
// Maps each product feature to the Open Graph property that carries it. All
// of them are required for an extraction to succeed.
const OPEN_GRAPH_PROPERTY_VALUES = {
  title: 'og:title',
  image: 'og:image',
  price: 'og:price:amount',
};

/**
 * Returns any product information available on the page from Open Graph <meta>
 * tags.
 * @param {Document} [doc=document] Document to search. Defaults to the global
 *   document so existing zero-argument callers keep working.
 * @return {Object|null} An object with `title`, `image` and `price` taken from
 *   the tags' `content` attributes, or null if any required tag is missing or
 *   has no content.
 */
export default function extractProduct(doc = document) {
  const extractedProduct = {};
  for (const [feature, propertyValue] of Object.entries(OPEN_GRAPH_PROPERTY_VALUES)) {
    const metaEle = doc.querySelector(`meta[property='${propertyValue}']`);
    const content = metaEle ? metaEle.getAttribute('content') : null;

    // Fail early if any required tag isn't found or carries no value; a
    // partially filled object would be truthy and pass for a real product.
    if (!content) {
      return null;
    }

    extractedProduct[feature] = content;
  }

  return extractedProduct;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters