From 5dde49ce459f546ead042b46c272c5bc44ab2c93 Mon Sep 17 00:00:00 2001 From: Niels Steensma Date: Sun, 11 Aug 2024 19:52:35 +0200 Subject: [PATCH] Added functionality to connect to remote Chromium instance --- README.md | 1 + lib/Dhalang/configuration.rb | 2 ++ lib/js/dhalang.js | 18 +++++++++++++----- lib/js/html-scraper.js | 7 +++++-- lib/js/pdf-generator.js | 7 +++++-- lib/js/screenshot-generator.js | 7 +++++-- 6 files changed, 31 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 523b762..b7be496 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ Dhalang::Screenshot.get_from_url("https://www.google.com", :jpeg, {navigationTim Below table lists all possible configuration parameters that can be set: | Key | Description | Default | |--------------------|-----------------------------------------------------------------------------------------|---------------------------------| +| browserWebsocketUrl | WebSocket URL of a remote Chromium browser to connect to instead of launching a local one | None (a local browser is launched) | | navigationTimeout | Amount of milliseconds until Puppeteer while timeout when navigating to the given page | 10000 | | printToPDFTimeout | Amount of milliseconds until Puppeteer while timeout when calling Page.printToPDF | 0 (unlimited) | | navigationWaitForSelector | If set, Dhalang will wait for the specified selector to appear before creating the screenshot or PDF | None | diff --git a/lib/Dhalang/configuration.rb b/lib/Dhalang/configuration.rb index 0630e6d..ec2b828 100644 --- a/lib/Dhalang/configuration.rb +++ b/lib/Dhalang/configuration.rb @@ -3,6 +3,7 @@ module Dhalang class Configuration NODE_MODULES_PATH = Dir.pwd + '/node_modules/'.freeze USER_OPTIONS = { + browserWebsocketUrl: '', navigationTimeout: 10000, printToPDFTimeout: 0, # unlimited navigationWaitUntil: 'load', @@ -48,6 +49,7 @@ class Configuration private_constant :DEFAULT_JPEG_OPTIONS private attr_accessor :page_url + private attr_accessor :browser_websocket_url private attr_accessor :temp_file_path private attr_accessor 
:temp_file_extension private attr_accessor :user_options diff --git a/lib/js/dhalang.js b/lib/js/dhalang.js index 5f600d1..372130d 100644 --- a/lib/js/dhalang.js +++ b/lib/js/dhalang.js @@ -14,6 +14,7 @@ const fs = require('fs') /** * @typedef {Object} UserOptions + * @property {string} browserWebsocketUrl - The websocket url of remote Chromium browser to use. * @property {number} navigationTimeout - Maximum in milliseconds until navigation times out, we use a default of 10 seconds as timeout. * @property {string} navigationWaitUntil - Determines when the navigation was finished, we wait here until the Window.load event is fired ( meaning all images, stylesheet, etc was loaded ). * @property {string} navigationWaitForSelector - If set, specifies the selector Puppeteer should wait for to appear before continuing. @@ -47,7 +48,7 @@ exports.getConfiguration = function () { /** * Launches Puppeteer and returns its instance. - * @param {UserOptions} configuration - The configuration to use. + * @param {Configuration} configuration - The configuration to use. * @returns {Promise} * The launched instance of Puppeteer. 
*/ @@ -55,10 +56,17 @@ exports.launchPuppeteer = async function (configuration) { module.paths.push(configuration.puppeteerPath); const puppeteer = require('puppeteer'); const launchArgs = ['--no-sandbox', '--disable-setuid-sandbox'].concat(configuration.userOptions.chromeOptions).filter((item, index, self) => self.indexOf(item) === index); - return await puppeteer.launch({ - args: launchArgs, - headless: configuration.userOptions.isHeadless - }); + + if (configuration.userOptions['browserWebsocketUrl'] !== "") { + return await puppeteer.connect( { + "browserWSEndpoint": configuration.userOptions.browserWebsocketUrl + }) + } else { + return await puppeteer.launch({ + args: launchArgs, + headless: configuration.userOptions.isHeadless + }); + } } /** diff --git a/lib/js/html-scraper.js b/lib/js/html-scraper.js index b38ad72..6e307bb 100644 --- a/lib/js/html-scraper.js +++ b/lib/js/html-scraper.js @@ -6,9 +6,10 @@ const scrapeHtml = async () => { const configuration = dhalang.getConfiguration(); let browser; + let page; try { browser = await dhalang.launchPuppeteer(configuration); - const page = await browser.newPage(); + page = await browser.newPage(); await dhalang.configure(page, configuration.userOptions); await dhalang.navigate(page, configuration); const html = await page.content(); @@ -17,8 +18,10 @@ const scrapeHtml = async () => { console.error(error.message); process.exit(1); } finally { - if (browser) { + if (browser && configuration.userOptions['browserWebsocketUrl'] === "") { browser.close(); + } else { + page.close(); } process.exit(0); } diff --git a/lib/js/pdf-generator.js b/lib/js/pdf-generator.js index cd16edb..5283228 100644 --- a/lib/js/pdf-generator.js +++ b/lib/js/pdf-generator.js @@ -5,9 +5,10 @@ const createPdf = async () => { const configuration = dhalang.getConfiguration(); let browser; + let page; try { browser = await dhalang.launchPuppeteer(configuration); - const page = await browser.newPage(); + page = await browser.newPage(); await 
dhalang.configure(page, configuration.userOptions); await dhalang.navigate(page, configuration); const pdfOptions = await dhalang.getConfiguredPdfOptions(page, configuration); @@ -21,8 +22,10 @@ const createPdf = async () => { console.error(error.message); process.exit(1); } finally { - if (browser) { + if (browser && configuration.userOptions['browserWebsocketUrl'] === "") { browser.close(); + } else { + page.close(); } process.exit(); } diff --git a/lib/js/screenshot-generator.js b/lib/js/screenshot-generator.js index 3ed6aa1..81f7d34 100644 --- a/lib/js/screenshot-generator.js +++ b/lib/js/screenshot-generator.js @@ -5,9 +5,10 @@ const createScreenshot = async () => { const configuration = dhalang.getConfiguration(); let browser; + let page; try { browser = await dhalang.launchPuppeteer(configuration); - const page = await browser.newPage(); + page = await browser.newPage(); await dhalang.configure(page, configuration.userOptions); await dhalang.navigate(page, configuration); @@ -23,8 +24,10 @@ const createScreenshot = async () => { console.error(error.message); process.exit(1); } finally { - if (browser) { + if (browser && configuration.userOptions['browserWebsocketUrl'] === "") { browser.close(); + } else { + page.close(); } process.exit(); }