Skip to content

Commit

Permalink
Merge
Browse files Browse the repository at this point in the history
  • Loading branch information
NielsSteensma committed Mar 27, 2024
2 parents 305ae3b + e85312d commit 0b51aa8
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 92 deletions.
2 changes: 1 addition & 1 deletion lib/Dhalang.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ module Dhalang
require_relative 'PDF'
require_relative 'Screenshot'
require_relative 'Scraper'
require_relative 'Dhalang/node_script_invoker'
require_relative 'Dhalang/version'
require_relative 'Dhalang/url_utils'
require_relative 'Dhalang/file_utils'
require_relative 'Dhalang/error'
require_relative 'Dhalang/configuration'
require_relative 'Dhalang/node_script_invoker'
require 'uri'
require 'tempfile'
require 'shellwords'
Expand Down
162 changes: 81 additions & 81 deletions lib/Dhalang/configuration.rb
Original file line number Diff line number Diff line change
@@ -1,87 +1,87 @@
module Dhalang
# Groups Puppeteer and Dhalang configuration.
class Configuration
NODE_MODULES_PATH = Dir.pwd + '/node_modules/'.freeze
USER_OPTIONS = {
navigationTimeout: 10000,
printToPDFTimeout: 0, # unlimited
navigationWaitUntil: 'load',
navigationWaitForSelector: '',
navigationWaitForXPath: '',
userAgent: '',
isHeadless: true,
viewPort: '',
httpAuthenticationCredentials: '',
isAutoHeight: false,
chromeOptions: []
}.freeze
DEFAULT_PDF_OPTIONS = {
scale: 1,
displayHeaderFooter: false,
headerTemplate: '',
footerTemplate: '',
headerTemplateFile: '',
footerTemplateFile: '',
printBackground: true,
landscape: false,
pageRanges: '',
format: 'A4',
width: '',
height: '',
margin: { top: 36, right: 36, bottom: 20, left: 36 },
preferCSSPageSize: true,
omitBackground: false
}.freeze
DEFAULT_SCREENSHOT_OPTIONS = {
fullPage: true,
clip: nil,
omitBackground: false
}.freeze
DEFAULT_JPEG_OPTIONS = {
quality: 100
}.freeze
# Groups Puppeteer and Dhalang configuration.
class Configuration
NODE_MODULES_PATH = Dir.pwd + '/node_modules/'.freeze
USER_OPTIONS = {
navigationTimeout: 10000,
printToPDFTimeout: 0, # unlimited
navigationWaitUntil: 'load',
navigationWaitForSelector: '',
navigationWaitForXPath: '',
userAgent: '',
isHeadless: true,
viewPort: '',
httpAuthenticationCredentials: '',
isAutoHeight: false,
chromeOptions: []
}.freeze
DEFAULT_PDF_OPTIONS = {
scale: 1,
displayHeaderFooter: false,
headerTemplate: '',
footerTemplate: '',
headerTemplateFile: '',
footerTemplateFile: '',
printBackground: true,
landscape: false,
pageRanges: '',
format: 'A4',
width: '',
height: '',
margin: { top: 36, right: 36, bottom: 20, left: 36 },
preferCSSPageSize: true,
omitBackground: false
}.freeze
DEFAULT_SCREENSHOT_OPTIONS = {
fullPage: true,
clip: nil,
omitBackground: false
}.freeze
DEFAULT_JPEG_OPTIONS = {
quality: 100
}.freeze

private_constant :NODE_MODULES_PATH
private_constant :USER_OPTIONS
private_constant :DEFAULT_PDF_OPTIONS
private_constant :DEFAULT_SCREENSHOT_OPTIONS
private_constant :DEFAULT_JPEG_OPTIONS
private_constant :NODE_MODULES_PATH
private_constant :USER_OPTIONS
private_constant :DEFAULT_PDF_OPTIONS
private_constant :DEFAULT_SCREENSHOT_OPTIONS
private_constant :DEFAULT_JPEG_OPTIONS

private attr_accessor :page_url
private attr_accessor :temp_file_path
private attr_accessor :temp_file_extension
private attr_accessor :user_options
private attr_accessor :pdf_options
private attr_accessor :screenshot_options
private attr_accessor :jpeg_options
private attr_accessor :page_url
private attr_accessor :temp_file_path
private attr_accessor :temp_file_extension
private attr_accessor :user_options
private attr_accessor :pdf_options
private attr_accessor :screenshot_options
private attr_accessor :jpeg_options

# @param [Hash] options Custom options for overriding Puppeteer configuration.
# @param [String] page_url Url for Puppeteer to visit.
# @param [String] temp_file_path Absolute path of temp file to write results of scripts towards.
# Can be nil for scripts using stdout.
# @param [String] temp_file_extension Extension of temp file. Can be nil for scripts using stdout.
def initialize(options, page_url, temp_file_path = nil, temp_file_extension = nil)
self.page_url = page_url
self.temp_file_path = temp_file_path
self.temp_file_extension = temp_file_extension
self.user_options = USER_OPTIONS.map { |option, value| [option, options.has_key?(option) ? options[option] : value]}
self.pdf_options = DEFAULT_PDF_OPTIONS.map { |option, value| [option, options.has_key?(option) ? options[option] : value] }
self.screenshot_options = DEFAULT_SCREENSHOT_OPTIONS.map { |option, value| [option, options.has_key?(option) ? options[option] : value] }
self.jpeg_options = DEFAULT_JPEG_OPTIONS.map { |option, value| [option, options.has_key?(option) ? options[option] : value] }
end
# @param [Hash] custom_options Changes that override default.
# @param [String] page_url Url for Puppeteer to visit.
# @param [String] temp_file_path Absolute path of temp file to use for writing script results.
# Can be nil for scripts using stdout.
# @param [String] temp_file_extension Extension of temp file. Can be nil for scripts using stdout.
def initialize(custom_options, page_url, temp_file_path = nil, temp_file_extension = nil)
  # Overlays caller-supplied values on a table of defaults: every key of the
  # defaults table is kept, and its value is taken from custom_options when
  # present there, otherwise from the default. Produces [key, value] pairs.
  overlay = lambda do |defaults|
    defaults.map { |name, fallback| [name, custom_options.fetch(name, fallback)] }
  end

  self.page_url = page_url
  self.temp_file_path = temp_file_path
  self.temp_file_extension = temp_file_extension

  self.user_options = overlay.call(USER_OPTIONS)
  self.pdf_options = overlay.call(DEFAULT_PDF_OPTIONS)
  self.screenshot_options = overlay.call(DEFAULT_SCREENSHOT_OPTIONS)
  self.jpeg_options = overlay.call(DEFAULT_JPEG_OPTIONS)
end

# Returns configuration as JSON string.
def json
return {
webPageUrl: page_url,
tempFilePath: temp_file_path,
puppeteerPath: NODE_MODULES_PATH,
imageType: temp_file_extension,
userOptions: user_options.to_h,
pdfOptions: pdf_options.to_h,
screenshotOptions: screenshot_options.to_h,
jpegOptions: jpeg_options.to_h
}.to_json
end
# Returns configuration as JSON string.
def json
  # Each option set is converted with to_h before the whole payload is
  # serialised, so every group is emitted as a JSON object.
  payload = {
    webPageUrl: page_url,
    tempFilePath: temp_file_path,
    puppeteerPath: NODE_MODULES_PATH,
    imageType: temp_file_extension,
    userOptions: user_options.to_h,
    pdfOptions: pdf_options.to_h,
    screenshotOptions: screenshot_options.to_h,
    jpegOptions: jpeg_options.to_h
  }
  payload.to_json
end
end
end
end
11 changes: 6 additions & 5 deletions lib/Dhalang/node_script_invoker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ class NodeScriptInvoker

# Executes JS script under given script_path by launching a new Node process.
#
# @param [String] script_path Absolute path of the JS script to execute.
# @param [Object] options Set of options to use, configurable by the user.
# @param [String] script_path Absolute path of JS script to execute.
# @param [Configuration] configuration Configuration to use.
def self.execute_script(script_path, configuration)
command = create_node_command(script_path, configuration)
Open3.popen2e(command) do |_stdin, stdouterr, wait|
Expand Down Expand Up @@ -37,12 +37,13 @@ def self.execute_script_and_read_stdout(script_path, configuration)
end
end

# Returns a [String] with the node command to invoke the provided script with the configuration.

# Returns a [String] with the node command that invokes the provided script with the configuration.
#
# @param [String] script_path Absolute path of JS script to invoke.
# @param [Object] configuration JSON with options to use for Puppeteer.
# @param [Configuration] configuration Configuration to use.
private_class_method def self.create_node_command(script_path, configuration)
"node #{script_path} #{Shellwords.escape(configuration)}"
"node #{script_path} #{Shellwords.escape(configuration.json)}"
end
end
end
5 changes: 3 additions & 2 deletions lib/PDF.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,15 @@ def self.get_from_html(html, options = {})

# Groups and executes the logic for creating a PDF of a webpage.
#
# @param [String] url The url to create a PDF for.
# @param [Hash] options Set of options to use, passed by the user of this library.
#
# @return [String] The PDF that was created as binary.
private_class_method def self.get(url, options)
temp_file = FileUtils.create_temp_file("pdf")
begin
configuration = Configuration.new(url, temp_file.path, "pdf", options)
NodeScriptInvoker.execute_script(SCRIPT_PATH, configuration.json)
configuration = Configuration.new(options, url, temp_file.path, "pdf")
NodeScriptInvoker.execute_script(SCRIPT_PATH, configuration)
binary_pdf_content = FileUtils.read_binary(temp_file.path)
ensure
FileUtils.delete(temp_file)
Expand Down
6 changes: 3 additions & 3 deletions lib/Screenshot.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def self.get_from_url_as_png(url, options = {})
get_from_url(url, :png, options)
end

# Captures a screenshot of the webpage under the given url.
# Captures a screenshot of the webpage under the given url.
#
# @param [String] url The url to take a screenshot of.
# @param [String] image_type The image type (JPEG/PNG/WEBP) to use for storing the screenshot.
Expand All @@ -43,9 +43,9 @@ def self.get_from_url(url, image_type, options = {})
validate_options(options)

temp_file = FileUtils.create_temp_file(image_type)
configuration = Configuration.new(url, temp_file.path, image_type, options)
begin
NodeScriptInvoker.execute_script(SCRIPT_PATH, configuration.json)
configuration = Configuration.new(options, url, temp_file.path, image_type)
NodeScriptInvoker.execute_script(SCRIPT_PATH, configuration)
binary_image_content = FileUtils.read_binary(temp_file.path)
ensure
FileUtils.delete(temp_file)
Expand Down

0 comments on commit 0b51aa8

Please sign in to comment.