From f9fc4770804edfc404def28335cf8ad04e2e724e Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 31 Mar 2024 12:43:07 +0200 Subject: [PATCH 1/3] Improve the implementation of the `PDFDocument.fingerprints`-getter - Add explicit `length` validation of the /ID entries. Given the `EMPTY_FINGERPRINT` constant we're already *implicitly* assuming a particular length. - Move the constants into the `fingerprints`-getter, since they're not used anywhere else. - Replace the `hexString` helper function with the standard `Uint8Array.prototype.toHex` method; see https://github.com/tc39/proposal-arraybuffer-base64 --- src/core/document.js | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index 2dab34d062000..625c7f903de63 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -862,10 +862,6 @@ const STARTXREF_SIGNATURE = new Uint8Array([ ]); const ENDOBJ_SIGNATURE = new Uint8Array([0x65, 0x6e, 0x64, 0x6f, 0x62, 0x6a]); -const FINGERPRINT_FIRST_BYTES = 1024; -const EMPTY_FINGERPRINT = - "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; - function find(stream, signature, limit = 1024, backwards = false) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert(limit > 0, 'The "limit" must be a positive integer.'); @@ -1548,30 +1544,24 @@ class PDFDocument { } get fingerprints() { + const FINGERPRINT_FIRST_BYTES = 1024; + const EMPTY_FINGERPRINT = "\x00".repeat(16); + function validate(data) { return ( typeof data === "string" && - data.length > 0 && + data.length === 16 && data !== EMPTY_FINGERPRINT ); } - function hexString(hash) { - const buf = []; - for (const num of hash) { - const hex = num.toString(16); - buf.push(hex.padStart(2, "0")); - } - return buf.join(""); - } - - const idArray = this.xref.trailer.get("ID"); + const id = this.xref.trailer.get("ID"); let hashOriginal, hashModified; - if (Array.isArray(idArray) && 
validate(idArray[0])) { - hashOriginal = stringToBytes(idArray[0]); + if (Array.isArray(id) && validate(id[0])) { + hashOriginal = stringToBytes(id[0]); - if (idArray[1] !== idArray[0] && validate(idArray[1])) { - hashModified = stringToBytes(idArray[1]); + if (id[1] !== id[0] && validate(id[1])) { + hashModified = stringToBytes(id[1]); } } else { hashOriginal = calculateMD5( @@ -1582,8 +1572,8 @@ class PDFDocument { } return shadow(this, "fingerprints", [ - hexString(hashOriginal), - hashModified ? hexString(hashModified) : null, + hashOriginal.toHex(), + hashModified?.toHex() ?? null, ]); } From bfc645bab17c0544aee1466513200dfd396f2579 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 10 Apr 2024 12:45:22 +0200 Subject: [PATCH 2/3] Introduce some `Uint8Array.fromBase64` and `Uint8Array.prototype.toBase64` usage in the main code-base See https://github.com/tc39/proposal-arraybuffer-base64 --- src/core/xfa/template.js | 4 ++-- src/display/font_loader.js | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/core/xfa/template.js b/src/core/xfa/template.js index 8ec158ff888fe..454ceb5f0fca2 100644 --- a/src/core/xfa/template.js +++ b/src/core/xfa/template.js @@ -102,7 +102,7 @@ import { getStringOption, HTMLResult, } from "./utils.js"; -import { stringToBytes, Util, warn } from "../../shared/util.js"; +import { Util, warn } from "../../shared/util.js"; import { getMetrics } from "./fonts.js"; import { recoverJsURL } from "../core_utils.js"; import { searchNode } from "./som.js"; @@ -3427,7 +3427,7 @@ class Image extends StringObject { } if (!buffer && this.transferEncoding === "base64") { - buffer = stringToBytes(atob(this[$content])); + buffer = Uint8Array.fromBase64(this[$content]); } if (!buffer) { diff --git a/src/display/font_loader.js b/src/display/font_loader.js index 1014c536b67e0..4115a71eb5d79 100644 --- a/src/display/font_loader.js +++ b/src/display/font_loader.js @@ -15,7 +15,6 @@ import { assert, - bytesToString, FontRenderOps, 
isNodeJS, shadow, @@ -399,9 +398,8 @@ class FontFaceObject { if (!this.data || this.disableFontFace) { return null; } - const data = bytesToString(this.data); // Add the @font-face rule to the document. - const url = `url(data:${this.mimetype};base64,${btoa(data)});`; + const url = `url(data:${this.mimetype};base64,${this.data.toBase64()});`; let rule; if (!this.cssFontInfo) { rule = `@font-face {font-family:"${this.loadedName}";src:${url}}`; From 8f47d06d075f6c803f43f22fee8bdc3930c33b17 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 14 Oct 2024 14:26:11 +0200 Subject: [PATCH 3/3] Add helper functions to allow using new `Uint8Array` methods This allows using the new methods in browsers that support them, e.g. Firefox 133+, while still providing fallbacks where necessary; see https://github.com/tc39/proposal-arraybuffer-base64 *Please note:* These are not actual polyfills, but only implement what we need in the PDF.js code-base. Eventually this patch should be reverted, once support is generally available. --- src/core/document.js | 5 +++-- src/core/xfa/template.js | 4 ++-- src/display/font_loader.js | 3 ++- src/shared/util.js | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index 625c7f903de63..434cbac2a6325 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -26,6 +26,7 @@ import { stringToBytes, stringToPDFString, stringToUTF8String, + toHexUtil, unreachable, Util, warn, @@ -1572,8 +1573,8 @@ class PDFDocument { } return shadow(this, "fingerprints", [ - hashOriginal.toHex(), - hashModified?.toHex() ?? null, + toHexUtil(hashOriginal), + hashModified ? 
toHexUtil(hashModified) : null, ]); } diff --git a/src/core/xfa/template.js b/src/core/xfa/template.js index 454ceb5f0fca2..96d18930c1c53 100644 --- a/src/core/xfa/template.js +++ b/src/core/xfa/template.js @@ -90,6 +90,7 @@ import { XFAObject, XFAObjectArray, } from "./xfa_object.js"; +import { fromBase64Util, Util, warn } from "../../shared/util.js"; import { getBBox, getColor, @@ -102,7 +103,6 @@ import { getStringOption, HTMLResult, } from "./utils.js"; -import { Util, warn } from "../../shared/util.js"; import { getMetrics } from "./fonts.js"; import { recoverJsURL } from "../core_utils.js"; import { searchNode } from "./som.js"; @@ -3427,7 +3427,7 @@ class Image extends StringObject { } if (!buffer && this.transferEncoding === "base64") { - buffer = Uint8Array.fromBase64(this[$content]); + buffer = fromBase64Util(this[$content]); } if (!buffer) { diff --git a/src/display/font_loader.js b/src/display/font_loader.js index 4115a71eb5d79..1ec7160c8500a 100644 --- a/src/display/font_loader.js +++ b/src/display/font_loader.js @@ -19,6 +19,7 @@ import { isNodeJS, shadow, string32, + toBase64Util, unreachable, warn, } from "../shared/util.js"; @@ -399,7 +400,7 @@ class FontFaceObject { return null; } // Add the @font-face rule to the document. - const url = `url(data:${this.mimetype};base64,${this.data.toBase64()});`; + const url = `url(data:${this.mimetype};base64,${toBase64Util(this.data)});`; let rule; if (!this.cssFontInfo) { rule = `@font-face {font-family:"${this.loadedName}";src:${url}}`; diff --git a/src/shared/util.js b/src/shared/util.js index eccd578974b92..700f9f87104bb 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -1097,6 +1097,35 @@ const FontRenderOps = { TRANSLATE: 8, }; +// TODO: Remove this once `Uint8Array.prototype.toHex` is generally available. 
+function toHexUtil(arr) { + if (Uint8Array.prototype.toHex) { + return arr.toHex(); + } + const buf = []; + for (const num of arr) { + buf.push(num.toString(16).padStart(2, "0")); + } + return buf.join(""); +} + +// TODO: Remove this once `Uint8Array.prototype.toBase64` is generally +// available. +function toBase64Util(arr) { + if (Uint8Array.prototype.toBase64) { + return arr.toBase64(); + } + return btoa(bytesToString(arr)); +} + +// TODO: Remove this once `Uint8Array.fromBase64` is generally available. +function fromBase64Util(str) { + if (Uint8Array.fromBase64) { + return Uint8Array.fromBase64(str); + } + return stringToBytes(atob(str)); +} + export { AbortException, AnnotationActionEventType, @@ -1120,6 +1149,7 @@ export { FONT_IDENTITY_MATRIX, FontRenderOps, FormatError, + fromBase64Util, getModificationDate, getUuid, getVerbosityLevel, @@ -1149,6 +1179,8 @@ export { stringToPDFString, stringToUTF8String, TextRenderingMode, + toBase64Util, + toHexUtil, UnexpectedResponseException, UnknownErrorException, unreachable,