From 2b69fb76ac463c36f82f37cedb5c004ec364654a Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 1 May 2024 12:28:33 +0200 Subject: [PATCH 1/2] [api-minor] Improve the `FileSpec` implementation - Check that the `filename` is actually a string, before parsing it further. - Use proper "shadowing" in the `filename` getter. - Add a bit more validation of the data in `pickPlatformItem`. - Last, but not least, return both the original `filename` and the (path stripped) variant needed in the display-layer and viewer. --- src/core/catalog.js | 4 ++-- src/core/file_spec.js | 31 ++++++++++++++++++++----------- src/display/annotation_layer.js | 11 ++++------- test/unit/annotation_spec.js | 9 ++++++--- test/unit/api_spec.js | 16 +++++++++------- web/pdf_attachment_viewer.js | 22 ++++++---------------- 6 files changed, 47 insertions(+), 46 deletions(-) diff --git a/src/core/catalog.js b/src/core/catalog.js index a30ccf337750c..f2089ebcce013 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -1619,8 +1619,8 @@ class Catalog { /* xref = */ null, /* skipContent = */ true ); - const { filename } = fs.serializable; - url = filename; + const { rawFilename } = fs.serializable; + url = rawFilename; } else if (typeof urlDict === "string") { url = urlDict; } diff --git a/src/core/file_spec.js b/src/core/file_spec.js index 11ddb38e452b6..003ae64f0c90a 100644 --- a/src/core/file_spec.js +++ b/src/core/file_spec.js @@ -18,6 +18,9 @@ import { BaseStream } from "./base_stream.js"; import { Dict } from "./primitives.js"; function pickPlatformItem(dict) { + if (!(dict instanceof Dict)) { + return null; + } // Look for the filename in this order: // UF, F, Unix, Mac, DOS if (dict.has("UF")) { @@ -34,6 +37,10 @@ function pickPlatformItem(dict) { return null; } +function stripPath(str) { + return str.substring(str.lastIndexOf("/") + 1); +} + /** * "A PDF file can refer to the contents of another file by using a File * Specification (PDF 1.1)", see the spec (7.11) for more details. @@ -66,26 +73,27 @@ class FileSpec { } get filename() { - if (!this._filename && this.root) { - const filename = pickPlatformItem(this.root) || "unnamed"; - this._filename = stringToPDFString(filename) + let filename = ""; + + const item = pickPlatformItem(this.root); + if (item && typeof item === "string") { + filename = stringToPDFString(item) .replaceAll("\\\\", "\\") .replaceAll("\\/", "/") .replaceAll("\\", "/"); } - return this._filename; + return shadow(this, "filename", filename || "unnamed"); } get content() { if (!this.#contentAvailable) { return null; } - if (!this.contentRef && this.root) { - this.contentRef = pickPlatformItem(this.root.get("EF")); - } + this._contentRef ||= pickPlatformItem(this.root?.get("EF")); + let content = null; - if (this.contentRef) { - const fileObj = this.xref.fetchIfRef(this.contentRef); + if (this._contentRef) { + const fileObj = this.xref.fetchIfRef(this._contentRef); if (fileObj instanceof BaseStream) { content = fileObj.getBytes(); } else { @@ -94,7 +102,7 @@ class FileSpec { ); } } else { - warn("Embedded file specification does not have a content"); + warn("Embedded file specification does not have any content"); } return content; } @@ -111,7 +119,8 @@ class FileSpec { get serializable() { return { - filename: this.filename, + rawFilename: this.filename, + filename: stripPath(this.filename), content: this.content, description: this.description, }; diff --git a/src/display/annotation_layer.js b/src/display/annotation_layer.js index 6be8d71e9da90..5245ecbc7714f 100644 --- a/src/display/annotation_layer.js +++ b/src/display/annotation_layer.js @@ -37,7 +37,6 @@ import { } from "../shared/util.js"; import { DOMSVGFactory, - getFilenameFromUrl, PDFDateString, setLayerDimensions, } from "./display_utils.js"; @@ -2859,15 +2858,13 @@ class FileAttachmentAnnotationElement extends AnnotationElement { constructor(parameters) { super(parameters, { isRenderable: true }); - const { filename, content, description } = this.data.file; - this.filename = getFilenameFromUrl(filename, /* onlyStripPath = */ true); - this.content = content; + const { file } = this.data; + this.filename = file.filename; + this.content = file.content; this.linkService.eventBus?.dispatch("fileattachmentannotation", { source: this, - filename, - content, - description, + ...file, }); } diff --git a/test/unit/annotation_spec.js b/test/unit/annotation_spec.js index f118eae3d5e5e..78d065fd07136 100644 --- a/test/unit/annotation_spec.js +++ b/test/unit/annotation_spec.js @@ -4033,9 +4033,12 @@ describe("annotation", function () { idFactoryMock ); expect(data.annotationType).toEqual(AnnotationType.FILEATTACHMENT); - expect(data.file.filename).toEqual("Test.txt"); - expect(data.file.content).toEqual(stringToBytes("Test attachment")); - expect(data.file.description).toEqual("abc"); + expect(data.file).toEqual({ + rawFilename: "Test.txt", + filename: "Test.txt", + content: stringToBytes("Test attachment"), + description: "abc", + }); }); }); diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 6a0c034a204b3..04646e89324cc 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1475,12 +1475,12 @@ describe("api", function () { const pdfDoc = await loadingTask.promise; const attachments = await pdfDoc.getAttachments(); - const { filename, content, description } = attachments["foo.txt"]; - expect(filename).toEqual("foo.txt"); - expect(content).toEqual( - new Uint8Array([98, 97, 114, 32, 98, 97, 122, 32, 10]) - ); - expect(description).toEqual(""); + expect(attachments["foo.txt"]).toEqual({ + rawFilename: "foo.txt", + filename: "foo.txt", + content: new Uint8Array([98, 97, 114, 32, 98, 97, 122, 32, 10]), + description: "", + }); await loadingTask.destroy(); }); @@ -1490,7 +1490,9 @@ describe("api", function () { const pdfDoc = await loadingTask.promise; const attachments = await pdfDoc.getAttachments(); - const { filename, content, description } = attachments["empty.pdf"]; + const { rawFilename, filename, content, description } = + attachments["empty.pdf"]; + expect(rawFilename).toEqual("Empty page.pdf"); expect(filename).toEqual("Empty page.pdf"); expect(content instanceof Uint8Array).toEqual(true); expect(content.length).toEqual(2357); diff --git a/web/pdf_attachment_viewer.js b/web/pdf_attachment_viewer.js index eb0cfd9e612f4..d80270466a3dc 100644 --- a/web/pdf_attachment_viewer.js +++ b/web/pdf_attachment_viewer.js @@ -18,7 +18,6 @@ /** @typedef {import("./download_manager.js").DownloadManager} DownloadManager */ import { BaseTreeViewer } from "./base_tree_viewer.js"; -import { getFilenameFromUrl } from "pdfjs-lib"; import { waitOnEventOrTimeout } from "./event_utils.js"; /** @@ -122,19 +121,13 @@ class PDFAttachmentViewer extends BaseTreeViewer { let attachmentsCount = 0; for (const name in attachments) { const item = attachments[name]; - const content = item.content, - description = item.description, - filename = getFilenameFromUrl( - item.filename, - /* onlyStripPath = */ true - ); const div = document.createElement("div"); div.className = "treeItem"; const element = document.createElement("a"); - this._bindLink(element, { content, description, filename }); - element.textContent = this._normalizeTextContent(filename); + this._bindLink(element, item); + element.textContent = this._normalizeTextContent(item.filename); div.append(element); @@ -148,7 +141,7 @@ class PDFAttachmentViewer extends BaseTreeViewer { /** * Used to append FileAttachment annotations to the sidebar. */ - #appendAttachment({ filename, content, description }) { + #appendAttachment(item) { const renderedPromise = this._renderedCapability.promise; renderedPromise.then(() => { @@ -158,15 +151,12 @@ class PDFAttachmentViewer extends BaseTreeViewer { const attachments = this._attachments || Object.create(null); for (const name in attachments) { - if (filename === name) { + if (item.filename === name) { return; // Ignore the new attachment if it already exists. } } - attachments[filename] = { - filename, - content, - description, - }; + attachments[item.filename] = item; + this.render({ attachments, keepRenderedCapability: true, From a790f2df5d58b22dd1380daf895ddf1c36c218c6 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 3 May 2024 08:29:41 +0200 Subject: [PATCH 2/2] [api-minor] Remove the unused `onlyStripPath` option from the `getFilenameFromUrl` helper function --- src/display/display_utils.js | 7 ++----- test/unit/display_utils_spec.js | 7 ------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/display/display_utils.js b/src/display/display_utils.js index 67213232306ce..adf9cbdea9d72 100644 --- a/src/display/display_utils.js +++ b/src/display/display_utils.js @@ -803,13 +803,10 @@ function isPdfFile(filename) { /** * Gets the filename from a given URL. * @param {string} url - * @param {boolean} [onlyStripPath] * @returns {string} */ -function getFilenameFromUrl(url, onlyStripPath = false) { - if (!onlyStripPath) { - [url] = url.split(/[#?]/, 1); - } +function getFilenameFromUrl(url) { + [url] = url.split(/[#?]/, 1); return url.substring(url.lastIndexOf("/") + 1); } diff --git a/test/unit/display_utils_spec.js b/test/unit/display_utils_spec.js index 374f4b24f2db2..1d684bfd7b9de 100644 --- a/test/unit/display_utils_spec.js +++ b/test/unit/display_utils_spec.js @@ -189,13 +189,6 @@ describe("display_utils", function () { const url = "https://server.org/filename.pdf?foo=bar"; expect(getFilenameFromUrl(url)).toEqual("filename.pdf"); }); - - it("should get the filename from a relative URL, keeping the anchor", function () { - const url = "../../part1#part2.pdf"; - expect(getFilenameFromUrl(url, /* onlyStripPath = */ true)).toEqual( - "part1#part2.pdf" - ); - }); }); describe("getPdfFilenameFromUrl", function () {