From 92263c6a580b58d3102a93e38530617a05b656af Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Wed, 24 Jul 2024 18:55:31 +0200 Subject: [PATCH] [Editor] Correctly save a non-ascii alt text --- src/core/struct_tree.js | 15 +++++++++---- test/unit/api_spec.js | 47 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/src/core/struct_tree.js b/src/core/struct_tree.js index 9f4f552b0f3e64..1461a9bd8e6fc2 100644 --- a/src/core/struct_tree.js +++ b/src/core/struct_tree.js @@ -15,6 +15,7 @@ import { AnnotationPrefix, stringToPDFString, warn } from "../shared/util.js"; import { Dict, isName, Name, Ref, RefSetCache } from "./primitives.js"; +import { isAscii, stringToUTF16String } from "./core_utils.js"; import { NumberTree } from "./name_number_tree.js"; import { writeObject } from "./writer.js"; @@ -281,6 +282,12 @@ class StructTreeRoot { } } + static convertString(str) { + return isAscii(str) + ? str + : stringToUTF16String(str, /* bigEndian = */ true); + } + static async #writeKids({ newAnnotationsByPage, structTreeRootRef, @@ -316,19 +323,19 @@ class StructTreeRoot { tagDict.set("S", Name.get(type)); if (title) { - tagDict.set("T", title); + tagDict.set("T", this.convertString(title)); } if (lang) { tagDict.set("Lang", lang); } if (alt) { - tagDict.set("Alt", alt); + tagDict.set("Alt", this.convertString(alt)); } if (expanded) { - tagDict.set("E", expanded); + tagDict.set("E", this.convertString(expanded)); } if (actualText) { - tagDict.set("ActualText", actualText); + tagDict.set("ActualText", this.convertString(actualText)); } await this.#updateParentTag({ diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 680ec6a9eb080a..6d6a2b0bdf5bfb 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -2524,6 +2524,21 @@ describe("api", function () { alt: "Hello World", }, }); + // Test if an alt-text using utf-16 is correctly handled. + // The Mahjong tile code is 0x1F000. + pdfDoc.annotationStorage.setValue("pdfjs_internal_editor_1", { + annotationType: AnnotationEditorType.STAMP, + rect: [128, 400, 148, 420], + rotation: 0, + bitmap: structuredClone(bitmap), + bitmapId: "im2", + pageIndex: 0, + structTreeParentId: "p3R_mc14", + accessibilityData: { + type: "Figure", + alt: "Γειά σου with a Mahjong tile 🀀", + }, + }); const data = await pdfDoc.saveDocument(); await loadingTask.destroy(); @@ -2532,7 +2547,7 @@ describe("api", function () { pdfDoc = await loadingTask.promise; const page = await pdfDoc.getPage(1); const tree = await page.getStructTree(); - const [predecessor, leaf] = findNode( + let [predecessor, leaf] = findNode( null, tree, 0, @@ -2560,6 +2575,36 @@ describe("api", function () { alt: "Hello World", }); + let count = 0; + [predecessor, leaf] = findNode(null, tree, 0, node => { + if (node.role === "Figure") { + count += 1; + return count === 2; + } + return false; + }); + + expect(predecessor).toEqual({ + role: "Span", + children: [ + { + type: "content", + id: "p3R_mc14", + }, + ], + }); + + expect(leaf).toEqual({ + role: "Figure", + children: [ + { + type: "annotation", + id: "pdfjs_internal_id_481R", + }, + ], + alt: "Γειά σου with a Mahjong tile 🀀", + }); + await loadingTask.destroy(); });