diff --git a/src/core/annotation.js b/src/core/annotation.js index 23fa2925268ae..353537110b1b5 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -23,26 +23,25 @@ import { AnnotationType, assert, BASELINE_FACTOR, - escapeString, FeatureTest, getModificationDate, IDENTITY_MATRIX, - isAscii, LINE_DESCENT_FACTOR, LINE_FACTOR, OPS, RenderingIntentFlag, shadow, stringToPDFString, - stringToUTF16BEString, unreachable, Util, warn, } from "../shared/util.js"; import { collectActions, + escapeString, getInheritableProperty, getRotationMatrix, + isAscii, numberToString, stringToUTF16String, } from "./core_utils.js"; @@ -1879,7 +1878,11 @@ class WidgetAnnotation extends Annotation { value, }; - const encoder = val => (isAscii(val) ? val : stringToUTF16BEString(val)); + const encoder = val => { + return isAscii(val) + ? val + : stringToUTF16String(val, /* bigEndian = */ true); + }; dict.set("V", Array.isArray(value) ? value.map(encoder) : encoder(value)); const maybeMK = this._getMKDict(rotation); @@ -3546,14 +3549,19 @@ class FreeTextAnnotation extends MarkupAnnotation { freetext.set("DA", da); freetext.set( "Contents", - isAscii(value) ? value : stringToUTF16BEString(value) + isAscii(value) + ? value + : stringToUTF16String(value, /* bigEndian = */ true) ); freetext.set("F", 4); freetext.set("Border", [0, 0, 0]); freetext.set("Rotate", rotation); if (user) { - freetext.set("T", isAscii(user) ? user : stringToUTF16BEString(user)); + freetext.set( + "T", + isAscii(user) ? user : stringToUTF16String(user, /* bigEndian = */ true) + ); } if (apRef || ap) { diff --git a/src/core/core_utils.js b/src/core/core_utils.js index f8ba471eee52d..a4c23b7c21114 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -313,6 +313,19 @@ function escapePDFName(str) { return buffer.join(""); } +// Replace "(", ")", "\n", "\r" and "\" by "\(", "\)", "\\n", "\\r" and "\\" +// in order to write it in a PDF file. +function escapeString(str) { + return str.replace(/([()\\\n\r])/g, match => { + if (match === "\n") { + return "\\n"; + } else if (match === "\r") { + return "\\r"; + } + return `\\${match}`; + }); +} + function _collectJS(entry, xref, list, parents) { if (!entry) { return; @@ -572,6 +585,10 @@ function getNewAnnotationsMap(annotationStorage) { return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null; } +function isAscii(str) { + return /^[\x00-\x7F]*$/.test(str); +} + function stringToUTF16HexString(str) { const buf = []; for (let i = 0, ii = str.length; i < ii; i++) { @@ -584,8 +601,11 @@ function stringToUTF16HexString(str) { return buf.join(""); } -function stringToUTF16String(str) { +function stringToUTF16String(str, bigEndian = false) { const buf = []; + if (bigEndian) { + buf.push("\xFE\xFF"); + } for (let i = 0, ii = str.length; i < ii; i++) { const char = str.charCodeAt(i); buf.push( @@ -614,11 +634,13 @@ export { DocStats, encodeToXmlString, escapePDFName, + escapeString, getArrayLookupTableFactory, getInheritableProperty, getLookupTableFactory, getNewAnnotationsMap, getRotationMatrix, + isAscii, isWhiteSpace, log2, MissingDataException, diff --git a/src/core/writer.js b/src/core/writer.js index 1de23297bb58b..51626f157c22e 100644 --- a/src/core/writer.js +++ b/src/core/writer.js @@ -13,9 +13,14 @@ * limitations under the License. */ -import { bytesToString, escapeString, warn } from "../shared/util.js"; +import { bytesToString, warn } from "../shared/util.js"; import { Dict, Name, Ref } from "./primitives.js"; -import { escapePDFName, numberToString, parseXFAPath } from "./core_utils.js"; +import { + escapePDFName, + escapeString, + numberToString, + parseXFAPath, +} from "./core_utils.js"; import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js"; import { BaseStream } from "./base_stream.js"; import { calculateMD5 } from "./crypto.js"; diff --git a/src/shared/util.js b/src/shared/util.js index e193268bcd360..6394e383c2c84 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -1037,36 +1037,6 @@ function stringToPDFString(str) { return strBuf.join(""); } -function escapeString(str) { - // replace "(", ")", "\n", "\r" and "\" - // by "\(", "\)", "\\n", "\\r" and "\\" - // in order to write it in a PDF file. - return str.replace(/([()\\\n\r])/g, match => { - if (match === "\n") { - return "\\n"; - } else if (match === "\r") { - return "\\r"; - } - return `\\${match}`; - }); -} - -function isAscii(str) { - return /^[\x00-\x7F]*$/.test(str); -} - -function stringToUTF16BEString(str) { - const buf = ["\xFE\xFF"]; - for (let i = 0, ii = str.length; i < ii; i++) { - const char = str.charCodeAt(i); - buf.push( - String.fromCharCode((char >> 8) & 0xff), - String.fromCharCode(char & 0xff) - ); - } - return buf.join(""); -} - function stringToUTF8String(str) { return decodeURIComponent(escape(str)); } @@ -1167,7 +1137,6 @@ export { createPromiseCapability, createValidAbsoluteUrl, DocumentActionEventType, - escapeString, FeatureTest, FONT_IDENTITY_MATRIX, FontType, @@ -1180,7 +1149,6 @@ export { InvalidPDFException, isArrayBuffer, isArrayEqual, - isAscii, LINE_DESCENT_FACTOR, LINE_FACTOR, MissingPDFException, @@ -1198,7 +1166,6 @@ export { string32, stringToBytes, stringToPDFString, - stringToUTF16BEString, stringToUTF8String, TextRenderingMode, UnexpectedResponseException, diff --git a/test/unit/core_utils_spec.js b/test/unit/core_utils_spec.js index cc5c9e69eebb5..1a8da5a7c392f 100644 --- a/test/unit/core_utils_spec.js +++ b/test/unit/core_utils_spec.js @@ -17,10 +17,14 @@ import { Dict, Ref } from "../../src/core/primitives.js"; import { encodeToXmlString, escapePDFName, + escapeString, getInheritableProperty, + isAscii, isWhiteSpace, log2, parseXFAPath, + stringToUTF16HexString, + stringToUTF16String, toRomanNumerals, validateCSSFont, } from "../../src/core/core_utils.js"; @@ -221,6 +225,14 @@ describe("core_utils", function () { }); }); + describe("escapeString", function () { + it("should escape (, ), \\n, \\r, and \\", function () { + expect(escapeString("((a\\a))\n(b(b\\b)\rb)")).toEqual( + "\\(\\(a\\\\a\\)\\)\\n\\(b\\(b\\\\b\\)\\rb\\)" + ); + }); + }); + describe("encodeToXmlString", function () { it("should get a correctly encoded string with some entities", function () { const str = "\"\u0397ell😂' & "; @@ -333,4 +345,50 @@ describe("core_utils", function () { expect(cssFontInfo.italicAngle).toEqual("2.718"); }); }); + + describe("isAscii", function () { + it("handles ascii/non-ascii strings", function () { + expect(isAscii("hello world")).toEqual(true); + expect(isAscii("こんにちは世界の")).toEqual(false); + expect(isAscii("hello world in Japanese is こんにちは世界の")).toEqual( + false + ); + }); + }); + + describe("stringToUTF16HexString", function () { + it("should encode a string in UTF16 hexadecimal format", function () { + expect(stringToUTF16HexString("hello world")).toEqual( + "00680065006c006c006f00200077006f0072006c0064" + ); + + expect(stringToUTF16HexString("こんにちは世界の")).toEqual( + "30533093306b3061306f4e16754c306e" + ); + }); + }); + + describe("stringToUTF16String", function () { + it("should encode a string in UTF16", function () { + expect(stringToUTF16String("hello world")).toEqual( + "\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d" + ); + + expect(stringToUTF16String("こんにちは世界の")).toEqual( + "\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e" + ); + }); + + it("should encode a string in UTF16BE with a BOM", function () { + expect( + stringToUTF16String("hello world", /* bigEndian = */ true) + ).toEqual("\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"); + + expect( + stringToUTF16String("こんにちは世界の", /* bigEndian = */ true) + ).toEqual( + "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e" + ); + }); + }); }); diff --git a/test/unit/util_spec.js b/test/unit/util_spec.js index 39874013ec9b9..43ee828836857 100644 --- a/test/unit/util_spec.js +++ b/test/unit/util_spec.js @@ -17,14 +17,11 @@ import { bytesToString, createPromiseCapability, createValidAbsoluteUrl, - escapeString, getModificationDate, isArrayBuffer, - isAscii, string32, stringToBytes, stringToPDFString, - stringToUTF16BEString, } from "../../src/shared/util.js"; describe("util", function () { @@ -246,40 +243,10 @@ describe("util", function () { }); }); - describe("escapeString", function () { - it("should escape (, ), \\n, \\r, and \\", function () { - expect(escapeString("((a\\a))\n(b(b\\b)\rb)")).toEqual( - "\\(\\(a\\\\a\\)\\)\\n\\(b\\(b\\\\b\\)\\rb\\)" - ); - }); - }); - describe("getModificationDate", function () { it("should get a correctly formatted date", function () { const date = new Date(Date.UTC(3141, 5, 9, 2, 6, 53)); expect(getModificationDate(date)).toEqual("31410609020653"); }); }); - - describe("isAscii", function () { - it("handles ascii/non-ascii strings", function () { - expect(isAscii("hello world")).toEqual(true); - expect(isAscii("こんにちは世界の")).toEqual(false); - expect(isAscii("hello world in Japanese is こんにちは世界の")).toEqual( - false - ); - }); - }); - - describe("stringToUTF16BEString", function () { - it("should encode a string in UTF16BE with a BOM", function () { - expect(stringToUTF16BEString("hello world")).toEqual( - "\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d" - ); - expect(stringToUTF16BEString("こんにちは世界の")).toEqual( - "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61" + - "\x30\x6f\x4e\x16\x75\x4c\x30\x6e" - ); - }); - }); });