Skip to content

Commit

Permalink
Merge pull request #15701 from Snuffleupagus/move-string-helpers
Browse files Browse the repository at this point in the history
Move some string helper functions to the worker-thread
  • Loading branch information
timvandermeij authored Nov 19, 2022
2 parents 3d49459 + 7d029f8 commit d6908ee
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 75 deletions.
20 changes: 14 additions & 6 deletions src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,25 @@ import {
AnnotationType,
assert,
BASELINE_FACTOR,
escapeString,
FeatureTest,
getModificationDate,
IDENTITY_MATRIX,
isAscii,
LINE_DESCENT_FACTOR,
LINE_FACTOR,
OPS,
RenderingIntentFlag,
shadow,
stringToPDFString,
stringToUTF16BEString,
unreachable,
Util,
warn,
} from "../shared/util.js";
import {
collectActions,
escapeString,
getInheritableProperty,
getRotationMatrix,
isAscii,
numberToString,
stringToUTF16String,
} from "./core_utils.js";
Expand Down Expand Up @@ -1879,7 +1878,11 @@ class WidgetAnnotation extends Annotation {
value,
};

const encoder = val => (isAscii(val) ? val : stringToUTF16BEString(val));
const encoder = val => {
return isAscii(val)
? val
: stringToUTF16String(val, /* bigEndian = */ true);
};
dict.set("V", Array.isArray(value) ? value.map(encoder) : encoder(value));

const maybeMK = this._getMKDict(rotation);
Expand Down Expand Up @@ -3546,14 +3549,19 @@ class FreeTextAnnotation extends MarkupAnnotation {
freetext.set("DA", da);
freetext.set(
"Contents",
isAscii(value) ? value : stringToUTF16BEString(value)
isAscii(value)
? value
: stringToUTF16String(value, /* bigEndian = */ true)
);
freetext.set("F", 4);
freetext.set("Border", [0, 0, 0]);
freetext.set("Rotate", rotation);

if (user) {
freetext.set("T", isAscii(user) ? user : stringToUTF16BEString(user));
freetext.set(
"T",
isAscii(user) ? user : stringToUTF16String(user, /* bigEndian = */ true)
);
}

if (apRef || ap) {
Expand Down
24 changes: 23 additions & 1 deletion src/core/core_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,19 @@ function escapePDFName(str) {
return buffer.join("");
}

/**
 * Escape a string so that it can be written in a PDF file.
 *
 * The characters "(", ")" and "\" are prefixed with a backslash, while the
 * line feed and carriage return characters are replaced by the two-character
 * sequences "\n" and "\r" respectively.
 *
 * @param {string} str - The string to escape.
 * @returns {string} The escaped string.
 */
function escapeString(str) {
  return str.replace(/([()\\\n\r])/g, char => {
    switch (char) {
      case "\n":
        return "\\n";
      case "\r":
        return "\\r";
      default:
        // Parentheses and backslashes only need a leading backslash.
        return `\\${char}`;
    }
  });
}

function _collectJS(entry, xref, list, parents) {
if (!entry) {
return;
Expand Down Expand Up @@ -572,6 +585,10 @@ function getNewAnnotationsMap(annotationStorage) {
return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null;
}

/**
 * Check whether a string consists solely of characters in the range
 * U+0000 to U+007F. The empty string is considered to be ASCII.
 *
 * @param {string} str - The string to check.
 * @returns {boolean} `true` if every character is within the ASCII range.
 */
function isAscii(str) {
  const asciiOnlyPattern = /^[\x00-\x7F]*$/;
  return asciiOnlyPattern.test(str);
}

function stringToUTF16HexString(str) {
const buf = [];
for (let i = 0, ii = str.length; i < ii; i++) {
Expand All @@ -584,8 +601,11 @@ function stringToUTF16HexString(str) {
return buf.join("");
}

function stringToUTF16String(str) {
function stringToUTF16String(str, bigEndian = false) {
const buf = [];
if (bigEndian) {
buf.push("\xFE\xFF");
}
for (let i = 0, ii = str.length; i < ii; i++) {
const char = str.charCodeAt(i);
buf.push(
Expand Down Expand Up @@ -614,11 +634,13 @@ export {
DocStats,
encodeToXmlString,
escapePDFName,
escapeString,
getArrayLookupTableFactory,
getInheritableProperty,
getLookupTableFactory,
getNewAnnotationsMap,
getRotationMatrix,
isAscii,
isWhiteSpace,
log2,
MissingDataException,
Expand Down
9 changes: 7 additions & 2 deletions src/core/writer.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@
* limitations under the License.
*/

import { bytesToString, escapeString, warn } from "../shared/util.js";
import { bytesToString, warn } from "../shared/util.js";
import { Dict, Name, Ref } from "./primitives.js";
import { escapePDFName, numberToString, parseXFAPath } from "./core_utils.js";
import {
escapePDFName,
escapeString,
numberToString,
parseXFAPath,
} from "./core_utils.js";
import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js";
Expand Down
33 changes: 0 additions & 33 deletions src/shared/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -1037,36 +1037,6 @@ function stringToPDFString(str) {
return strBuf.join("");
}

/**
 * Escape a string in order to write it in a PDF file, i.e. replace
 * "(", ")", "\n", "\r" and "\" by "\(", "\)", "\\n", "\\r" and "\\".
 *
 * @param {string} str - The string to escape.
 * @returns {string} The escaped string.
 */
function escapeString(str) {
  // Maps one matched character to its escaped representation.
  const escapeChar = matched => {
    if (matched === "\n") {
      return "\\n";
    }
    if (matched === "\r") {
      return "\\r";
    }
    return `\\${matched}`;
  };
  return str.replace(/([()\\\n\r])/g, escapeChar);
}

/**
 * Determine whether every character of a string lies in the range
 * \x00-\x7F. An empty string also matches.
 *
 * @param {string} str - The string to inspect.
 * @returns {boolean} Whether the string only contains ASCII characters.
 */
function isAscii(str) {
  const matchResult = /^[\x00-\x7F]*$/.exec(str);
  return matchResult !== null;
}

/**
 * Encode a string as big-endian UTF-16, prefixed with the "\xFE\xFF"
 * byte order mark. Each UTF-16 code unit of the input is emitted as two
 * characters: its high byte followed by its low byte.
 *
 * @param {string} str - The string to encode.
 * @returns {string} The encoded string, one character per output byte.
 */
function stringToUTF16BEString(str) {
  let encoded = "\xFE\xFF";
  for (let idx = 0; idx < str.length; idx++) {
    const codeUnit = str.charCodeAt(idx);
    const highByte = (codeUnit >> 8) & 0xff;
    const lowByte = codeUnit & 0xff;
    encoded += String.fromCharCode(highByte, lowByte);
  }
  return encoded;
}

/**
 * Convert a string by first percent-encoding it with `escape` and then
 * decoding the result with `decodeURIComponent`, which interprets the
 * percent-encoded bytes as UTF-8.
 *
 * @param {string} str - The string to convert.
 * @returns {string} The converted string.
 */
function stringToUTF8String(str) {
  const percentEncoded = escape(str);
  return decodeURIComponent(percentEncoded);
}
Expand Down Expand Up @@ -1167,7 +1137,6 @@ export {
createPromiseCapability,
createValidAbsoluteUrl,
DocumentActionEventType,
escapeString,
FeatureTest,
FONT_IDENTITY_MATRIX,
FontType,
Expand All @@ -1180,7 +1149,6 @@ export {
InvalidPDFException,
isArrayBuffer,
isArrayEqual,
isAscii,
LINE_DESCENT_FACTOR,
LINE_FACTOR,
MissingPDFException,
Expand All @@ -1198,7 +1166,6 @@ export {
string32,
stringToBytes,
stringToPDFString,
stringToUTF16BEString,
stringToUTF8String,
TextRenderingMode,
UnexpectedResponseException,
Expand Down
58 changes: 58 additions & 0 deletions test/unit/core_utils_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,14 @@ import { Dict, Ref } from "../../src/core/primitives.js";
import {
encodeToXmlString,
escapePDFName,
escapeString,
getInheritableProperty,
isAscii,
isWhiteSpace,
log2,
parseXFAPath,
stringToUTF16HexString,
stringToUTF16String,
toRomanNumerals,
validateCSSFont,
} from "../../src/core/core_utils.js";
Expand Down Expand Up @@ -221,6 +225,14 @@ describe("core_utils", function () {
});
});

// Checks `escapeString` against an input containing every escaped character.
describe("escapeString", function () {
  it("should escape (, ), \\n, \\r, and \\", function () {
    const input = "((a\\a))\n(b(b\\b)\rb)";
    const expected = "\\(\\(a\\\\a\\)\\)\\n\\(b\\(b\\\\b\\)\\rb\\)";

    expect(escapeString(input)).toEqual(expected);
  });
});

describe("encodeToXmlString", function () {
it("should get a correctly encoded string with some entities", function () {
const str = "\"\u0397ell😂' & <W😂rld>";
Expand Down Expand Up @@ -333,4 +345,50 @@ describe("core_utils", function () {
expect(cssFontInfo.italicAngle).toEqual("2.718");
});
});

// Checks `isAscii` with pure ASCII, pure non-ASCII and mixed input.
describe("isAscii", function () {
  it("handles ascii/non-ascii strings", function () {
    const asciiOnly = "hello world";
    const nonAscii = "こんにちは世界の";
    const mixed = "hello world in Japanese is こんにちは世界の";

    expect(isAscii(asciiOnly)).toEqual(true);
    expect(isAscii(nonAscii)).toEqual(false);
    expect(isAscii(mixed)).toEqual(false);
  });
});

// Checks the hexadecimal UTF-16 encoding of ASCII and Japanese input.
describe("stringToUTF16HexString", function () {
  it("should encode a string in UTF16 hexadecimal format", function () {
    const asciiHex = stringToUTF16HexString("hello world");
    expect(asciiHex).toEqual("00680065006c006c006f00200077006f0072006c0064");

    const japaneseHex = stringToUTF16HexString("こんにちは世界の");
    expect(japaneseHex).toEqual("30533093306b3061306f4e16754c306e");
  });
});

// Checks `stringToUTF16String` both without and with the `bigEndian` flag.
describe("stringToUTF16String", function () {
  it("should encode a string in UTF16", function () {
    const asciiEncoded = stringToUTF16String("hello world");
    expect(asciiEncoded).toEqual("\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

    const japaneseEncoded = stringToUTF16String("こんにちは世界の");
    expect(japaneseEncoded).toEqual(
      "\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
    );
  });

  it("should encode a string in UTF16BE with a BOM", function () {
    const asciiEncoded = stringToUTF16String(
      "hello world",
      /* bigEndian = */ true
    );
    expect(asciiEncoded).toEqual(
      "\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"
    );

    const japaneseEncoded = stringToUTF16String(
      "こんにちは世界の",
      /* bigEndian = */ true
    );
    expect(japaneseEncoded).toEqual(
      "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
    );
  });
});
});
33 changes: 0 additions & 33 deletions test/unit/util_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,11 @@ import {
bytesToString,
createPromiseCapability,
createValidAbsoluteUrl,
escapeString,
getModificationDate,
isArrayBuffer,
isAscii,
string32,
stringToBytes,
stringToPDFString,
stringToUTF16BEString,
} from "../../src/shared/util.js";

describe("util", function () {
Expand Down Expand Up @@ -246,40 +243,10 @@ describe("util", function () {
});
});

// Exercises `escapeString` with parentheses, backslashes and line breaks.
describe("escapeString", function () {
  it("should escape (, ), \\n, \\r, and \\", function () {
    const escaped = escapeString("((a\\a))\n(b(b\\b)\rb)");

    expect(escaped).toEqual("\\(\\(a\\\\a\\)\\)\\n\\(b\\(b\\\\b\\)\\rb\\)");
  });
});

// Checks the formatted output of `getModificationDate` for a fixed UTC date.
describe("getModificationDate", function () {
  it("should get a correctly formatted date", function () {
    const timestamp = Date.UTC(3141, 5, 9, 2, 6, 53);
    const formatted = getModificationDate(new Date(timestamp));

    expect(formatted).toEqual("31410609020653");
  });
});

// Runs `isAscii` over a table of inputs paired with the expected result.
describe("isAscii", function () {
  it("handles ascii/non-ascii strings", function () {
    const cases = [
      ["hello world", true],
      ["こんにちは世界の", false],
      ["hello world in Japanese is こんにちは世界の", false],
    ];

    for (const [input, expected] of cases) {
      expect(isAscii(input)).toEqual(expected);
    }
  });
});

// Checks that `stringToUTF16BEString` output starts with the BOM and
// contains two characters per input code unit.
describe("stringToUTF16BEString", function () {
  it("should encode a string in UTF16BE with a BOM", function () {
    const asciiEncoded = stringToUTF16BEString("hello world");
    expect(asciiEncoded).toEqual(
      "\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d"
    );

    const japaneseExpected =
      "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61" +
      "\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
    const japaneseEncoded = stringToUTF16BEString("こんにちは世界の");
    expect(japaneseEncoded).toEqual(japaneseExpected);
  });
});
});

0 comments on commit d6908ee

Please sign in to comment.