Skip to content

Commit

Permalink
Print correctly documents containing chars with a Unicode code point greater th…
Browse files Browse the repository at this point in the history
…an 0xFFFF (bug 1669097)
  • Loading branch information
calixteman committed Jan 22, 2024
1 parent d549c2e commit 06601fd
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 34 deletions.
2 changes: 1 addition & 1 deletion src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -3826,7 +3826,7 @@ class FreeTextAnnotation extends MarkupAnnotation {
fontColor,
strokeAlpha
);
this._streams.push(this.appearance, FakeUnicodeFont.toUnicodeStream);
this._streams.push(this.appearance);
} else {
warn(
"FreeTextAnnotation: OffscreenCanvas is not supported, annotation may not render correctly."
Expand Down
12 changes: 12 additions & 0 deletions src/core/core_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,17 @@ const XMLEntities = {
/* ' */ 0x27: "'",
};

/**
 * Iterate over a string one Unicode code point at a time.
 *
 * Valid surrogate pairs are yielded as a single code point above 0xFFFF.
 * Every other UTF-16 code unit, including unpaired surrogates and the
 * noncharacters U+FFFE/U+FFFF, is yielded as-is and consumes exactly one
 * code unit, so the character that follows it is never skipped.
 *
 * @param {string} str - The string to walk.
 * @yields {number} The numeric value of each code point, in order.
 */
function* codePointIter(str) {
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.codePointAt(i);
    if (char > 0xffff) {
      // `codePointAt` only returns a value above 0xFFFF when it has combined
      // a high and a low surrogate, i.e. char is represented by two u16.
      i++;
    }
    yield char;
  }
}

function encodeToXmlString(str) {
const buffer = [];
let start = 0;
Expand Down Expand Up @@ -602,6 +613,7 @@ function getRotationMatrix(rotation, width, height) {

export {
arrayBuffersToBytes,
codePointIter,
collectActions,
encodeToXmlString,
escapePDFName,
Expand Down
38 changes: 5 additions & 33 deletions src/core/default_appearance.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@
* limitations under the License.
*/

import { Dict, Name } from "./primitives.js";
import {
codePointIter,
escapePDFName,
getRotationMatrix,
numberToString,
stringToUTF16HexString,
} from "./core_utils.js";
import { Dict, Name } from "./primitives.js";
import {
LINE_DESCENT_FACTOR,
LINE_FACTOR,
Expand Down Expand Up @@ -251,35 +252,6 @@ class FakeUnicodeFont {
);
}

/**
 * Reference to a ToUnicode CMap stream, created on first access and then
 * reused.
 *
 * The result is cached in the static `FakeUnicodeFont._toUnicodeRef`, so the
 * stream is built with the `xref` of whichever instance reads this getter
 * first.
 *
 * @returns The value produced by `this.xref.getNewPersistentRef` for the
 *   CMap stream.
 */
get toUnicodeRef() {
// Only build the stream when no cached reference exists yet.
if (!FakeUnicodeFont._toUnicodeRef) {
// CMap source text: it declares a single codespace range <0000>-<FFFF> and
// a single bfrange covering <0000>-<FFFF> whose destination starts at <0000>.
const toUnicode = `/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS) /Supplement 0 >> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
1 beginbfrange
<0000> <FFFF> <0000>
endbfrange
endcmap CMapName currentdict /CMap defineresource pop end end`;
// The stream object itself is also stored on the class, as
// `FakeUnicodeFont.toUnicodeStream`, in addition to the local binding.
const toUnicodeStream = (FakeUnicodeFont.toUnicodeStream =
new StringStream(toUnicode));
const toUnicodeDict = new Dict(this.xref);
toUnicodeStream.dict = toUnicodeDict;
// Length is taken from the JS string length of the CMap source above.
toUnicodeDict.set("Length", toUnicode.length);
FakeUnicodeFont._toUnicodeRef =
this.xref.getNewPersistentRef(toUnicodeStream);
}

return FakeUnicodeFont._toUnicodeRef;
}

get fontDescriptorRef() {
if (!FakeUnicodeFont._fontDescriptorRef) {
const fontDescriptor = new Dict(this.xref);
Expand Down Expand Up @@ -350,7 +322,7 @@ endcmap CMapName currentdict /CMap defineresource pop end end`;
baseFont.set("Subtype", Name.get("Type0"));
baseFont.set("Encoding", Name.get("Identity-H"));
baseFont.set("DescendantFonts", [this.descendantFontRef]);
baseFont.set("ToUnicode", this.toUnicodeRef);
baseFont.set("ToUnicode", Name.get("Identity-H"));

return this.xref.getNewPersistentRef(baseFont);
}
Expand Down Expand Up @@ -420,8 +392,8 @@ endcmap CMapName currentdict /CMap defineresource pop end end`;
// languages, like arabic, it'd be wrong because of ligatures.
const lineWidth = ctx.measureText(line).width;
maxWidth = Math.max(maxWidth, lineWidth);
for (const char of line.split("")) {
const code = char.charCodeAt(0);
for (const code of codePointIter(line)) {
const char = String.fromCodePoint(code);
let width = this.widths.get(code);
if (width === undefined) {
const metrics = ctx.measureText(char);
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -626,3 +626,4 @@
!file_pdfjs_form.pdf
!issue17492.pdf
!issue17540.pdf
!bug1669097.pdf
Binary file added test/pdfs/bug1669097.pdf
Binary file not shown.
25 changes: 25 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -9643,5 +9643,30 @@
"structTreeParentId": null
}
}
},
{
"id": "bug1669097",
"file": "pdfs/bug1669097.pdf",
"md5": "561b3abac1fe49e1c9cd265cbf4a456e",
"rounds": 1,
"type": "eq",
"save": true,
"print": true,
"annotationStorage": {
"24R": {
"value": "😇👽🖖"
},
"pdfjs_internal_editor_0": {
"annotationType": 3,
"color": [0, 0, 0],
"fontSize": 10,
"value": "😇😇😇😇😇😇👽👽👽👽👽👽🖖",
"pageIndex": 0,
"rect": [267, 638, 452, 658],
"rotation": 0,
"structTreeParentId": null,
"id": null
}
}
}
]

0 comments on commit 06601fd

Please sign in to comment.