Skip to content

Commit

Permalink
Merge pull request #17986 from calixteman/fix_struct_tree
Browse files Browse the repository at this point in the history
Allow inserting several annotations under the same parent in the structure tree
  • Loading branch information
calixteman authored Apr 24, 2024
2 parents 885dd72 + 45fa867 commit d1f494d
Show file tree
Hide file tree
Showing 6 changed files with 242 additions and 111 deletions.
6 changes: 6 additions & 0 deletions src/core/primitives.js
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,12 @@ class RefSetCache {
clear() {
this._map.clear();
}

*items() {
for (const [ref, value] of this._map) {
yield [Ref.fromString(ref), value];
}
}
}

function isName(v, name) {
Expand Down
208 changes: 98 additions & 110 deletions src/core/struct_tree.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,19 @@ class StructTreeRoot {
newRefs,
}) {
const root = pdfManager.catalog.cloneDict();
const cache = new RefSetCache();
cache.put(catalogRef, root);

const structTreeRootRef = xref.getNewTemporaryRef();
root.set("StructTreeRoot", structTreeRootRef);

const buffer = [];
await writeObject(catalogRef, root, buffer, xref);
newRefs.push({ ref: catalogRef, data: buffer.join("") });

const structTreeRoot = new Dict(xref);
structTreeRoot.set("Type", Name.get("StructTreeRoot"));
const parentTreeRef = xref.getNewTemporaryRef();
structTreeRoot.set("ParentTree", parentTreeRef);
const kids = [];
structTreeRoot.set("K", kids);
cache.put(structTreeRootRef, structTreeRoot);

const parentTree = new Dict(xref);
const nums = [];
Expand All @@ -144,18 +144,18 @@ class StructTreeRoot {
nums,
xref,
pdfManager,
newRefs,
buffer,
cache,
});
structTreeRoot.set("ParentTreeNextKey", nextKey);

buffer.length = 0;
await writeObject(parentTreeRef, parentTree, buffer, xref);
newRefs.push({ ref: parentTreeRef, data: buffer.join("") });
cache.put(parentTreeRef, parentTree);

buffer.length = 0;
await writeObject(structTreeRootRef, structTreeRoot, buffer, xref);
newRefs.push({ ref: structTreeRootRef, data: buffer.join("") });
const buffer = [];
for (const [ref, obj] of cache.items()) {
buffer.length = 0;
await writeObject(ref, obj, buffer, xref);
newRefs.push({ ref, data: buffer.join("") });
}
}

async canUpdateStructTree({ pdfManager, xref, newAnnotationsByPage }) {
Expand Down Expand Up @@ -232,6 +232,8 @@ class StructTreeRoot {
const xref = this.dict.xref;
const structTreeRoot = this.dict.clone();
const structTreeRootRef = this.ref;
const cache = new RefSetCache();
cache.put(structTreeRootRef, structTreeRoot);

let parentTreeRef = structTreeRoot.getRaw("ParentTree");
let parentTree;
Expand All @@ -243,6 +245,7 @@ class StructTreeRoot {
structTreeRoot.set("ParentTree", parentTreeRef);
}
parentTree = parentTree.clone();
cache.put(parentTreeRef, parentTree);

let nums = parentTree.getRaw("Nums");
let numsRef = null;
Expand All @@ -255,47 +258,27 @@ class StructTreeRoot {
parentTree.set("Nums", nums);
}

let kids = structTreeRoot.getRaw("K");
let kidsRef = null;
if (kids instanceof Ref) {
kidsRef = kids;
kids = xref.fetch(kidsRef);
} else {
kidsRef = xref.getNewTemporaryRef();
structTreeRoot.set("K", kidsRef);
}
kids = Array.isArray(kids) ? kids.slice() : [kids];

const buffer = [];
const newNextkey = await StructTreeRoot.#writeKids({
newAnnotationsByPage,
structTreeRootRef,
kids,
kids: null,
nums,
xref,
pdfManager,
newRefs,
buffer,
cache,
});
structTreeRoot.set("ParentTreeNextKey", newNextkey);

buffer.length = 0;
await writeObject(kidsRef, kids, buffer, xref);
newRefs.push({ ref: kidsRef, data: buffer.join("") });

if (numsRef) {
buffer.length = 0;
await writeObject(numsRef, nums, buffer, xref);
newRefs.push({ ref: numsRef, data: buffer.join("") });
cache.put(numsRef, nums);
}

buffer.length = 0;
await writeObject(parentTreeRef, parentTree, buffer, xref);
newRefs.push({ ref: parentTreeRef, data: buffer.join("") });

buffer.length = 0;
await writeObject(structTreeRootRef, structTreeRoot, buffer, xref);
newRefs.push({ ref: structTreeRootRef, data: buffer.join("") });
const buffer = [];
for (const [ref, obj] of cache.items()) {
buffer.length = 0;
await writeObject(ref, obj, buffer, xref);
newRefs.push({ ref, data: buffer.join("") });
}
}

static async #writeKids({
Expand All @@ -305,8 +288,7 @@ class StructTreeRoot {
nums,
xref,
pdfManager,
newRefs,
buffer,
cache,
}) {
const objr = Name.get("OBJR");
let nextKey = -Infinity;
Expand Down Expand Up @@ -349,19 +331,15 @@ class StructTreeRoot {
tagDict.set("ActualText", actualText);
}

if (structTreeParent) {
await this.#updateParentTag({
structTreeParent,
tagDict,
newTagRef: tagRef,
fallbackRef: structTreeRootRef,
xref,
newRefs,
buffer,
});
} else {
tagDict.set("P", structTreeRootRef);
}
await this.#updateParentTag({
structTreeParent,
tagDict,
newTagRef: tagRef,
structTreeRootRef,
fallbackKids: kids,
xref,
cache,
});

const objDict = new Dict(xref);
tagDict.set("K", objDict);
Expand All @@ -372,23 +350,24 @@ class StructTreeRoot {
}
objDict.set("Obj", ref);

buffer.length = 0;
await writeObject(tagRef, tagDict, buffer, xref);
newRefs.push({ ref: tagRef, data: buffer.join("") });

cache.put(tagRef, tagDict);
nums.push(parentTreeId, tagRef);
kids.push(tagRef);
}
}
return nextKey + 1;
}

static #collectParents({ elements, xref, pageDict, numberTree }) {
const idToElement = new Map();
const idToElements = new Map();
for (const element of elements) {
if (element.structTreeParentId) {
const id = parseInt(element.structTreeParentId.split("_mc")[1], 10);
idToElement.set(id, element);
let elems = idToElements.get(id);
if (!elems) {
elems = [];
idToElements.set(id, elems);
}
elems.push(element);
}
}

Expand All @@ -400,13 +379,16 @@ class StructTreeRoot {
const parentArray = numberTree.get(id);

const updateElement = (kid, pageKid, kidRef) => {
const element = idToElement.get(kid);
if (element) {
const elems = idToElements.get(kid);
if (elems) {
const parentRef = pageKid.getRaw("P");
const parentDict = xref.fetchIfRef(parentRef);
if (parentRef instanceof Ref && parentDict instanceof Dict) {
// It should always be the case, but we check just in case.
element.structTreeParent = { ref: kidRef, dict: pageKid };
const params = { ref: kidRef, dict: pageKid };
for (const element of elems) {
element.structTreeParent = params;
}
}
return true;
}
Expand All @@ -431,67 +413,73 @@ class StructTreeRoot {
if (Number.isInteger(kid) && updateElement(kid, pageKid, kidRef)) {
break;
}
if (!(kid instanceof Dict)) {
continue;
}
if (!isName(kid.get("Type"), "MCR")) {
break;
}
const mcid = kid.get("MCID");
if (Number.isInteger(mcid) && updateElement(mcid, pageKid, kidRef)) {
break;
}
}
}
}

static async #updateParentTag({
structTreeParent: { ref, dict },
structTreeParent,
tagDict,
newTagRef,
fallbackRef,
structTreeRootRef,
fallbackKids,
xref,
newRefs,
buffer,
cache,
}) {
// We get the parent of the tag.
const parentRef = dict.getRaw("P");
let parentDict = xref.fetchIfRef(parentRef);

tagDict.set("P", parentRef);
let ref = null;
let parentRef;
if (structTreeParent) {
({ ref } = structTreeParent);

// We get the kids in order to insert a new tag at the right position.
let saveParentDict = false;
let parentKids;
let parentKidsRef = parentDict.getRaw("K");
if (!(parentKidsRef instanceof Ref)) {
parentKids = parentKidsRef;
parentKidsRef = xref.getNewTemporaryRef();
parentDict = parentDict.clone();
parentDict.set("K", parentKidsRef);
saveParentDict = true;
// We get the parent of the tag.
parentRef = structTreeParent.dict.getRaw("P") || structTreeRootRef;
} else {
parentKids = xref.fetch(parentKidsRef);
}

if (Array.isArray(parentKids)) {
const index = parentKids.indexOf(ref);
if (index >= 0) {
parentKids = parentKids.slice();
parentKids.splice(index + 1, 0, newTagRef);
} else {
warn("Cannot update the struct tree: parent kid not found.");
tagDict.set("P", fallbackRef);
return;
}
} else if (parentKids instanceof Dict) {
parentKids = [parentKidsRef, newTagRef];
parentKidsRef = xref.getNewTemporaryRef();
parentDict.set("K", parentKidsRef);
saveParentDict = true;
parentRef = structTreeRootRef;
}

buffer.length = 0;
await writeObject(parentKidsRef, parentKids, buffer, xref);
newRefs.push({ ref: parentKidsRef, data: buffer.join("") });
tagDict.set("P", parentRef);

if (!saveParentDict) {
// We get the kids in order to insert a new tag at the right position.
const parentDict = xref.fetchIfRef(parentRef);
if (!parentDict) {
fallbackKids.push(newTagRef);
return;
}

buffer.length = 0;
await writeObject(parentRef, parentDict, buffer, xref);
newRefs.push({ ref: parentRef, data: buffer.join("") });
let cachedParentDict = cache.get(parentRef);
if (!cachedParentDict) {
cachedParentDict = parentDict.clone();
cache.put(parentRef, cachedParentDict);
}
const parentKidsRaw = cachedParentDict.getRaw("K");
let cachedParentKids =
parentKidsRaw instanceof Ref ? cache.get(parentKidsRaw) : null;
if (!cachedParentKids) {
cachedParentKids = xref.fetchIfRef(parentKidsRaw);
cachedParentKids = Array.isArray(cachedParentKids)
? cachedParentKids.slice()
: [parentKidsRaw];
const parentKidsRef = xref.getNewTemporaryRef();
cachedParentDict.set("K", parentKidsRef);
cache.put(parentKidsRef, cachedParentKids);
}

const index = cachedParentKids.indexOf(ref);
cachedParentKids.splice(
index >= 0 ? index + 1 : cachedParentKids.length,
0,
newTagRef
);
}
}

Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -645,3 +645,4 @@
!issue12213.pdf
!tracemonkey_freetext.pdf
!issue17998.pdf
!pdfjs_wikipedia.pdf
Binary file added test/pdfs/pdfjs_wikipedia.pdf
Binary file not shown.
Loading

0 comments on commit d1f494d

Please sign in to comment.