From 92cc69729c0bcc4bb65dc4c5a5e4276d3f669fe1 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 10 Sep 2024 11:34:55 +0200 Subject: [PATCH] Ensure that textLayers can be rendered in parallel, without interfering with each other Note that the textContent is returned in "chunks" from the API, through the use of `ReadableStream`s, and on the main-thread we're (normally) using just one temporary canvas in order to measure the size of the textLayer `span`s; see the [`#layout`](https://github.com/mozilla/pdf.js/blob/5b4c2fe1a845169ac2b4f8f6335337c434077637/src/display/text_layer.js#L396-L428) method. *Order of events, for parallel textLayer rendering:* 1. Call [`render`](https://github.com/mozilla/pdf.js/blob/5b4c2fe1a845169ac2b4f8f6335337c434077637/src/display/text_layer.js#L155-L177) of the textLayer for page A. 2. Immediately call `render` of the textLayer for page B. 3. The first text-chunk for page A arrives, and it's parsed and laid out, which means updating the cached [fontSize/fontFamily](https://github.com/mozilla/pdf.js/blob/5b4c2fe1a845169ac2b4f8f6335337c434077637/src/display/text_layer.js#L409-L413) for the textLayer of page A. 4. The first text-chunk for page B arrives, which means updating the cached fontSize/fontFamily *for the textLayer of page B* since this data is unique to each `TextLayer`-instance. 5. The second text-chunk for page A arrives, and we don't update the canvas-font since the cached fontSize/fontFamily still apply from step 3 above. Where this potentially breaks down is between the last steps, since we're using just one temporary canvas for all measurements but have *individual* fontSize/fontFamily caches for each textLayer. Hence it's possible that the canvas-font has actually changed, despite the cached values suggesting otherwise, and to address this we instead cache the fontSize/fontFamily globally through a new (static) helper method. 
*Note:* Includes a basic unit-test, using dummy text-content, which fails on `master` and passes with this patch. Finally, pun intended, ensure that temporary textLayer-data is cleared *before* the `render`-promise resolves to avoid any intermittent problems in the unit-tests. --- src/display/text_layer.js | 37 ++++---- test/unit/text_layer_spec.js | 160 +++++++++++++++++++++++++++++++++++ 2 files changed, 178 insertions(+), 19 deletions(-) diff --git a/src/display/text_layer.js b/src/display/text_layer.js index bace7a87ea9995..7425747e5aec61 100644 --- a/src/display/text_layer.js +++ b/src/display/text_layer.js @@ -83,6 +83,8 @@ class TextLayer { static #canvasContexts = new Map(); + static #canvasCtxFonts = new WeakMap(); + static #minFontSize = null; static #pendingTextLayers = new Set(); @@ -111,8 +113,6 @@ class TextLayer { this.#scale = viewport.scale * (globalThis.devicePixelRatio || 1); this.#rotation = viewport.rotation; this.#layoutTextParams = { - prevFontSize: null, - prevFontFamily: null, div: null, properties: null, ctx: null, @@ -128,13 +128,13 @@ class TextLayer { // Always clean-up the temporary canvas once rendering is no longer pending. this.#capability.promise - .catch(() => { - // Avoid "Uncaught promise" messages in the console. - }) - .then(() => { + .finally(() => { TextLayer.#pendingTextLayers.delete(this); this.#layoutTextParams = null; this.#styleCache = null; + }) + .catch(() => { + // Avoid "Uncaught promise" messages in the console. 
}); if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { @@ -195,8 +195,6 @@ class TextLayer { onBefore?.(); this.#scale = scale; const params = { - prevFontSize: null, - prevFontFamily: null, div: null, properties: null, ctx: TextLayer.#getCtx(this.#lang), @@ -394,7 +392,7 @@ class TextLayer { } #layout(params) { - const { div, properties, ctx, prevFontSize, prevFontFamily } = params; + const { div, properties, ctx } = params; const { style } = div; let transform = ""; @@ -406,12 +404,7 @@ class TextLayer { const { fontFamily } = style; const { canvasWidth, fontSize } = properties; - if (prevFontSize !== fontSize || prevFontFamily !== fontFamily) { - ctx.font = `${fontSize * this.#scale}px ${fontFamily}`; - params.prevFontSize = fontSize; - params.prevFontFamily = fontFamily; - } - + TextLayer.#ensureCtxFont(ctx, fontSize * this.#scale, fontFamily); // Only measure the width for multi-char text divs, see `appendText`. const { width } = ctx.measureText(div.textContent); @@ -469,6 +462,15 @@ class TextLayer { return canvasContext; } + static #ensureCtxFont(ctx, size, family) { + const cached = this.#canvasCtxFonts.get(ctx); + if (cached && size === cached.size && family === cached.family) { + return; // The font is already set. + } + ctx.font = `${size}px ${family}`; + this.#canvasCtxFonts.set(ctx, { size, family }); + } + /** * Compute the minimum font size enforced by the browser. */ @@ -497,9 +499,8 @@ class TextLayer { } const ctx = this.#getCtx(lang); - const savedFont = ctx.font; ctx.canvas.width = ctx.canvas.height = DEFAULT_FONT_SIZE; - ctx.font = `${DEFAULT_FONT_SIZE}px ${fontFamily}`; + this.#ensureCtxFont(ctx, DEFAULT_FONT_SIZE, fontFamily); const metrics = ctx.measureText(""); // Both properties aren't available by default in Firefox. 
@@ -510,7 +511,6 @@ class TextLayer { this.#ascentCache.set(fontFamily, ratio); ctx.canvas.width = ctx.canvas.height = 0; - ctx.font = savedFont; return ratio; } @@ -550,7 +550,6 @@ class TextLayer { } ctx.canvas.width = ctx.canvas.height = 0; - ctx.font = savedFont; const ratio = ascent ? ascent / (ascent + descent) : DEFAULT_FONT_ASCENT; this.#ascentCache.set(fontFamily, ratio); diff --git a/test/unit/text_layer_spec.js b/test/unit/text_layer_spec.js index 5b0b8a1df22bf3..644e7424581292 100644 --- a/test/unit/text_layer_spec.js +++ b/test/unit/text_layer_spec.js @@ -90,4 +90,164 @@ describe("textLayer", function () { await loadingTask.destroy(); }); + + it("creates textLayers in parallel, from ReadableStream", async function () { + if (isNodeJS) { + pending("document.createElement is not supported in Node.js."); + } + if (typeof ReadableStream.from !== "function") { + pending("ReadableStream.from is not supported."); + } + const getTransform = container => { + const transform = []; + + for (const span of container.childNodes) { + const t = span.style.transform; + expect(t).toMatch(/^scaleX\([\d.]+\)$/); + + transform.push(t); + } + return transform; + }; + + const loadingTask = getDocument(buildGetDocumentParams("basicapi.pdf")); + const pdfDocument = await loadingTask.promise; + const [page1, page2] = await Promise.all([ + pdfDocument.getPage(1), + pdfDocument.getPage(2), + ]); + + // Create text-content streams with dummy content. 
+ const items1 = [ + { + str: "Chapter A", + dir: "ltr", + width: 100, + height: 20, + transform: [20, 0, 0, 20, 45, 744], + fontName: "g_d0_f1", + hasEOL: false, + }, + { + str: "page 1", + dir: "ltr", + width: 50, + height: 20, + transform: [20, 0, 0, 20, 45, 744], + fontName: "g_d0_f1", + hasEOL: false, + }, + ]; + const items2 = [ + { + str: "Chapter B", + dir: "ltr", + width: 120, + height: 10, + transform: [10, 0, 0, 10, 492, 16], + fontName: "g_d0_f2", + hasEOL: false, + }, + { + str: "page 2", + dir: "ltr", + width: 60, + height: 10, + transform: [10, 0, 0, 10, 492, 16], + fontName: "g_d0_f2", + hasEOL: false, + }, + ]; + + const styles = { + g_d0_f1: { + ascent: 0.75, + descent: -0.25, + fontFamily: "serif", + vertical: false, + }, + g_d0_f2: { + ascent: 0.5, + descent: -0.5, + fontFamily: "sans-serif", + vertical: false, + }, + }; + const lang = "en"; + + // Render the textLayers serially, to have something to compare against. + const serialContainer1 = document.createElement("div"), + serialContainer2 = document.createElement("div"); + + const serialTextLayer1 = new TextLayer({ + textContentSource: { items: items1, styles, lang }, + container: serialContainer1, + viewport: page1.getViewport({ scale: 1 }), + }); + await serialTextLayer1.render(); + + const serialTextLayer2 = new TextLayer({ + textContentSource: { items: items2, styles, lang }, + container: serialContainer2, + viewport: page2.getViewport({ scale: 1 }), + }); + await serialTextLayer2.render(); + + const serialTransform1 = getTransform(serialContainer1), + serialTransform2 = getTransform(serialContainer2); + + expect(serialTransform1.length).toEqual(2); + expect(serialTransform2.length).toEqual(2); + + // Reset any global textLayer-state before rendering in parallel. 
+ TextLayer.cleanup(); + + const container1 = document.createElement("div"), + container2 = document.createElement("div"); + const waitCapability1 = Promise.withResolvers(); + + const streamGenerator1 = (async function* () { + for (const item of items1) { + yield { items: [item], styles, lang }; + await waitCapability1.promise; + } + })(); + const streamGenerator2 = (async function* () { + for (const item of items2) { + yield { items: [item], styles, lang }; + } + })(); + + const textLayer1 = new TextLayer({ + textContentSource: ReadableStream.from(streamGenerator1), + container: container1, + viewport: page1.getViewport({ scale: 1 }), + }); + const textLayer1Promise = textLayer1.render(); + + const textLayer2 = new TextLayer({ + textContentSource: ReadableStream.from(streamGenerator2), + container: container2, + viewport: page2.getViewport({ scale: 1 }), + }); + await textLayer2.render(); + + // Ensure that the first textLayer has its rendering "paused" while + // the second textLayer renders. + waitCapability1.resolve(); + await textLayer1Promise; + + // Sanity check to make sure that all text was parsed. + expect(textLayer1.textContentItemsStr).toEqual(["Chapter A", "page 1"]); + expect(textLayer2.textContentItemsStr).toEqual(["Chapter B", "page 2"]); + + // Ensure that the transforms are identical when parsing in series/parallel. + const transform1 = getTransform(container1), + transform2 = getTransform(container2); + + expect(transform1).toEqual(serialTransform1); + expect(transform2).toEqual(serialTransform2); + + await loadingTask.destroy(); + }); });