From 789dc33cc3dfa0a539ce3beea263ab6ea3ceb904 Mon Sep 17 00:00:00 2001 From: Ronald Eddings Date: Sat, 20 Jun 2026 05:36:30 -0500 Subject: [PATCH] =?UTF-8?q?Release=200.18.3=20=E2=80=94=20native=20screens?= =?UTF-8?q?hot=20renderer=20+=20cross-platform=20OCR?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps every surface to 0.18.3. Two headline changes: the DOM-render screenshot path is now a dependency-free native renderer (html-to-image removed), and OCR is reworked to be native-first with a bundled cross-platform engine (ocrad.js removed, Tesseract.js added). Screenshots — native DOM renderer - The default DOM-render screenshot hung for the full CLI timeout on any backgrounded tab: html-to-image resolved its image load inside requestAnimationFrame, which Chrome suspends for hidden tabs, so the render never completed. Replaced the html-to-image dependency with a direct native renderer: getComputedStyle -> inline cssText, XMLSerializer -> SVG foreignObject, Image/decode, Canvas, and FileReader/fetch for embedding images, canvas snapshots, and background-image resources. - Works fully backgrounded with no focus, and is dramatically faster on large pages: inline SVG subtrees are deep-cloned wholesale instead of having styles inlined per descendant, so SVG-heavy pages (e.g. a full Wikipedia article, ~11k px tall, ~2.6k inline icons) render in ~7s on a hidden tab instead of timing out. - A DOM-render timeout guard now fails fast with a clear error instead of a silent 45s hang. - html-to-image, its vendored runner, and its patches are removed entirely. OCR — native-first + cross-platform - `canvas ocr` now returns the canvas's native accessible text (aria-label / aria-labelledby / fallback subtree / figcaption) plus the page's semantic textbox model, instead of low-quality pixel OCR. ocrad.js — an unmaintained pixel-OCR blob — is removed. 
- New `interceptor ocr SELECTOR`: renders the target via the native path and OCRs it with a bundled Tesseract.js engine. Offline, cross-platform, no native bridge required; returns a deterministic text string with a confidence score. `canvas ocr` falls back to it for pixel-only canvases. The WASM core, worker, and English language data are bundled and loaded from extension-local URLs (works on any page CSP; wasm-unsafe-eval added to the extension CSP). Bump all surfaces to 0.18.3. Suite 484/0. Co-Authored-By: Claude Opus 4.8 --- .../references/command-catalog.md | 4 +- bun.lock | 29 +- cli/commands/screenshot.ts | 25 + cli/help.ts | 6 +- cli/index.ts | 2 +- cli/transport.ts | 5 +- cli/version.ts | 2 +- extension/dist-mv2/background-electron.js | 166 +++- extension/dist-mv2/content.js | 254 ++++-- extension/dist-mv2/manifest.json | 2 +- extension/dist-mv2/screenshot-runner.js | 855 ------------------ extension/manifest.json | 5 +- .../src/background/capabilities/canvas.ts | 124 ++- .../src/background/capabilities/screenshot.ts | 77 +- extension/src/background/router.ts | 2 +- extension/src/content/dom-screenshot.ts | 342 ++++--- extension/src/offscreen.ts | 77 +- extension/src/screenshot-runner.ts | 16 - extension/tesseract-assets/eng.traineddata.gz | Bin 0 -> 1984273 bytes package.json | 5 +- scripts/build.sh | 16 +- test/screenshot-minimized-preflight.test.ts | 45 +- 22 files changed, 841 insertions(+), 1218 deletions(-) delete mode 100644 extension/dist-mv2/screenshot-runner.js delete mode 100644 extension/src/screenshot-runner.ts create mode 100644 extension/tesseract-assets/eng.traineddata.gz diff --git a/.agents/skills/interceptor-browser/references/command-catalog.md b/.agents/skills/interceptor-browser/references/command-catalog.md index 87ffee9..061943c 100644 --- a/.agents/skills/interceptor-browser/references/command-catalog.md +++ b/.agents/skills/interceptor-browser/references/command-catalog.md @@ -114,9 +114,11 @@ Pixels only when observer data is 
insufficient: ```bash interceptor canvas read 1 [--format png] [--region 10,20,300,120] [--webgl] interceptor canvas diff 1 -interceptor canvas ocr 1 # Experimental — fallback only +interceptor canvas ocr 1 # Native canvas text: aria/fallback + semantic model (no pixel OCR) ``` +`canvas ocr` returns the canvas's *native* accessible text (aria-label / aria-labelledby / fallback subtree / figcaption) plus the page's semantic textbox model — no pixel OCR. For a canvas-rendered editor prefer `scene text`; for genuine pixel-only text use `interceptor macos vision text` (native macOS Vision OCR). + Canvas indexes are DOM canvas indexes. ## Scene (rich editors) diff --git a/bun.lock b/bun.lock index 0a52c14..b95ab97 100644 --- a/bun.lock +++ b/bun.lock @@ -5,14 +5,13 @@ "": { "name": "interceptor-browser", "dependencies": { - "html-to-image": "^1.11.13", + "tesseract.js": "^7.0.0", }, "devDependencies": { "@happy-dom/global-registrator": "^20.9.0", "@types/bun": "^1.3.14", "@types/chrome": "^0.1.43", "happy-dom": "^20.9.0", - "ocrad.js": "^0.0.1", "typescript": "5.9.3", }, }, @@ -36,22 +35,44 @@ "@types/ws": ["@types/ws@8.18.1", "", { "dependencies": { "@types/node": "*" } }, "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg=="], + "bmp-js": ["bmp-js@0.1.0", "", {}, "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw=="], + "bun-types": ["bun-types@1.3.14", "", { "dependencies": { "@types/node": "*" } }, "sha512-4N0ig0fEomHt5R0KCFWjovxow98rIoRwKolrYdCcknNwMekCXRnWEUvgu5soYV8QXtVsrUD8B95MBOZGPvr6KQ=="], "entities": ["entities@7.0.1", "", {}, "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA=="], "happy-dom": ["happy-dom@20.9.0", "", { "dependencies": { "@types/node": ">=20.0.0", "@types/whatwg-mimetype": "^3.0.2", "@types/ws": "^8.18.1", "entities": "^7.0.1", "whatwg-mimetype": "^3.0.0", "ws": "^8.18.3" } }, 
"sha512-GZZ9mKe8r646NUAf/zemnGbjYh4Bt8/MqASJY+pSm5ZDtc3YQox+4gsLI7yi1hba6o+eCsGxpHn5+iEVn31/FQ=="], - "html-to-image": ["html-to-image@1.11.13", "", {}, "sha512-cuOPoI7WApyhBElTTb9oqsawRvZ0rHhaHwghRLlTuffoD1B2aDemlCruLeZrUIIdvG7gs9xeELEPm6PhuASqrg=="], + "idb-keyval": ["idb-keyval@6.2.5", "", {}, "sha512-eKQkTnS0relYsSOYomx8ozIbmdsQCKUdhyuIaQ2DZgKuaxtyQQMkyD/wlnQN32pO3yutN1b1L8uqwcDKaJd7/Q=="], + + "is-url": ["is-url@1.2.4", "", {}, "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww=="], + + "node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], + + "opencollective-postinstall": ["opencollective-postinstall@2.0.3", "", { "bin": { "opencollective-postinstall": "index.js" } }, "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q=="], + + "regenerator-runtime": ["regenerator-runtime@0.13.11", "", {}, "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg=="], + + "tesseract.js": ["tesseract.js@7.0.0", "", { "dependencies": { "bmp-js": "^0.1.0", "idb-keyval": "^6.2.0", "is-url": "^1.2.4", "node-fetch": "^2.6.9", "opencollective-postinstall": "^2.0.3", "regenerator-runtime": "^0.13.3", "tesseract.js-core": "^7.0.0", "wasm-feature-detect": "^1.8.0", "zlibjs": "^0.3.1" } }, "sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA=="], - "ocrad.js": ["ocrad.js@0.0.1", "", {}, "sha512-4W8Kcf4ewFJUgEGIBH4FxEgusHPmk0dSzD+CBMaaZeOb1RPM1Rv0+cA177pHDlN2yK8Gelb4KTczxtbQmq+p/w=="], + "tesseract.js-core": ["tesseract.js-core@7.0.0", "", {}, "sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw=="], + + "tr46": ["tr46@0.0.3", "", {}, 
"sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], "undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="], + "wasm-feature-detect": ["wasm-feature-detect@1.8.0", "", {}, "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ=="], + + "webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="], + "whatwg-mimetype": ["whatwg-mimetype@3.0.0", "", {}, "sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q=="], + "whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="], + "ws": ["ws@8.20.0", "", { "peerDependencies": { "bufferutil": "^4.0.1", "utf-8-validate": ">=5.0.2" }, "optionalPeers": ["bufferutil", "utf-8-validate"] }, "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA=="], + + "zlibjs": ["zlibjs@0.3.1", "", {}, "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w=="], } } diff --git a/cli/commands/screenshot.ts b/cli/commands/screenshot.ts index 4417e5d..80ba2f2 100644 --- a/cli/commands/screenshot.ts +++ b/cli/commands/screenshot.ts @@ -44,6 +44,31 @@ export function parseScreenshotCommand(filtered: string[]): Action { return ssAction } + case "ocr": { + // interceptor ocr "" | | --element N | --region X,Y,W,H + // Renders the target via the native screenshot path, then OCRs it with the + // bundled Tesseract engine 
(offline, cross-platform, no Mac, no agent). + const a: Action = { type: "ocr" } + const pos = filtered[1] + if (pos && !pos.startsWith("--")) { + if (/^e\d/.test(pos)) a.ref = pos + else a.selector = pos + } + if (filtered.includes("--selector")) a.selector = filtered[filtered.indexOf("--selector") + 1] + if (filtered.includes("--ref")) a.ref = filtered[filtered.indexOf("--ref") + 1] + if (filtered.includes("--element")) a.element = parseInt(filtered[filtered.indexOf("--element") + 1]) + if (filtered.includes("--region")) { + const rp = filtered[filtered.indexOf("--region") + 1].split(",").map(Number) + a.region = { x: rp[0], y: rp[1], width: rp[2], height: rp[3] } + } + if (filtered.includes("--scale")) a.scale = parseFloat(filtered[filtered.indexOf("--scale") + 1]) + if (filtered.includes("--target-max-long-edge")) { + const parsed = parseInt(filtered[filtered.indexOf("--target-max-long-edge") + 1]) + if (Number.isFinite(parsed) && parsed > 0) a.target_max_long_edge = parsed + } + return a + } + case "canvas": switch (filtered[1]) { case "list": diff --git a/cli/help.ts b/cli/help.ts index 4f5059b..aacb8af 100644 --- a/cli/help.ts +++ b/cli/help.ts @@ -125,6 +125,9 @@ Capture: interceptor screenshot --quality 80 Encode quality 0-100 (defaults: png 92, jpeg 92, webp 85) interceptor screenshot --target-max-long-edge 1568 Clamp output long edge in pixels (auto-resize at capture) interceptor screenshot --clip X,Y,W,H [deprecated alias for --region] + interceptor ocr "" OCR text from an element (bundled Tesseract — offline, cross-platform, no Mac) + interceptor ocr --region X,Y,W,H OCR a page region + interceptor ocr --element N OCR an element by ref interceptor eval Run JS in isolated world interceptor eval --main Run JS in page context @@ -179,8 +182,7 @@ Canvas: interceptor canvas objects --kind text Filter derived objects by kind interceptor canvas model Inspect host-state and app-model signals interceptor canvas routes Inspect candidate first-party 
canvas-related routes - interceptor canvas ocr N OCR text from canvas N - interceptor canvas ocr N --region X,Y,W,H OCR a canvas crop + interceptor canvas ocr N Native canvas text (aria/fallback + semantic model; no pixel OCR) interceptor canvas read N Read canvas as data URL interceptor canvas read N --format png PNG format interceptor canvas read N --region X,Y,W,H Read pixel region diff --git a/cli/index.ts b/cli/index.ts index 14dcb2e..ce22591 100644 --- a/cli/index.ts +++ b/cli/index.ts @@ -38,7 +38,7 @@ const ACTION_CMDS = new Set(["click", "type", "select", "focus", "blur", "hover" const NAV_CMDS = new Set(["navigate", "back", "forward", "scroll", "wait", "wait-stable", "wait_for"]) const TAB_CMDS = new Set(["tabs", "tab", "window", "frames", "session"]) const NET_CMDS = new Set(["network", "net", "headers"]) -const SS_CMDS = new Set(["screenshot", "canvas", "capture"]) +const SS_CMDS = new Set(["screenshot", "canvas", "capture", "ocr"]) const DATA_CMDS = new Set(["cookies", "storage", "history", "bookmarks", "downloads", "clear", "clipboard"]) const META_CMDS = new Set(["status", "reload", "meta", "links", "images", "forms", "info", "page_info", "query", "exists", "count", "table", "attr", "style", "events", "search", "notify", "sessions", "capabilities", "modals", "panels"]) const EVAL_CMDS = new Set(["eval"]) diff --git a/cli/transport.ts b/cli/transport.ts index 0bbcc96..07b00c7 100644 --- a/cli/transport.ts +++ b/cli/transport.ts @@ -27,9 +27,12 @@ const ACTION_TIMEOUT_OVERRIDES_MS: Record = { screenshot: 45_000, screenshot_background: 45_000, canvas_read: 45_000, - canvas_ocr: 45_000, + canvas_ocr: 60_000, canvas_diff: 45_000, capture_frame: 45_000, + // OCR: native capture + Tesseract. First call also lazy-loads the WASM core + + // language data, so allow generous headroom. 
+ ocr: 60_000, } function pickTimeoutForAction(actionType: string): number { diff --git a/cli/version.ts b/cli/version.ts index 081c10f..e38f0cb 100644 --- a/cli/version.ts +++ b/cli/version.ts @@ -1,6 +1,6 @@ // Sentinel values used when running from source (`bun run cli`). // scripts/build.sh stamps real build values into this file just before // each `bun build --compile` and restores it afterwards via `git checkout`. -export const VERSION = "0.17.7" +export const VERSION = "0.18.3" export const BUILD_SHA = "dev" export const BUILD_DATE = "dev" diff --git a/extension/dist-mv2/background-electron.js b/extension/dist-mv2/background-electron.js index e25e0c2..ecf2a07 100644 --- a/extension/dist-mv2/background-electron.js +++ b/extension/dist-mv2/background-electron.js @@ -446,6 +446,7 @@ async function uninstallScreenshotCorsRule(tabId) { // extension/src/background/capabilities/screenshot.ts var CAPTURE_TIMEOUT_MS = 5000; +var DOM_RENDER_TIMEOUT_MS = 30000; var VISIBILITY_HINT = "Chrome/Brave window may not be visible — bring it to the front and retry, or pass --tab of a tab in a visible window."; class CaptureTimeoutError extends Error { @@ -539,18 +540,6 @@ function resolveDomMode(action) { return "region"; return "full"; } -async function injectScreenshotRunner(tabId) { - try { - await chrome.scripting.executeScript({ - target: { tabId }, - world: "ISOLATED", - files: ["screenshot-runner.js"] - }); - return { success: true }; - } catch (err) { - return { success: false, error: `failed to inject screenshot-runner.js: ${err.message}` }; - } -} async function reencodeAsWebP(dataUrl, qualityPct) { const res = await fetch(dataUrl); const blob = await res.blob(); @@ -596,9 +585,6 @@ async function handleDomRenderScreenshot(action, tabId) { } await installScreenshotCorsRule(tabId); try { - const inject = await injectScreenshotRunner(tabId); - if (!inject.success) - return { success: false, error: inject.error || "runner injection failed" }; const dsAction = { type: 
"dom_screenshot", mode, format: renderFormat, quality }; if (action.ref !== undefined) dsAction.ref = action.ref; @@ -612,7 +598,19 @@ async function handleDomRenderScreenshot(action, tabId) { dsAction.scale = scale; if (targetMaxLongEdge !== undefined) dsAction.target_max_long_edge = targetMaxLongEdge; - const renderResult = await sendToContentScript(tabId, dsAction); + let renderResult; + try { + renderResult = await withCaptureTimeout("dom-render", sendToContentScript(tabId, dsAction), DOM_RENDER_TIMEOUT_MS); + } catch (err) { + if (err instanceof CaptureTimeoutError) { + return { + success: false, + error: `DOM-render timed out after ${DOM_RENDER_TIMEOUT_MS}ms — the content script did not return image data. The render stalled (e.g. a resource never settled); retry, or use --pixel for a compositor capture.`, + data: { layer: "dom-render-timeout" } + }; + } + throw err; + } if (!renderResult || !renderResult.success || !renderResult.data) { return { success: false, error: renderResult?.error || "dom render returned no data" }; } @@ -860,10 +858,38 @@ async function transformPixelDataUrl(dataUrl, requestedFormat, quality, targetMa return { success: false, error: `transform failed: ${err.message}` }; } } +async function handleOcr(action, tabId) { + const shot = { type: "screenshot", format: "png", save: false }; + for (const k of ["selector", "element", "ref", "region", "clip", "scale", "target_max_long_edge"]) { + if (action[k] !== undefined) + shot[k] = action[k]; + } + const rendered = await handleDomRenderScreenshot(shot, tabId); + if (!rendered.success) + return rendered; + const dataUrl = rendered.data?.dataUrl; + if (!dataUrl) + return { success: false, error: "capture for OCR produced no image" }; + const ocr = await sendToOffscreen({ type: "ocr", dataUrl }); + if (!ocr.success) + return { success: false, error: ocr.error || "OCR failed" }; + return { + success: true, + data: { + text: (ocr.data?.text || "").trim(), + source: ocr.data?.source || 
"tesseract", + confidence: ocr.data?.confidence ?? null, + width: rendered.data?.width, + height: rendered.data?.height + } + }; +} async function handleScreenshotActions(action, tabId) { switch (action.type) { case "screenshot_background": return handleScreenshotBackground(action, tabId); + case "ocr": + return handleOcr(action, tabId); case "page_capture": { const mhtml = await chrome.pageCapture.saveAsMHTML({ tabId }); const text = await mhtml.text(); @@ -1050,6 +1076,50 @@ function hostCanvasSignals(limit = 20) { globals: candidateGlobalDetails }; } +function canvasAccessibleText(canvasIndex) { + const canvases = Array.from(document.querySelectorAll("canvas")); + const c = canvases[canvasIndex]; + if (!c) + return { found: false, error: "canvas index out of range", canvasCount: canvases.length }; + const norm = (s) => (s || "").replace(/\s+/g, " ").trim(); + const byIds = (ids) => !ids ? "" : ids.split(/\s+/).map((id) => norm(document.getElementById(id)?.textContent)).filter(Boolean).join(" "); + const sources = {}; + const ariaLabel = norm(c.getAttribute("aria-label")); + if (ariaLabel) + sources.ariaLabel = ariaLabel; + const ariaLabelledby = byIds(c.getAttribute("aria-labelledby")); + if (ariaLabelledby) + sources.ariaLabelledby = ariaLabelledby; + const fallback = norm(c.textContent); + if (fallback) + sources.fallback = fallback; + const fig = c.closest("figure"); + const figcaption = fig ? 
norm(fig.querySelector("figcaption")?.textContent) : ""; + if (figcaption) + sources.figcaption = figcaption; + const ariaDescribedby = byIds(c.getAttribute("aria-describedby")); + if (ariaDescribedby) + sources.ariaDescribedby = ariaDescribedby; + const title = norm(c.getAttribute("title")); + if (title) + sources.title = title; + const text = [ + sources.ariaLabel, + sources.ariaLabelledby, + sources.fallback, + sources.figcaption, + sources.ariaDescribedby, + sources.title + ].filter(Boolean).join(` +`); + return { + found: !!text, + text, + role: c.getAttribute("role") || "", + sources, + canvasCount: canvases.length + }; +} function canvasObserverSummary(limit = 100, kinds, canvasIndex) { function normalize(kind) { return String(kind || "").trim(); @@ -1303,39 +1373,47 @@ async function handleCanvasActions(action, tabId) { }; } case "canvas_ocr": { - const readResult = await handleCanvasActions({ - type: "canvas_read", - canvasIndex: action.canvasIndex, - region: action.region, - format: "png" - }, tabId); - if (!readResult.success) - return readResult; - const dataUrl = readResult.data.dataUrl; - if (!dataUrl) - return { success: false, error: "canvas OCR requires a readable canvas image" }; - const ocrResult = await sendToOffscreen({ - type: "ocr", - dataUrl - }); - if (!ocrResult.success) - return { success: false, error: ocrResult.error || "canvas OCR failed" }; + const a11y = await executeInMainWorld(tabId, canvasAccessibleText, [action.canvasIndex]); + if (a11y && a11y.error) { + return { success: false, error: a11y.error }; + } const hostSignals = await executeInMainWorld(tabId, hostCanvasSignals, [10]); const semanticText = hostSignals?.docs?.textbox?.exists ? hostSignals.docs.textbox.textSample || "" : ""; - const ocrText = ocrResult.data?.text || ""; - const normalizedOcr = ocrText.replace(/\s+/g, " ").trim(); - const normalizedSemantic = semanticText.replace(/\s+/g, " ").trim(); - const matchedSemantic = normalizedOcr && normalizedSemantic ? 
normalizedSemantic.includes(normalizedOcr) || normalizedOcr.includes(normalizedSemantic) : false; + const a11yText = a11y && a11y.found ? a11y.text || "" : ""; + let text = a11yText || semanticText; + let source = a11yText ? "accessibility" : semanticText ? "semantic-textbox" : "none"; + let confidence = null; + let ocrFallbackUsed = false; + if (!text) { + const readResult = await handleCanvasActions({ + type: "canvas_read", + canvasIndex: action.canvasIndex, + region: action.region, + format: "png" + }, tabId); + const dataUrl = readResult.success ? readResult.data.dataUrl : undefined; + if (dataUrl) { + const ocr = await sendToOffscreen({ type: "ocr", dataUrl }); + if (ocr.success && (ocr.data?.text || "").trim()) { + text = ocr.data.text.trim(); + source = "tesseract"; + confidence = ocr.data?.confidence ?? null; + ocrFallbackUsed = true; + } + } + } return { success: true, data: { - text: ocrText, - source: ocrResult.data?.source || "ocr", - confidence: ocrResult.data?.confidence ?? null, + text, + source, + confidence, diagnostics: { - pixelSource: "canvas_read", - strongerSourceAvailable: !!semanticText, - strongerSourceMatched: matchedSemantic + accessibilityText: !!a11yText, + accessibilitySources: a11y?.sources || null, + semanticTextboxAvailable: !!semanticText, + ocrFallbackUsed, + hint: text ? undefined : "No accessible/semantic text and pixel OCR found nothing. For a canvas-rendered editor use `interceptor scene text`." 
} } }; @@ -3304,7 +3382,7 @@ async function handleMonitorActions(action, tabId) { registerMonitorListeners(); restorePageCommCaptureConfig(); var OS_INPUT_ACTIONS = new Set(["os_click", "os_key", "os_type", "os_move"]); -var SCREENSHOT_ACTIONS = new Set(["screenshot", "screenshot_background", "page_capture"]); +var SCREENSHOT_ACTIONS = new Set(["screenshot", "screenshot_background", "page_capture", "ocr"]); var CAPTURE_STREAM_ACTIONS = new Set(["capture_start", "capture_frame", "capture_stop", "canvas_diff"]); var CANVAS_ACTIONS = new Set([ "canvas_list", diff --git a/extension/dist-mv2/content.js b/extension/dist-mv2/content.js index c2a20dd..5ef2a91 100644 --- a/extension/dist-mv2/content.js +++ b/extension/dist-mv2/content.js @@ -4114,8 +4114,156 @@ async function handleCanvasAction(action) { // extension/src/content/dom-screenshot.ts init_input_simulation(); -function getLibrary() { - return globalThis.__interceptor_h2i ?? null; +var TRANSPARENT_1PX = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGNgYGBgAAAABQABh6FO1AAAAABJRU5ErkJggg=="; +var SKIP_TAGS2 = new Set(["script", "noscript", "style", "link", "meta", "template", "iframe", "object", "embed"]); +function extractUrls(cssValue) { + const out = []; + const re = /url\((['"]?)([^'")]+)\1\)/g; + let m; + while ((m = re.exec(cssValue)) !== null) { + const u = (m[2] || "").trim(); + if (u && u.indexOf("data:") !== 0) + out.push(u); + } + return out; +} +async function fetchResourceAsDataUrl(url) { + try { + const res = await fetch(url, { mode: "cors", cache: "force-cache" }); + if (!res.ok) + return TRANSPARENT_1PX; + const blob = await res.blob(); + return await new Promise((resolve) => { + const reader = new FileReader; + reader.onloadend = () => resolve(typeof reader.result === "string" ? 
reader.result : TRANSPARENT_1PX); + reader.onerror = () => resolve(TRANSPARENT_1PX); + reader.readAsDataURL(blob); + }); + } catch { + return TRANSPARENT_1PX; + } +} +function loadSvgImage(svgDataUrl) { + return new Promise((resolve, reject) => { + const img = new Image; + img.onload = () => { + img.decode().then(() => resolve(img)).catch(() => resolve(img)); + }; + img.onerror = () => reject(new Error("foreignObject SVG failed to rasterize")); + img.src = svgDataUrl; + }); +} +function inlineComputedStyle(srcEl, cloneEl, collect) { + const cs = window.getComputedStyle(srcEl); + let cssText = ""; + for (let i = 0;i < cs.length; i++) { + const name = cs[i]; + let value = cs.getPropertyValue(name); + if (name === "font-size" && value.endsWith("px")) { + const n = parseFloat(value); + if (n > 0) + value = `${n - 0.1}px`; + } + cssText += `${name}:${value};`; + } + try { + cloneEl.style.cssText = cssText; + } catch {} + const bg = cs.getPropertyValue("background-image"); + if (bg && bg.indexOf("url(") !== -1) { + collect.bgJobs.push({ el: cloneEl, value: bg }); + for (const u of extractUrls(bg)) + collect.urls.add(u); + } +} +function buildStyledClone(src, collect) { + if (src.nodeType === Node.TEXT_NODE) + return src.cloneNode(false); + if (src.nodeType !== Node.ELEMENT_NODE) + return null; + const el = src; + const tag = (el.tagName || "").toLowerCase(); + if (SKIP_TAGS2.has(tag)) + return null; + if (tag === "canvas") { + try { + const dataUrl = el.toDataURL(); + const img = document.createElement("img"); + img.setAttribute("src", dataUrl); + inlineComputedStyle(el, img, collect); + return img; + } catch {} + } + if (tag === "svg") { + const c2 = el.cloneNode(true); + try { + inlineComputedStyle(el, c2, collect); + } catch {} + return c2; + } + const c = el.cloneNode(false); + if (c.style) + inlineComputedStyle(el, c, collect); + if (tag === "img") { + const im = el; + const url = im.currentSrc || im.src; + if (url && url.indexOf("data:") !== 0) { + 
c.removeAttribute("srcset"); + collect.imgJobs.push({ el: c, url }); + collect.urls.add(url); + } + } + const kids = el.childNodes; + for (let i = 0;i < kids.length; i++) { + const cc = buildStyledClone(kids[i], collect); + if (cc) + c.appendChild(cc); + } + return c; +} +async function nativeRenderToDataUrl(node, o) { + const collect = { imgJobs: [], bgJobs: [], urls: new Set }; + const clone = buildStyledClone(node, collect); + if (!clone) + throw new Error("nothing to render"); + const urlList = Array.from(collect.urls); + const dataUrlMap = new Map; + await Promise.all(urlList.map(async (u) => { + dataUrlMap.set(u, await fetchResourceAsDataUrl(u)); + })); + for (const job of collect.imgJobs) { + job.el.setAttribute("src", dataUrlMap.get(job.url) || TRANSPARENT_1PX); + } + for (const job of collect.bgJobs) { + let v = job.value; + for (const u of extractUrls(job.value)) { + const d = dataUrlMap.get(u); + if (d) + v = v.split(u).join(d); + } + try { + job.el.style.backgroundImage = v; + } catch {} + } + if (o.isFull) { + clone.style.width = `${o.width}px`; + clone.style.height = `${o.height}px`; + } else { + clone.style.margin = "0"; + } + clone.setAttribute("xmlns", "http://www.w3.org/1999/xhtml"); + const xml = new XMLSerializer().serializeToString(clone); + const svg = `` + `${xml}`; + const svgUrl = `data:image/svg+xml;charset=utf-8,${encodeURIComponent(svg)}`; + const img = await loadSvgImage(svgUrl); + const canvas = document.createElement("canvas"); + canvas.width = Math.max(1, Math.round(o.width * o.pixelRatio)); + canvas.height = Math.max(1, Math.round(o.height * o.pixelRatio)); + const ctx = canvas.getContext("2d"); + if (!ctx) + throw new Error("2d canvas context unavailable"); + ctx.drawImage(img, 0, 0, canvas.width, canvas.height); + return o.format === "jpeg" ? 
canvas.toDataURL("image/jpeg", o.quality) : canvas.toDataURL("image/png"); } async function cropDataUrl(dataUrl, x, y, w, h, format, quality) { return new Promise((resolve) => { @@ -4178,101 +4326,55 @@ function resolveTarget(action) { } } async function handleDomScreenshot(action) { - const lib = getLibrary(); - if (!lib) { - return { - success: false, - error: "html-to-image library not loaded into this frame — SW must inject screenshot-runner.js before dispatching dom_screenshot" - }; - } const { node, error } = resolveTarget(action); if (!node) return { success: false, error: error || "no target resolved" }; const format = action.format === "jpeg" ? "jpeg" : "png"; const qualityPct = typeof action.quality === "number" ? Math.max(1, Math.min(100, action.quality)) : 92; const basePixelRatio = typeof action.scale === "number" && action.scale > 0 ? action.scale : window.devicePixelRatio || 1; + const mode = action.mode || "full"; + const isFull = mode === "full" || mode === "region"; + let width; + let height; + if (isFull) { + width = Math.max(document.documentElement.scrollWidth, document.body?.scrollWidth || 0); + height = Math.max(document.documentElement.scrollHeight, document.body?.scrollHeight || 0); + } else { + const rect = node.getBoundingClientRect(); + width = Math.max(1, Math.ceil(rect.width)); + height = Math.max(1, Math.ceil(rect.height)); + } let pixelRatio = basePixelRatio; const target = typeof action.target_max_long_edge === "number" && action.target_max_long_edge > 0 ? 
action.target_max_long_edge : undefined; if (target !== undefined) { - const mode = action.mode || "full"; - let longEdgeCss; - if (mode === "full" || mode === "region") { - const docW = Math.max(document.documentElement.scrollWidth, document.body?.scrollWidth || 0); - const docH = Math.max(document.documentElement.scrollHeight, document.body?.scrollHeight || 0); - longEdgeCss = Math.max(docW, docH); - } else { - const rect = node.getBoundingClientRect(); - longEdgeCss = Math.max(rect.width, rect.height); - } + const longEdgeCss = Math.max(width, height); if (longEdgeCss > 0 && longEdgeCss * pixelRatio > target) { pixelRatio = Math.max(0.05, target / longEdgeCss); } } - const TRANSPARENT_1PX = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGNgYGBgAAAABQABh6FO1AAAAABJRU5ErkJggg=="; - const opts = { - cacheBust: true, - pixelRatio, - quality: qualityPct / 100, - skipFonts: true, - imagePlaceholder: TRANSPARENT_1PX, - fetchRequestInit: { mode: "cors", cache: "no-cache" } - }; - const renderWithOpts = async (effectiveOpts) => { - return format === "jpeg" ? 
await lib.toJpeg(node, effectiveOpts) : await lib.toPng(node, effectiveOpts); - }; - const isFull = (action.mode || "full") === "full" || action.mode === "region"; - if (isFull) { - opts.width = Math.max(document.documentElement.scrollWidth, document.body?.scrollWidth || 0); - opts.height = Math.max(document.documentElement.scrollHeight, document.body?.scrollHeight || 0); - opts.canvasWidth = opts.width * pixelRatio; - opts.canvasHeight = opts.height * pixelRatio; - } try { - let dataUrl; - try { - dataUrl = await renderWithOpts(opts); - } catch (err) { - const msg = err.message || String(err); - const isTaint = /taint|cross-origin|may not be exported/i.test(msg); - if (!isTaint) - throw err; - const filteredOpts = { - ...opts, - filter: (n) => { - if (!(n instanceof Element)) - return true; - const tag = n.tagName?.toLowerCase(); - return tag !== "img" && tag !== "picture" && tag !== "video" && tag !== "canvas"; - } - }; - dataUrl = await renderWithOpts(filteredOpts); - } - const rect = node.getBoundingClientRect(); - let outWidth = Math.round((isFull ? opts.width : rect.width) * pixelRatio); - let outHeight = Math.round((isFull ? 
opts.height : rect.height) * pixelRatio); - if (action.mode === "region" && action.region) { + let dataUrl = await nativeRenderToDataUrl(node, { + width, + height, + pixelRatio, + format, + quality: qualityPct / 100, + isFull + }); + let outWidth = Math.round(width * pixelRatio); + let outHeight = Math.round(height * pixelRatio); + if (mode === "region" && action.region) { const region = action.region; - const cropX = Math.round(region.x * pixelRatio); - const cropY = Math.round(region.y * pixelRatio); - const cropW = Math.round(region.width * pixelRatio); - const cropH = Math.round(region.height * pixelRatio); - const cropped = await cropDataUrl(dataUrl, cropX, cropY, cropW, cropH, format, qualityPct / 100); + const cropped = await cropDataUrl(dataUrl, Math.round(region.x * pixelRatio), Math.round(region.y * pixelRatio), Math.round(region.width * pixelRatio), Math.round(region.height * pixelRatio), format, qualityPct / 100); if (cropped) { dataUrl = cropped; - outWidth = cropW; - outHeight = cropH; + outWidth = Math.round(region.width * pixelRatio); + outHeight = Math.round(region.height * pixelRatio); } } return { success: true, - data: { - dataUrl, - format, - width: outWidth, - height: outHeight, - pixelRatio, - mode: action.mode || "full" - } + data: { dataUrl, format, width: outWidth, height: outHeight, pixelRatio, mode } }; } catch (err) { return { success: false, error: `dom render failed: ${err.message}` }; diff --git a/extension/dist-mv2/manifest.json b/extension/dist-mv2/manifest.json index d21ce1f..df0c0c8 100644 --- a/extension/dist-mv2/manifest.json +++ b/extension/dist-mv2/manifest.json @@ -1,7 +1,7 @@ { "manifest_version": 2, "name": "Interceptor Electron App Bridge", - "version": "0.17.7", + "version": "0.18.3", "description": "Electron app bridge", "key": 
"MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAr8I3MwCNVrLcP0OCP8fRpiGWHIpbu2iMsyweU4YcX/CWdmO2fnZAJ6UP+IKkM5Zw9mt9kY4CFW4tZt096ChuMKiVBg4HM47ffWHTlo6rXOzdLuXMQ6MtFDuqbQuq6x0tLgYlOr7UxSiPVFgPRuczOz+kPkTO/h8nJNDaouNY1WnG4/ESruv10gwLwxgNKEvisnjxDU6lw9zDm/+pF8aAB8RMJbJQpRRBKDVL8rTRb4yCp6qi8E9VkJRK3sTD9sX0fgZoYd4pkvbK+7kPh9oxiuzPKNCKm9v8XTCVBh+sWBJBdke7PAoERkdXOs2MEijjO2A0X4/tzqygr3oARSghkQIDAQAB", "icons": { diff --git a/extension/dist-mv2/screenshot-runner.js b/extension/dist-mv2/screenshot-runner.js deleted file mode 100644 index e3f5163..0000000 --- a/extension/dist-mv2/screenshot-runner.js +++ /dev/null @@ -1,855 +0,0 @@ -var __defProp = Object.defineProperty; -var __returnValue = (v) => v; -function __exportSetter(name, newValue) { - this[name] = __returnValue.bind(null, newValue); -} -var __export = (target, all) => { - for (var name in all) - __defProp(target, name, { - get: all[name], - enumerable: true, - configurable: true, - set: __exportSetter.bind(all, name) - }); -}; - -// node_modules/html-to-image/es/index.js -var exports_es = {}; -__export(exports_es, { - toSvg: () => toSvg, - toPng: () => toPng, - toPixelData: () => toPixelData, - toJpeg: () => toJpeg, - toCanvas: () => toCanvas, - toBlob: () => toBlob, - getFontEmbedCSS: () => getFontEmbedCSS -}); - -// node_modules/html-to-image/es/util.js -function resolveUrl(url, baseUrl) { - if (url.match(/^[a-z]+:\/\//i)) { - return url; - } - if (url.match(/^\/\//)) { - return window.location.protocol + url; - } - if (url.match(/^[a-z]+:/i)) { - return url; - } - const doc = document.implementation.createHTMLDocument(); - const base = doc.createElement("base"); - const a = doc.createElement("a"); - doc.head.appendChild(base); - doc.body.appendChild(a); - if (baseUrl) { - base.href = baseUrl; - } - a.href = url; - return a.href; -} -var uuid = (() => { - let counter = 0; - const random = () => `0000${(Math.random() * 36 ** 4 << 0).toString(36)}`.slice(-4); - return () => { - counter += 1; - return 
`u${random()}${counter}`; - }; -})(); -function toArray(arrayLike) { - const arr = []; - for (let i = 0, l = arrayLike.length;i < l; i++) { - arr.push(arrayLike[i]); - } - return arr; -} -var styleProps = null; -function getStyleProperties(options = {}) { - if (styleProps) { - return styleProps; - } - if (options.includeStyleProperties) { - styleProps = options.includeStyleProperties; - return styleProps; - } - styleProps = toArray(window.getComputedStyle(document.documentElement)); - return styleProps; -} -function px(node, styleProperty) { - const win = node.ownerDocument.defaultView || window; - const val = win.getComputedStyle(node).getPropertyValue(styleProperty); - return val ? parseFloat(val.replace("px", "")) : 0; -} -function getNodeWidth(node) { - const leftBorder = px(node, "border-left-width"); - const rightBorder = px(node, "border-right-width"); - return node.clientWidth + leftBorder + rightBorder; -} -function getNodeHeight(node) { - const topBorder = px(node, "border-top-width"); - const bottomBorder = px(node, "border-bottom-width"); - return node.clientHeight + topBorder + bottomBorder; -} -function getImageSize(targetNode, options = {}) { - const width = options.width || getNodeWidth(targetNode); - const height = options.height || getNodeHeight(targetNode); - return { width, height }; -} -function getPixelRatio() { - let ratio; - let FINAL_PROCESS; - try { - FINAL_PROCESS = process; - } catch (e) {} - const val = FINAL_PROCESS && FINAL_PROCESS.env ? 
FINAL_PROCESS.env.devicePixelRatio : null; - if (val) { - ratio = parseInt(val, 10); - if (Number.isNaN(ratio)) { - ratio = 1; - } - } - return ratio || window.devicePixelRatio || 1; -} -var canvasDimensionLimit = 16384; -function checkCanvasDimensions(canvas) { - if (canvas.width > canvasDimensionLimit || canvas.height > canvasDimensionLimit) { - if (canvas.width > canvasDimensionLimit && canvas.height > canvasDimensionLimit) { - if (canvas.width > canvas.height) { - canvas.height *= canvasDimensionLimit / canvas.width; - canvas.width = canvasDimensionLimit; - } else { - canvas.width *= canvasDimensionLimit / canvas.height; - canvas.height = canvasDimensionLimit; - } - } else if (canvas.width > canvasDimensionLimit) { - canvas.height *= canvasDimensionLimit / canvas.width; - canvas.width = canvasDimensionLimit; - } else { - canvas.width *= canvasDimensionLimit / canvas.height; - canvas.height = canvasDimensionLimit; - } - } -} -function canvasToBlob(canvas, options = {}) { - if (canvas.toBlob) { - return new Promise((resolve) => { - canvas.toBlob(resolve, options.type ? options.type : "image/png", options.quality ? options.quality : 1); - }); - } - return new Promise((resolve) => { - const binaryString = window.atob(canvas.toDataURL(options.type ? options.type : undefined, options.quality ? options.quality : undefined).split(",")[1]); - const len = binaryString.length; - const binaryArray = new Uint8Array(len); - for (let i = 0;i < len; i += 1) { - binaryArray[i] = binaryString.charCodeAt(i); - } - resolve(new Blob([binaryArray], { - type: options.type ? 
options.type : "image/png" - })); - }); -} -function createImage(url) { - return new Promise((resolve, reject) => { - const img = new Image; - img.onload = () => { - img.decode().then(() => { - requestAnimationFrame(() => resolve(img)); - }); - }; - img.onerror = reject; - img.crossOrigin = "anonymous"; - img.decoding = "async"; - img.src = url; - }); -} -async function svgToDataURL(svg) { - return Promise.resolve().then(() => new XMLSerializer().serializeToString(svg)).then(encodeURIComponent).then((html) => `data:image/svg+xml;charset=utf-8,${html}`); -} -async function nodeToDataURL(node, width, height) { - const xmlns = "http://www.w3.org/2000/svg"; - const svg = document.createElementNS(xmlns, "svg"); - const foreignObject = document.createElementNS(xmlns, "foreignObject"); - svg.setAttribute("width", `${width}`); - svg.setAttribute("height", `${height}`); - svg.setAttribute("viewBox", `0 0 ${width} ${height}`); - foreignObject.setAttribute("width", "100%"); - foreignObject.setAttribute("height", "100%"); - foreignObject.setAttribute("x", "0"); - foreignObject.setAttribute("y", "0"); - foreignObject.setAttribute("externalResourcesRequired", "true"); - svg.appendChild(foreignObject); - foreignObject.appendChild(node); - return svgToDataURL(svg); -} -var isInstanceOfElement = (node, instance) => { - if (node instanceof instance) - return true; - const nodePrototype = Object.getPrototypeOf(node); - if (nodePrototype === null) - return false; - return nodePrototype.constructor.name === instance.name || isInstanceOfElement(nodePrototype, instance); -}; - -// node_modules/html-to-image/es/clone-pseudos.js -function formatCSSText(style) { - const content = style.getPropertyValue("content"); - return `${style.cssText} content: '${content.replace(/'|"/g, "")}';`; -} -function formatCSSProperties(style, options) { - return getStyleProperties(options).map((name) => { - const value = style.getPropertyValue(name); - const priority = style.getPropertyPriority(name); - 
return `${name}: ${value}${priority ? " !important" : ""};`; - }).join(" "); -} -function getPseudoElementStyle(className, pseudo, style, options) { - const selector = `.${className}:${pseudo}`; - const cssText = style.cssText ? formatCSSText(style) : formatCSSProperties(style, options); - return document.createTextNode(`${selector}{${cssText}}`); -} -function clonePseudoElement(nativeNode, clonedNode, pseudo, options) { - const style = window.getComputedStyle(nativeNode, pseudo); - const content = style.getPropertyValue("content"); - if (content === "" || content === "none") { - return; - } - const className = uuid(); - try { - clonedNode.className = `${clonedNode.className} ${className}`; - } catch (err) { - return; - } - const styleElement = document.createElement("style"); - styleElement.appendChild(getPseudoElementStyle(className, pseudo, style, options)); - clonedNode.appendChild(styleElement); -} -function clonePseudoElements(nativeNode, clonedNode, options) { - clonePseudoElement(nativeNode, clonedNode, ":before", options); - clonePseudoElement(nativeNode, clonedNode, ":after", options); -} - -// node_modules/html-to-image/es/mimes.js -var WOFF = "application/font-woff"; -var JPEG = "image/jpeg"; -var mimes = { - woff: WOFF, - woff2: WOFF, - ttf: "application/font-truetype", - eot: "application/vnd.ms-fontobject", - png: "image/png", - jpg: JPEG, - jpeg: JPEG, - gif: "image/gif", - tiff: "image/tiff", - svg: "image/svg+xml", - webp: "image/webp" -}; -function getExtension(url) { - const match = /\.([^./]*?)$/g.exec(url); - return match ? 
match[1] : ""; -} -function getMimeType(url) { - const extension = getExtension(url).toLowerCase(); - return mimes[extension] || ""; -} - -// node_modules/html-to-image/es/dataurl.js -function getContentFromDataUrl(dataURL) { - return dataURL.split(/,/)[1]; -} -function isDataUrl(url) { - return url.search(/^(data:)/) !== -1; -} -function makeDataUrl(content, mimeType) { - return `data:${mimeType};base64,${content}`; -} -async function fetchAsDataURL(url, init, process2) { - const res = await fetch(url, init); - if (res.status === 404) { - throw new Error(`Resource "${res.url}" not found`); - } - const blob = await res.blob(); - return new Promise((resolve, reject) => { - const reader = new FileReader; - reader.onerror = reject; - reader.onloadend = () => { - try { - resolve(process2({ res, result: reader.result })); - } catch (error) { - reject(error); - } - }; - reader.readAsDataURL(blob); - }); -} -var cache = {}; -function getCacheKey(url, contentType, includeQueryParams) { - let key = url.replace(/\?.*/, ""); - if (includeQueryParams) { - key = url; - } - if (/ttf|otf|eot|woff2?/i.test(key)) { - key = key.replace(/.*\//, ""); - } - return contentType ? `[${contentType}]${key}` : key; -} -async function resourceToDataURL(resourceUrl, contentType, options) { - const cacheKey = getCacheKey(resourceUrl, contentType, options.includeQueryParams); - if (cache[cacheKey] != null) { - return cache[cacheKey]; - } - if (options.cacheBust) { - resourceUrl += (/\?/.test(resourceUrl) ? 
"&" : "?") + new Date().getTime(); - } - let dataURL; - try { - const content = await fetchAsDataURL(resourceUrl, options.fetchRequestInit, ({ res, result }) => { - if (!contentType) { - contentType = res.headers.get("Content-Type") || ""; - } - return getContentFromDataUrl(result); - }); - dataURL = makeDataUrl(content, contentType); - } catch (error) { - dataURL = options.imagePlaceholder || ""; - let msg = `Failed to fetch resource: ${resourceUrl}`; - if (error) { - msg = typeof error === "string" ? error : error.message; - } - if (msg) { - console.warn(msg); - } - } - cache[cacheKey] = dataURL; - return dataURL; -} - -// node_modules/html-to-image/es/clone-node.js -async function cloneCanvasElement(canvas) { - const dataURL = canvas.toDataURL(); - if (dataURL === "data:,") { - return canvas.cloneNode(false); - } - return createImage(dataURL); -} -async function cloneVideoElement(video, options) { - if (video.currentSrc) { - const canvas = document.createElement("canvas"); - const ctx = canvas.getContext("2d"); - canvas.width = video.clientWidth; - canvas.height = video.clientHeight; - ctx === null || ctx === undefined || ctx.drawImage(video, 0, 0, canvas.width, canvas.height); - const dataURL2 = canvas.toDataURL(); - return createImage(dataURL2); - } - const poster = video.poster; - const contentType = getMimeType(poster); - const dataURL = await resourceToDataURL(poster, contentType, options); - return createImage(dataURL); -} -async function cloneIFrameElement(iframe, options) { - var _a; - try { - if ((_a = iframe === null || iframe === undefined ? undefined : iframe.contentDocument) === null || _a === undefined ? 
undefined : _a.body) { - return await cloneNode(iframe.contentDocument.body, options, true); - } - } catch (_b) {} - return iframe.cloneNode(false); -} -async function cloneSingleNode(node, options) { - if (isInstanceOfElement(node, HTMLCanvasElement)) { - return cloneCanvasElement(node); - } - if (isInstanceOfElement(node, HTMLVideoElement)) { - return cloneVideoElement(node, options); - } - if (isInstanceOfElement(node, HTMLIFrameElement)) { - return cloneIFrameElement(node, options); - } - return node.cloneNode(isSVGElement(node)); -} -var isSlotElement = (node) => node.tagName != null && node.tagName.toUpperCase() === "SLOT"; -var isSVGElement = (node) => node.tagName != null && node.tagName.toUpperCase() === "SVG"; -async function cloneChildren(nativeNode, clonedNode, options) { - var _a, _b; - if (isSVGElement(clonedNode)) { - return clonedNode; - } - let children = []; - if (isSlotElement(nativeNode) && nativeNode.assignedNodes) { - children = toArray(nativeNode.assignedNodes()); - } else if (isInstanceOfElement(nativeNode, HTMLIFrameElement) && ((_a = nativeNode.contentDocument) === null || _a === undefined ? undefined : _a.body)) { - children = toArray(nativeNode.contentDocument.body.childNodes); - } else { - children = toArray(((_b = nativeNode.shadowRoot) !== null && _b !== undefined ? 
_b : nativeNode).childNodes); - } - if (children.length === 0 || isInstanceOfElement(nativeNode, HTMLVideoElement)) { - return clonedNode; - } - await children.reduce((deferred, child) => deferred.then(() => cloneNode(child, options)).then((clonedChild) => { - if (clonedChild) { - clonedNode.appendChild(clonedChild); - } - }), Promise.resolve()); - return clonedNode; -} -function cloneCSSStyle(nativeNode, clonedNode, options) { - const targetStyle = clonedNode.style; - if (!targetStyle) { - return; - } - const sourceStyle = window.getComputedStyle(nativeNode); - if (sourceStyle.cssText) { - targetStyle.cssText = sourceStyle.cssText; - targetStyle.transformOrigin = sourceStyle.transformOrigin; - } else { - getStyleProperties(options).forEach((name) => { - let value = sourceStyle.getPropertyValue(name); - if (name === "font-size" && value.endsWith("px")) { - const reducedFont = Math.floor(parseFloat(value.substring(0, value.length - 2))) - 0.1; - value = `${reducedFont}px`; - } - if (isInstanceOfElement(nativeNode, HTMLIFrameElement) && name === "display" && value === "inline") { - value = "block"; - } - if (name === "d" && clonedNode.getAttribute("d")) { - value = `path(${clonedNode.getAttribute("d")})`; - } - targetStyle.setProperty(name, value, sourceStyle.getPropertyPriority(name)); - }); - } -} -function cloneInputValue(nativeNode, clonedNode) { - if (isInstanceOfElement(nativeNode, HTMLTextAreaElement)) { - clonedNode.innerHTML = nativeNode.value; - } - if (isInstanceOfElement(nativeNode, HTMLInputElement)) { - clonedNode.setAttribute("value", nativeNode.value); - } -} -function cloneSelectValue(nativeNode, clonedNode) { - if (isInstanceOfElement(nativeNode, HTMLSelectElement)) { - const clonedSelect = clonedNode; - const selectedOption = Array.from(clonedSelect.children).find((child) => nativeNode.value === child.getAttribute("value")); - if (selectedOption) { - selectedOption.setAttribute("selected", ""); - } - } -} -function decorate(nativeNode, clonedNode, 
options) { - if (isInstanceOfElement(clonedNode, Element)) { - cloneCSSStyle(nativeNode, clonedNode, options); - clonePseudoElements(nativeNode, clonedNode, options); - cloneInputValue(nativeNode, clonedNode); - cloneSelectValue(nativeNode, clonedNode); - } - return clonedNode; -} -async function ensureSVGSymbols(clone, options) { - const uses = clone.querySelectorAll ? clone.querySelectorAll("use") : []; - if (uses.length === 0) { - return clone; - } - const processedDefs = {}; - for (let i = 0;i < uses.length; i++) { - const use = uses[i]; - const id = use.getAttribute("xlink:href"); - if (id) { - const exist = clone.querySelector(id); - const definition = document.querySelector(id); - if (!exist && definition && !processedDefs[id]) { - processedDefs[id] = await cloneNode(definition, options, true); - } - } - } - const nodes = Object.values(processedDefs); - if (nodes.length) { - const ns = "http://www.w3.org/1999/xhtml"; - const svg = document.createElementNS(ns, "svg"); - svg.setAttribute("xmlns", ns); - svg.style.position = "absolute"; - svg.style.width = "0"; - svg.style.height = "0"; - svg.style.overflow = "hidden"; - svg.style.display = "none"; - const defs = document.createElementNS(ns, "defs"); - svg.appendChild(defs); - for (let i = 0;i < nodes.length; i++) { - defs.appendChild(nodes[i]); - } - clone.appendChild(svg); - } - return clone; -} -async function cloneNode(node, options, isRoot) { - if (!isRoot && options.filter && !options.filter(node)) { - return null; - } - return Promise.resolve(node).then((clonedNode) => cloneSingleNode(clonedNode, options)).then((clonedNode) => cloneChildren(node, clonedNode, options)).then((clonedNode) => decorate(node, clonedNode, options)).then((clonedNode) => ensureSVGSymbols(clonedNode, options)); -} - -// node_modules/html-to-image/es/embed-resources.js -var URL_REGEX = /url\((['"]?)([^'"]+?)\1\)/g; -var URL_WITH_FORMAT_REGEX = /url\([^)]+\)\s*format\((["']?)([^"']+)\1\)/g; -var FONT_SRC_REGEX = 
/src:\s*(?:url\([^)]+\)\s*format\([^)]+\)[,;]\s*)+/g; -function toRegex(url) { - const escaped = url.replace(/([.*+?^${}()|\[\]\/\\])/g, "\\$1"); - return new RegExp(`(url\\(['"]?)(${escaped})(['"]?\\))`, "g"); -} -function parseURLs(cssText) { - const urls = []; - cssText.replace(URL_REGEX, (raw, quotation, url) => { - urls.push(url); - return raw; - }); - return urls.filter((url) => !isDataUrl(url)); -} -async function embed(cssText, resourceURL, baseURL, options, getContentFromUrl) { - try { - const resolvedURL = baseURL ? resolveUrl(resourceURL, baseURL) : resourceURL; - const contentType = getMimeType(resourceURL); - let dataURL; - if (getContentFromUrl) { - const content = await getContentFromUrl(resolvedURL); - dataURL = makeDataUrl(content, contentType); - } else { - dataURL = await resourceToDataURL(resolvedURL, contentType, options); - } - return cssText.replace(toRegex(resourceURL), `$1${dataURL}$3`); - } catch (error) {} - return cssText; -} -function filterPreferredFontFormat(str, { preferredFontFormat }) { - return !preferredFontFormat ? str : str.replace(FONT_SRC_REGEX, (match) => { - while (true) { - const [src, , format] = URL_WITH_FORMAT_REGEX.exec(match) || []; - if (!format) { - return ""; - } - if (format === preferredFontFormat) { - return `src: ${src};`; - } - } - }); -} -function shouldEmbed(url) { - return url.search(URL_REGEX) !== -1; -} -async function embedResources(cssText, baseUrl, options) { - if (!shouldEmbed(cssText)) { - return cssText; - } - const filteredCSSText = filterPreferredFontFormat(cssText, options); - const urls = parseURLs(filteredCSSText); - return urls.reduce((deferred, url) => deferred.then((css) => embed(css, url, baseUrl, options)), Promise.resolve(filteredCSSText)); -} - -// node_modules/html-to-image/es/embed-images.js -async function embedProp(propName, node, options) { - var _a; - const propValue = (_a = node.style) === null || _a === undefined ? 
undefined : _a.getPropertyValue(propName); - if (propValue) { - const cssString = await embedResources(propValue, null, options); - node.style.setProperty(propName, cssString, node.style.getPropertyPriority(propName)); - return true; - } - return false; -} -async function embedBackground(clonedNode, options) { - await embedProp("background", clonedNode, options) || await embedProp("background-image", clonedNode, options); - await embedProp("mask", clonedNode, options) || await embedProp("-webkit-mask", clonedNode, options) || await embedProp("mask-image", clonedNode, options) || await embedProp("-webkit-mask-image", clonedNode, options); -} -async function embedImageNode(clonedNode, options) { - const isImageElement = isInstanceOfElement(clonedNode, HTMLImageElement); - if (!(isImageElement && !isDataUrl(clonedNode.src)) && !(isInstanceOfElement(clonedNode, SVGImageElement) && !isDataUrl(clonedNode.href.baseVal))) { - return; - } - const url = isImageElement ? clonedNode.src : clonedNode.href.baseVal; - const dataURL = await resourceToDataURL(url, getMimeType(url), options); - await new Promise((resolve, reject) => { - clonedNode.onload = resolve; - clonedNode.onerror = options.onImageErrorHandler ? 
(...attributes) => { - try { - resolve(options.onImageErrorHandler(...attributes)); - } catch (error) { - reject(error); - } - } : reject; - const image = clonedNode; - if (image.decode) { - image.decode = resolve; - } - if (image.loading === "lazy") { - image.loading = "eager"; - } - if (isImageElement) { - clonedNode.srcset = ""; - clonedNode.src = dataURL; - } else { - clonedNode.href.baseVal = dataURL; - } - }); -} -async function embedChildren(clonedNode, options) { - const children = toArray(clonedNode.childNodes); - const deferreds = children.map((child) => embedImages(child, options)); - await Promise.all(deferreds).then(() => clonedNode); -} -async function embedImages(clonedNode, options) { - if (isInstanceOfElement(clonedNode, Element)) { - await embedBackground(clonedNode, options); - await embedImageNode(clonedNode, options); - await embedChildren(clonedNode, options); - } -} - -// node_modules/html-to-image/es/apply-style.js -function applyStyle(node, options) { - const { style } = node; - if (options.backgroundColor) { - style.backgroundColor = options.backgroundColor; - } - if (options.width) { - style.width = `${options.width}px`; - } - if (options.height) { - style.height = `${options.height}px`; - } - const manual = options.style; - if (manual != null) { - Object.keys(manual).forEach((key) => { - style[key] = manual[key]; - }); - } - return node; -} - -// node_modules/html-to-image/es/embed-webfonts.js -var cssFetchCache = {}; -async function fetchCSS(url) { - let cache2 = cssFetchCache[url]; - if (cache2 != null) { - return cache2; - } - const res = await fetch(url); - const cssText = await res.text(); - cache2 = { url, cssText }; - cssFetchCache[url] = cache2; - return cache2; -} -async function embedFonts(data, options) { - let cssText = data.cssText; - const regexUrl = /url\(["']?([^"')]+)["']?\)/g; - const fontLocs = cssText.match(/url\([^)]+\)/g) || []; - const loadFonts = fontLocs.map(async (loc) => { - let url = loc.replace(regexUrl, 
"$1"); - if (!url.startsWith("https://")) { - url = new URL(url, data.url).href; - } - return fetchAsDataURL(url, options.fetchRequestInit, ({ result }) => { - cssText = cssText.replace(loc, `url(${result})`); - return [loc, result]; - }); - }); - return Promise.all(loadFonts).then(() => cssText); -} -function parseCSS(source) { - if (source == null) { - return []; - } - const result = []; - const commentsRegex = /(\/\*[\s\S]*?\*\/)/gi; - let cssText = source.replace(commentsRegex, ""); - const keyframesRegex = new RegExp("((@.*?keyframes [\\s\\S]*?){([\\s\\S]*?}\\s*?)})", "gi"); - while (true) { - const matches = keyframesRegex.exec(cssText); - if (matches === null) { - break; - } - result.push(matches[0]); - } - cssText = cssText.replace(keyframesRegex, ""); - const importRegex = /@import[\s\S]*?url\([^)]*\)[\s\S]*?;/gi; - const combinedCSSRegex = "((\\s*?(?:\\/\\*[\\s\\S]*?\\*\\/)?\\s*?@media[\\s\\S]" + "*?){([\\s\\S]*?)}\\s*?})|(([\\s\\S]*?){([\\s\\S]*?)})"; - const unifiedRegex = new RegExp(combinedCSSRegex, "gi"); - while (true) { - let matches = importRegex.exec(cssText); - if (matches === null) { - matches = unifiedRegex.exec(cssText); - if (matches === null) { - break; - } else { - importRegex.lastIndex = unifiedRegex.lastIndex; - } - } else { - unifiedRegex.lastIndex = importRegex.lastIndex; - } - result.push(matches[0]); - } - return result; -} -async function getCSSRules(styleSheets, options) { - const ret = []; - const deferreds = []; - styleSheets.forEach((sheet) => { - if ("cssRules" in sheet) { - try { - toArray(sheet.cssRules || []).forEach((item, index) => { - if (item.type === CSSRule.IMPORT_RULE) { - let importIndex = index + 1; - const url = item.href; - const deferred = fetchCSS(url).then((metadata) => embedFonts(metadata, options)).then((cssText) => parseCSS(cssText).forEach((rule) => { - try { - sheet.insertRule(rule, rule.startsWith("@import") ? 
importIndex += 1 : sheet.cssRules.length); - } catch (error) { - console.error("Error inserting rule from remote css", { - rule, - error - }); - } - })).catch((e) => { - console.error("Error loading remote css", e.toString()); - }); - deferreds.push(deferred); - } - }); - } catch (e) { - const inline = styleSheets.find((a) => a.href == null) || document.styleSheets[0]; - if (sheet.href != null) { - deferreds.push(fetchCSS(sheet.href).then((metadata) => embedFonts(metadata, options)).then((cssText) => parseCSS(cssText).forEach((rule) => { - inline.insertRule(rule, inline.cssRules.length); - })).catch((err) => { - console.error("Error loading remote stylesheet", err); - })); - } - console.error("Error inlining remote css file", e); - } - } - }); - return Promise.all(deferreds).then(() => { - styleSheets.forEach((sheet) => { - if ("cssRules" in sheet) { - try { - toArray(sheet.cssRules || []).forEach((item) => { - ret.push(item); - }); - } catch (e) { - console.error(`Error while reading CSS rules from ${sheet.href}`, e); - } - } - }); - return ret; - }); -} -function getWebFontRules(cssRules) { - return cssRules.filter((rule) => rule.type === CSSRule.FONT_FACE_RULE).filter((rule) => shouldEmbed(rule.style.getPropertyValue("src"))); -} -async function parseWebFontRules(node, options) { - if (node.ownerDocument == null) { - throw new Error("Provided element is not within a Document"); - } - const styleSheets = toArray(node.ownerDocument.styleSheets); - const cssRules = await getCSSRules(styleSheets, options); - return getWebFontRules(cssRules); -} -function normalizeFontFamily(font) { - return font.trim().replace(/["']/g, ""); -} -function getUsedFonts(node) { - const fonts = new Set; - function traverse(node2) { - const fontFamily = node2.style.fontFamily || getComputedStyle(node2).fontFamily; - fontFamily.split(",").forEach((font) => { - fonts.add(normalizeFontFamily(font)); - }); - Array.from(node2.children).forEach((child) => { - if (child instanceof HTMLElement) { 
- traverse(child); - } - }); - } - traverse(node); - return fonts; -} -async function getWebFontCSS(node, options) { - const rules = await parseWebFontRules(node, options); - const usedFonts = getUsedFonts(node); - const cssTexts = await Promise.all(rules.filter((rule) => usedFonts.has(normalizeFontFamily(rule.style.fontFamily))).map((rule) => { - const baseUrl = rule.parentStyleSheet ? rule.parentStyleSheet.href : null; - return embedResources(rule.cssText, baseUrl, options); - })); - return cssTexts.join(` -`); -} -async function embedWebFonts(clonedNode, options) { - const cssText = options.fontEmbedCSS != null ? options.fontEmbedCSS : options.skipFonts ? null : await getWebFontCSS(clonedNode, options); - if (cssText) { - const styleNode = document.createElement("style"); - const sytleContent = document.createTextNode(cssText); - styleNode.appendChild(sytleContent); - if (clonedNode.firstChild) { - clonedNode.insertBefore(styleNode, clonedNode.firstChild); - } else { - clonedNode.appendChild(styleNode); - } - } -} - -// node_modules/html-to-image/es/index.js -async function toSvg(node, options = {}) { - const { width, height } = getImageSize(node, options); - const clonedNode = await cloneNode(node, options, true); - await embedWebFonts(clonedNode, options); - await embedImages(clonedNode, options); - applyStyle(clonedNode, options); - const datauri = await nodeToDataURL(clonedNode, width, height); - return datauri; -} -async function toCanvas(node, options = {}) { - const { width, height } = getImageSize(node, options); - const svg = await toSvg(node, options); - const img = await createImage(svg); - const canvas = document.createElement("canvas"); - const context = canvas.getContext("2d"); - const ratio = options.pixelRatio || getPixelRatio(); - const canvasWidth = options.canvasWidth || width; - const canvasHeight = options.canvasHeight || height; - canvas.width = canvasWidth * ratio; - canvas.height = canvasHeight * ratio; - if (!options.skipAutoScale) { - 
checkCanvasDimensions(canvas); - } - canvas.style.width = `${canvasWidth}`; - canvas.style.height = `${canvasHeight}`; - if (options.backgroundColor) { - context.fillStyle = options.backgroundColor; - context.fillRect(0, 0, canvas.width, canvas.height); - } - context.drawImage(img, 0, 0, canvas.width, canvas.height); - return canvas; -} -async function toPixelData(node, options = {}) { - const { width, height } = getImageSize(node, options); - const canvas = await toCanvas(node, options); - const ctx = canvas.getContext("2d"); - return ctx.getImageData(0, 0, width, height).data; -} -async function toPng(node, options = {}) { - const canvas = await toCanvas(node, options); - return canvas.toDataURL(); -} -async function toJpeg(node, options = {}) { - const canvas = await toCanvas(node, options); - return canvas.toDataURL("image/jpeg", options.quality || 1); -} -async function toBlob(node, options = {}) { - const canvas = await toCanvas(node, options); - const blob = await canvasToBlob(canvas); - return blob; -} -async function getFontEmbedCSS(node, options = {}) { - return getWebFontCSS(node, options); -} - -// extension/src/screenshot-runner.ts -globalThis.__interceptor_h2i = exports_es; -globalThis.__interceptor_h2i_loaded = true; diff --git a/extension/manifest.json b/extension/manifest.json index 74f7ea1..31f24c3 100644 --- a/extension/manifest.json +++ b/extension/manifest.json @@ -1,7 +1,7 @@ { "manifest_version": 3, "name": "Interceptor", - "version": "0.17.7", + "version": "0.18.3", "minimum_chrome_version": "116", "description": "Browser bridge", "icons": { @@ -44,6 +44,9 @@ "service_worker": "background.js", "type": "module" }, + "content_security_policy": { + "extension_pages": "script-src 'self' 'wasm-unsafe-eval'; object-src 'self'" + }, "action": { "default_title": "Interceptor", "default_popup": "popup.html", diff --git a/extension/src/background/capabilities/canvas.ts b/extension/src/background/capabilities/canvas.ts index ee8d177..8a2548e 100644 --- 
a/extension/src/background/capabilities/canvas.ts +++ b/extension/src/background/capabilities/canvas.ts @@ -172,6 +172,55 @@ function hostCanvasSignals(limit = 20) { } } +// Native accessibility text for a <canvas>, read in the MAIN world. A canvas's +// accessible text comes from aria-label / aria-labelledby / aria-describedby / +// title and, per the HTML spec, its child DOM (the "fallback content" the +// accessibility tree exposes) plus an enclosing <figure>'s <figcaption>. This +// is the browser-native text source — no pixel OCR, no library. +function canvasAccessibleText(canvasIndex: number) { + const canvases = Array.from(document.querySelectorAll("canvas")) + const c = canvases[canvasIndex] + if (!c) return { found: false, error: "canvas index out of range", canvasCount: canvases.length } + + const norm = (s: string | null | undefined) => (s || "").replace(/\s+/g, " ").trim() + const byIds = (ids: string | null) => + !ids ? "" : ids.split(/\s+/).map((id) => norm(document.getElementById(id)?.textContent)).filter(Boolean).join(" ") + + const sources: Record<string, string> = {} + const ariaLabel = norm(c.getAttribute("aria-label")) + if (ariaLabel) sources.ariaLabel = ariaLabel + const ariaLabelledby = byIds(c.getAttribute("aria-labelledby")) + if (ariaLabelledby) sources.ariaLabelledby = ariaLabelledby + // Child DOM of <canvas> is its accessibility fallback subtree. + const fallback = norm(c.textContent) + if (fallback) sources.fallback = fallback + const fig = c.closest("figure") + const figcaption = fig ? norm(fig.querySelector("figcaption")?.textContent) : "" + if (figcaption) sources.figcaption = figcaption + const ariaDescribedby = byIds(c.getAttribute("aria-describedby")) + if (ariaDescribedby) sources.ariaDescribedby = ariaDescribedby + const title = norm(c.getAttribute("title")) + if (title) sources.title = title + + // Priority: explicit name → fallback content → caption → description → title.
+ const text = [ + sources.ariaLabel, + sources.ariaLabelledby, + sources.fallback, + sources.figcaption, + sources.ariaDescribedby, + sources.title + ].filter(Boolean).join("\n") + + return { + found: !!text, + text, + role: c.getAttribute("role") || "", + sources, + canvasCount: canvases.length + } +} + function canvasObserverSummary(limit = 100, kinds?: string[], canvasIndex?: number) { function normalize(kind: unknown): string { return String(kind || "").trim() @@ -454,42 +503,57 @@ export async function handleCanvasActions( } case "canvas_ocr": { - const readResult = await handleCanvasActions({ - type: "canvas_read", - canvasIndex: action.canvasIndex, - region: action.region, - format: "png" - }, tabId) - if (!readResult.success) return readResult - const dataUrl = (readResult.data as { dataUrl?: string }).dataUrl - if (!dataUrl) return { success: false, error: "canvas OCR requires a readable canvas image" } - - const ocrResult = await sendToOffscreen({ - type: "ocr", - dataUrl - }) as { success: boolean; data?: { text?: string; source?: string; confidence?: number | null }; error?: string } - - if (!ocrResult.success) return { success: false, error: ocrResult.error || "canvas OCR failed" } - + // Native text only — no pixel OCR. Read the canvas's accessibility text + // (aria-* + fallback subtree) plus the page's own semantic textbox model. + // For a canvas-rendered editor prefer `scene text`; for genuine pixel-only + // text use `macos vision text` (native macOS Vision OCR). + const a11y = await executeInMainWorld>( + tabId, canvasAccessibleText, [action.canvasIndex] + ) + if (a11y && (a11y as { error?: string }).error) { + return { success: false, error: (a11y as { error?: string }).error } + } const hostSignals = await executeInMainWorld>(tabId, hostCanvasSignals, [10]) - const semanticText = hostSignals?.docs?.textbox?.exists ? 
hostSignals.docs.textbox.textSample || "" : "" - const ocrText = ocrResult.data?.text || "" - const normalizedOcr = ocrText.replace(/\s+/g, " ").trim() - const normalizedSemantic = semanticText.replace(/\s+/g, " ").trim() - const matchedSemantic = normalizedOcr && normalizedSemantic - ? normalizedSemantic.includes(normalizedOcr) || normalizedOcr.includes(normalizedSemantic) - : false + const semanticText = hostSignals?.docs?.textbox?.exists ? (hostSignals.docs.textbox.textSample || "") : "" + const a11yText = a11y && (a11y as { found?: boolean }).found ? ((a11y as { text?: string }).text || "") : "" + + let text = a11yText || semanticText + let source = a11yText ? "accessibility" : (semanticText ? "semantic-textbox" : "none") + let confidence: number | null = null + let ocrFallbackUsed = false + + // No native text → OCR the canvas pixels with the bundled Tesseract engine + // (offline, cross-platform — works on browser-only / non-macOS installs). + if (!text) { + const readResult = await handleCanvasActions({ + type: "canvas_read", canvasIndex: action.canvasIndex, region: action.region, format: "png" + }, tabId) + const dataUrl = readResult.success ? (readResult.data as { dataUrl?: string }).dataUrl : undefined + if (dataUrl) { + const ocr = await sendToOffscreen({ type: "ocr", dataUrl }) as { success: boolean; data?: { text?: string; confidence?: number | null }; error?: string } + if (ocr.success && (ocr.data?.text || "").trim()) { + text = (ocr.data!.text as string).trim() + source = "tesseract" + confidence = ocr.data?.confidence ?? null + ocrFallbackUsed = true + } + } + } return { success: true, data: { - text: ocrText, - source: ocrResult.data?.source || "ocr", - confidence: ocrResult.data?.confidence ?? 
null, + text, + source, + confidence, diagnostics: { - pixelSource: "canvas_read", - strongerSourceAvailable: !!semanticText, - strongerSourceMatched: matchedSemantic + accessibilityText: !!a11yText, + accessibilitySources: (a11y as { sources?: unknown })?.sources || null, + semanticTextboxAvailable: !!semanticText, + ocrFallbackUsed, + hint: text + ? undefined + : "No accessible/semantic text and pixel OCR found nothing. For a canvas-rendered editor use `interceptor scene text`." } } } diff --git a/extension/src/background/capabilities/screenshot.ts b/extension/src/background/capabilities/screenshot.ts index bbb2c45..23ee3a2 100644 --- a/extension/src/background/capabilities/screenshot.ts +++ b/extension/src/background/capabilities/screenshot.ts @@ -127,19 +127,6 @@ function resolveDomMode(action: { [key: string]: unknown }): DomScreenshotMode { return "full" } -async function injectScreenshotRunner(tabId: number): Promise<{ success: boolean; error?: string }> { - try { - await chrome.scripting.executeScript({ - target: { tabId }, - world: "ISOLATED" as chrome.scripting.ExecutionWorld, - files: ["screenshot-runner.js"] - }) - return { success: true } - } catch (err) { - return { success: false, error: `failed to inject screenshot-runner.js: ${(err as Error).message}` } - } -} - // Re-encode a PNG/JPEG dataUrl as WebP using OffscreenCanvas. // html-to-image only emits PNG/JPEG, and chrome.tabs.captureVisibleTab only // accepts PNG/JPEG. WebP support is added by re-encoding at the SW boundary. @@ -204,11 +191,11 @@ async function handleDomRenderScreenshot( } } + // The DNR CORS rule lets the content script fetch third-party / + // background-image resources CORS-clean so they can be embedded as data URLs. + // No library injection is needed — content.ts renders natively. 
await installScreenshotCorsRule(tabId) try { - const inject = await injectScreenshotRunner(tabId) - if (!inject.success) return { success: false, error: inject.error || "runner injection failed" } - const dsAction: { type: string; [key: string]: unknown } = { type: "dom_screenshot", mode, format: renderFormat, quality } if (action.ref !== undefined) dsAction.ref = action.ref if (action.element !== undefined) dsAction.index = action.element @@ -217,7 +204,26 @@ async function handleDomRenderScreenshot( if (scale !== undefined) dsAction.scale = scale if (targetMaxLongEdge !== undefined) dsAction.target_max_long_edge = targetMaxLongEdge - const renderResult = await sendToContentScript(tabId, dsAction) as { success: boolean; error?: string; data?: { dataUrl: string; format: string; width: number; height: number; pixelRatio: number; mode: string } } + // Guard the content-script render with DOM_RENDER_TIMEOUT_MS so a stalled + // render fails fast with a clear error instead of silently hanging until + // the CLI's 45s WebSocket timeout (which returns no diagnostics at all). + let renderResult: { success: boolean; error?: string; data?: { dataUrl: string; format: string; width: number; height: number; pixelRatio: number; mode: string } } + try { + renderResult = await withCaptureTimeout( + "dom-render", + sendToContentScript(tabId, dsAction), + DOM_RENDER_TIMEOUT_MS + ) as { success: boolean; error?: string; data?: { dataUrl: string; format: string; width: number; height: number; pixelRatio: number; mode: string } } + } catch (err) { + if (err instanceof CaptureTimeoutError) { + return { + success: false, + error: `DOM-render timed out after ${DOM_RENDER_TIMEOUT_MS}ms — the content script did not return image data. The render stalled (e.g. 
a resource never settled); retry, or use --pixel for a compositor capture.`, + data: { layer: "dom-render-timeout" } + } + } + throw err + } if (!renderResult || !renderResult.success || !renderResult.data) { return { success: false, error: renderResult?.error || "dom render returned no data" } @@ -558,6 +564,40 @@ async function transformPixelDataUrl( } } +// ─── OCR: native capture → bundled Tesseract → deterministic text ───────────── +// Renders the target (selector / element / region / full page) to a PNG via the +// native DOM-render path, then OCRs it in the offscreen document with the +// bundled Tesseract.js engine. Cross-platform, offline, no macOS bridge, and no +// round-trip of pixels to the agent — returns a plain text string. +async function handleOcr( + action: { type: string; [key: string]: unknown }, + tabId: number +): Promise { + const shot: { type: string; [key: string]: unknown } = { type: "screenshot", format: "png", save: false } + for (const k of ["selector", "element", "ref", "region", "clip", "scale", "target_max_long_edge"]) { + if (action[k] !== undefined) shot[k] = action[k] + } + const rendered = await handleDomRenderScreenshot(shot, tabId) + if (!rendered.success) return rendered + const dataUrl = (rendered.data as { dataUrl?: string } | undefined)?.dataUrl + if (!dataUrl) return { success: false, error: "capture for OCR produced no image" } + + const ocr = await sendToOffscreen({ type: "ocr", dataUrl }) as { + success: boolean; data?: { text?: string; source?: string; confidence?: number | null }; error?: string + } + if (!ocr.success) return { success: false, error: ocr.error || "OCR failed" } + return { + success: true, + data: { + text: (ocr.data?.text || "").trim(), + source: ocr.data?.source || "tesseract", + confidence: ocr.data?.confidence ?? 
null, + width: (rendered.data as { width?: number } | undefined)?.width, + height: (rendered.data as { height?: number } | undefined)?.height + } + } +} + // ─── Public dispatcher ──────────────────────────────────────────────────────── export async function handleScreenshotActions( @@ -568,6 +608,9 @@ export async function handleScreenshotActions( case "screenshot_background": return handleScreenshotBackground(action, tabId) + case "ocr": + return handleOcr(action, tabId) + case "page_capture": { const mhtml = await chrome.pageCapture.saveAsMHTML({ tabId }) const text = await (mhtml as Blob).text() diff --git a/extension/src/background/router.ts b/extension/src/background/router.ts index 37ba033..d6eb9b5 100644 --- a/extension/src/background/router.ts +++ b/extension/src/background/router.ts @@ -30,7 +30,7 @@ restorePageCommCaptureConfig() type ActionResult = { success: boolean; error?: string; data?: unknown; tabId?: number } const OS_INPUT_ACTIONS = new Set(["os_click", "os_key", "os_type", "os_move"]) -const SCREENSHOT_ACTIONS = new Set(["screenshot", "screenshot_background", "page_capture"]) +const SCREENSHOT_ACTIONS = new Set(["screenshot", "screenshot_background", "page_capture", "ocr"]) const CAPTURE_STREAM_ACTIONS = new Set(["capture_start", "capture_frame", "capture_stop", "canvas_diff"]) const CANVAS_ACTIONS = new Set([ "canvas_list", diff --git a/extension/src/content/dom-screenshot.ts b/extension/src/content/dom-screenshot.ts index 1ea021b..58a8925 100644 --- a/extension/src/content/dom-screenshot.ts +++ b/extension/src/content/dom-screenshot.ts @@ -3,15 +3,19 @@ // Content-script handler for the DOM-render screenshot pipeline. // Driven by `case "dom_screenshot":` in content.ts. // -// Pre-conditions: -// - The vendored html-to-image bundle must already be injected into this -// frame's ISOLATED world via chrome.scripting.executeScript({ files: -// ["screenshot-runner.js"], world: "ISOLATED" }). The runner sets -// globalThis.__interceptor_h2i. 
-// - The CORS DNR session rule must be active for the duration of this call -// so that third-party , font, and