/**
 * Build script for the Zero Companion MV3 extension.
 *
 * Bundles `src/worker.ts` into a single classic service-worker script and emits
 * a generated module the `zero` CLI bundles in — `zero/src/companion/
 * extension-assets.ts` — mapping each extension filename to its contents. At
 * runtime `bridge-engine.ts` writes those files (plus a per-run `bridge.json`)
 * into `~/.zero/extension/`, which `zero browser connect` hands to Chrome via
 * `--load-extension`. Embedding the built extension in the CLI bundle keeps it
 * self-contained — no assumptions about the installer's on-disk layout.
 *
 * Run with `bun build.ts` (preferred) or via esbuild fallback.
 */
import { mkdir, readFile, writeFile } from "node:fs/promises";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";

const __dirname = dirname(fileURLToPath(import.meta.url));
const root = __dirname;
const manifestPath = resolve(root, "manifest.json");
const workerEntry = resolve(root, "src/worker.ts");
const assetsOut = resolve(root, "..", "zero", "src", "companion", "extension-assets.ts");

/**
 * Bundle the worker entry point into one IIFE script and return its source.
 *
 * Uses `Bun.build` when a `Bun` global is present, otherwise falls back to
 * esbuild (in-memory output, `chrome116` target).
 *
 * @returns The bundled worker JavaScript as text.
 * @throws AggregateError when Bun reports an unsuccessful build.
 * @throws Error when the bundler reports success but produced no output file.
 */
async function bundleWorker(): Promise<string> {
  const bun = (globalThis as any).Bun;
  if (typeof bun !== "undefined") {
    const out = await bun.build({
      entrypoints: [workerEntry],
      target: "browser",
      format: "iife",
      minify: false,
    });
    if (!out.success) throw new AggregateError(out.logs, "worker bundle failed");
    const artifact = out.outputs[0];
    if (!artifact) throw new Error("worker bundle produced no output");
    return await artifact.text();
  }
  const esbuild = await import("esbuild");
  const res = await esbuild.build({
    entryPoints: [workerEntry],
    bundle: true,
    write: false, // keep the bundle in memory; it is embedded below, not shipped as a file
    platform: "browser",
    target: "chrome116",
    format: "iife",
  });
  const file = res.outputFiles?.[0];
  if (!file) throw new Error("worker bundle produced no output");
  return file.text;
}

const workerJs = await bundleWorker();
const manifestJson = await readFile(manifestPath, "utf8");

// Filename → file contents, exactly as they should be written to disk later.
const assets: Record<string, string> = {
  "manifest.json": manifestJson,
  "worker.js": workerJs,
};

const banner = "// AUTO-GENERATED by extension/build.ts — do not edit by hand.\n" +
  "// Run `cd extension && bun build.ts` to regenerate.\n";
// The generated module must itself be valid TypeScript, so the Record needs its type arguments.
const body = `export const EXTENSION_ASSETS: Record<string, string> = ${JSON.stringify(assets, null, 2)};\n`;

await mkdir(dirname(assetsOut), { recursive: true });
await writeFile(assetsOut, banner + body);
console.log(`extension: built worker (${workerJs.length} bytes) → ${assetsOut}`);
{
  "manifest_version": 3,
  "name": "Zero Companion",
  "version": "1.0.0",
  "description": "Lets the Zero agent drive a tab in your real Chrome — your logins, your session, no separate browser.",
  "permissions": ["debugger", "tabs", "scripting", "activeTab", "alarms"],
  "host_permissions": ["<all_urls>"],
  "background": {
    "service_worker": "worker.js"
  }
}
/**
 * Zero Companion — MV3 service worker.
 *
 * This is the laptop end of the browser-control link. Instead of Playwright
 * launching and locking the user's Chrome profile, the agent now drives the
 * user's REAL, already-open tab from inside Chrome via `chrome.debugger`
 * (which is the Chrome DevTools Protocol — the exact API the server's headless
 * pool uses). Nothing is launched or locked, so the whole class of
 * profile-lock / keychain-mock / flag-fighting failures disappears.
 *
 * Wiring: the `zero browser connect` CLI runs a localhost WebSocket bridge and
 * writes `bridge.json` (port + one-time secret) into this extension's own
 * directory. We read it, connect to `ws://127.0.0.1:<port>`, prove the secret,
 * and then execute `{type:"command", id, action}` frames against the active
 * tab, replying `{type:"response", id, result|error}`. The action/result
 * shapes are byte-identical to `server/lib/browser/protocol.ts`, so the server
 * and agent can't tell this apart from the headless browser.
 *
 * The CDP action logic below is ported from `server/lib/browser/host-pool.ts`
 * — same a11y-tree snapshot with stable `[ref=eN]` ids, same ref-stale
 * recovery, same incremental diff, same replMode evaluate with console
 * capture. `cdp.send(method, params)` became `sendCmd(tabId, method, params)`.
 */

// chrome.* is provided by the extension runtime; type it loosely so the
// extension builds without @types/chrome as a dependency.
declare const chrome: any;

// ── Bridge link constants ──

const RECONNECT_MIN_MS = 1_000;
const RECONNECT_MAX_MS = 15_000;
/**
 * Secondary wake: if the worker is ever killed while the bridge is down, this
 * alarm revives it to retry connecting. The PRIMARY keepalive is the bridge's
 * 20s ping (an incoming WS message keeps the worker warm in Chrome 116+).
 * Chrome clamps alarm periods to a 30s (0.5 min) minimum.
 */
const KEEPALIVE_ALARM = "zero-keepalive";
const KEEPALIVE_PERIOD_MIN = 0.5;

/** Line cap for a full (non-interactive-only) accessibility snapshot. */
const MAX_SNAPSHOT_LINES = 150;
/** Minimum fraction of unchanged lines for a snapshot to be reported as a diff. */
const INCREMENTAL_THRESHOLD = 0.5;

// ── Bridge config (written by the CLI into this extension's dir) ──

interface BridgeConfig {
  port: number;
  secret: string;
}

/**
 * Load `bridge.json` from the extension's own directory.
 *
 * @returns The validated `{ port, secret }` pair, or `null` when the file is
 *   missing, unreadable, not JSON, or does not hold a usable TCP port
 *   (integer in 1–65535) and a non-empty string secret. Never throws.
 */
async function readBridgeConfig(): Promise<BridgeConfig | null> {
  try {
    // Always read fresh from disk: the CLI rewrites port/secret each run, and
    // unpacked extensions are served live from disk so no-store sees updates.
    const res = await fetch(chrome.runtime.getURL("bridge.json"), { cache: "no-store" });
    if (!res.ok) return null;
    const cfg: unknown = await res.json();
    if (typeof cfg !== "object" || cfg === null) return null;
    const { port, secret } = cfg as { port?: unknown; secret?: unknown };
    const portOk = typeof port === "number" && Number.isInteger(port) && port > 0 && port <= 65_535;
    const secretOk = typeof secret === "string" && secret.length > 0;
    if (!portOk || !secretOk) return null;
    // Hand back only the fields we validated, not whatever else the file held.
    return { port: port as number, secret: secret as string };
  } catch {
    // Best-effort by design: any failure just means "no bridge configured yet".
    return null;
  }
}
// ── Per-tab CDP driver ──

/** What a snapshot ref (`eN`) points at: the rendered role/name and the DOM node behind it. */
interface RefEntry {
  role: string;
  name: string;
  backendNodeId: number;
}

/** Ref-stripped lines and URL of the previous snapshot, kept for incremental diffs. */
interface SnapshotCache {
  prevLines?: string[];
  prevUrl?: string;
}

/**
 * Wraps a single attached tab. Holds the same per-page state the host-pool's
 * ProjectSession held (refMap, snapshotCache) plus the console buffer and
 * one-shot CDP event waiters this implementation needs.
 */
class TabDriver {
  refMap = new Map<string, RefEntry>();
  snapshotCache: SnapshotCache = {};
  /** Console lines captured during the current evaluate(). */
  consoleLogs: string[] = [];
  /** One-shot resolvers keyed by CDP event method (e.g. Page.loadEventFired). */
  private waiters = new Map<string, Array<() => void>>();

  constructor(public tabId: number) {}

  /** Send one CDP command to this tab through `chrome.debugger.sendCommand`. */
  send(method: string, params?: Record<string, unknown>): Promise<any> {
    return chrome.debugger.sendCommand({ tabId: this.tabId }, method, params ?? {});
  }

  /** Dispatched by the global onEvent listener for this tab. */
  onEvent(method: string, params: any): void {
    if (method === "Runtime.consoleAPICalled") {
      try {
        const level = params?.type ?? "log";
        const args = (params?.args ?? [])
          .map((a: any) => {
            if (a == null) return String(a);
            if ("value" in a) return typeof a.value === "string" ? a.value : JSON.stringify(a.value);
            return a.description ?? a.unserializableValue ?? "";
          })
          .join(" ");
        this.consoleLogs.push(`[${level}] ${args}`);
      } catch {
        /* ignore — a malformed console payload must not break event dispatch */
      }
      return;
    }
    const list = this.waiters.get(method);
    if (list) {
      // Detach first so each waiter sees an empty registry when it settles,
      // and iterate a copy so settling cannot disturb the loop.
      this.waiters.delete(method);
      for (const fn of [...list]) fn();
    }
  }

  /**
   * Resolve on the next occurrence of a CDP event, or after `timeout` ms. Never rejects.
   *
   * Whichever comes first wins; the loser is cleaned up, so a waiter that timed
   * out does not linger in the registry and a fired waiter leaves no live timer.
   */
  once(method: string, timeout: number): Promise<void> {
    return new Promise<void>((resolve) => {
      let settled = false;
      const settle = (): void => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        const pending = this.waiters.get(method);
        if (pending) {
          const at = pending.indexOf(settle);
          if (at !== -1) pending.splice(at, 1);
          if (pending.length === 0) this.waiters.delete(method);
        }
        resolve();
      };
      const timer = setTimeout(settle, timeout);
      const list = this.waiters.get(method) ?? [];
      list.push(settle);
      this.waiters.set(method, list);
    });
  }

  /** Current URL and title of the tab; empty strings if the tab cannot be queried. */
  async info(): Promise<{ url: string; title: string }> {
    try {
      const tab = await chrome.tabs.get(this.tabId);
      return { url: tab?.url ?? "", title: tab?.title ?? "" };
    } catch {
      return { url: "", title: "" };
    }
  }
}

// ── CDP action helpers (ported from host-pool.ts) ──

/**
 * Look up the DOM backend node id recorded for a snapshot ref.
 *
 * @throws Error when the ref is not in the driver's current `refMap`.
 */
function resolveRef(drv: TabDriver, ref: string): number {
  const entry = drv.refMap.get(ref);
  if (!entry) {
    throw new Error(`Element ref [${ref}] not found. Take a snapshot first to get current refs.`);
  }
  return entry.backendNodeId;
}
/**
 * Turn a backend node id into a remote object handle usable with
 * `Runtime.callFunctionOn`. The caller owns the handle and must release it.
 *
 * @throws Error when CDP returns no object id for the node.
 */
async function resolveNode(drv: TabDriver, backendNodeId: number): Promise<string> {
  const { object } = await drv.send("DOM.resolveNode", { backendNodeId });
  if (!object?.objectId) {
    throw new Error("Could not resolve node — it may have been removed from the DOM. Take a new snapshot.");
  }
  return object.objectId;
}

/** Release a remote object handle; failures are ignored (the handle may already be gone). */
async function releaseNode(drv: TabDriver, objectId: string): Promise<void> {
  await drv.send("Runtime.releaseObject", { objectId }).catch(() => {});
}

/** Move the pointer to (x, y) and press + release the left button there. */
async function pressLeftButtonAt(drv: TabDriver, x: number, y: number): Promise<void> {
  await drv.send("Input.dispatchMouseEvent", { type: "mouseMoved", x, y });
  await drv.send("Input.dispatchMouseEvent", { type: "mousePressed", x, y, button: "left", clickCount: 1 });
  await drv.send("Input.dispatchMouseEvent", { type: "mouseReleased", x, y, button: "left", clickCount: 1 });
}

/**
 * Centre point and size of the node's `getBoundingClientRect()`.
 *
 * The remote handle is released even when the in-page call fails.
 */
async function getNodeCenter(
  drv: TabDriver,
  backendNodeId: number,
): Promise<{ x: number; y: number; width: number; height: number }> {
  const objectId = await resolveNode(drv, backendNodeId);
  try {
    const result = await drv.send("Runtime.callFunctionOn", {
      objectId,
      functionDeclaration: `function() {
        const r = this.getBoundingClientRect();
        return JSON.stringify({ x: r.x + r.width / 2, y: r.y + r.height / 2, width: r.width, height: r.height });
      }`,
      returnByValue: true,
    });
    return JSON.parse(result.result.value);
  } finally {
    await releaseNode(drv, objectId);
  }
}

/** Scroll the node into view (best effort), then left-click its centre. */
async function clickNode(drv: TabDriver, backendNodeId: number) {
  const objectId = await resolveNode(drv, backendNodeId);
  // Scrolling is a nicety: if it fails we still try to click where the node is.
  await drv
    .send("Runtime.callFunctionOn", {
      objectId,
      functionDeclaration: `function() { this.scrollIntoViewIfNeeded(); }`,
    })
    .catch(() => {});
  await releaseNode(drv, objectId);
  // Measure only after scrolling so the coordinates are viewport-current.
  const pos = await getNodeCenter(drv, backendNodeId);
  await pressLeftButtonAt(drv, pos.x, pos.y);
}

/** Move the pointer over the node's centre without pressing anything. */
async function hoverNode(drv: TabDriver, backendNodeId: number) {
  const pos = await getNodeCenter(drv, backendNodeId);
  await drv.send("Input.dispatchMouseEvent", { type: "mouseMoved", x: pos.x, y: pos.y });
}

/**
 * Focus the node, clear its current value, and insert `text`.
 *
 * Focus falls back to a real click when `DOM.focus` is refused. The clearing
 * step handles both form controls (`value`) and contenteditable nodes, firing
 * an `input` event so the page notices; its remote handle is released even
 * when the call fails.
 */
async function focusAndType(drv: TabDriver, backendNodeId: number, text: string) {
  try {
    await drv.send("DOM.focus", { backendNodeId });
  } catch {
    const pos = await getNodeCenter(drv, backendNodeId);
    await pressLeftButtonAt(drv, pos.x, pos.y);
  }
  const objectId = await resolveNode(drv, backendNodeId);
  try {
    await drv.send("Runtime.callFunctionOn", {
      objectId,
      functionDeclaration: `function() {
        if ('value' in this) { this.value = ''; this.dispatchEvent(new Event('input', { bubbles: true })); }
        else if (this.isContentEditable) { this.textContent = ''; this.dispatchEvent(new Event('input', { bubbles: true })); }
      }`,
    });
  } finally {
    await releaseNode(drv, objectId);
  }
  await drv.send("Input.insertText", { text });
}
/** Remove every ` [ref=eN]` marker so two snapshot lines can be compared by content. */
function stripRefs(line: string): string {
  return line.replace(/ \[ref=e\d+\]/g, "");
}

/**
 * Render the tab's accessibility tree as `- role "name" [ref=eN]` lines.
 *
 * @param drv Driver for the attached tab; only `send` is used.
 * @param options.relaxed Keep unnamed `generic` nodes and give a ref to every
 *   node that has a backend DOM id (otherwise only interactive or named nodes
 *   get refs).
 * @param options.interactiveOnly Emit a flat, uncapped list of interactive
 *   roles only; children of an emitted node are not descended into.
 * @param options.selector CSS selector whose first match scopes the snapshot.
 *   If it cannot be resolved the full tree is rendered instead.
 * @returns The rendered text, whether the line cap cut it short, and the
 *   ref → node table built while rendering.
 */
async function buildA11ySnapshot(
  drv: TabDriver,
  options?: { relaxed?: boolean; interactiveOnly?: boolean; selector?: string },
): Promise<{ content: string; truncated?: boolean; refMap: Map<string, RefEntry> }> {
  const refMap = new Map<string, RefEntry>();
  let refCounter = 0;
  const relaxed = options?.relaxed ?? false;
  const interactiveOnly = options?.interactiveOnly ?? false;

  // Optional scoping: selector → DOM nodeId → backend node id.
  let rootBackendNodeId: number | undefined;
  if (options?.selector) {
    try {
      const doc = await drv.send("DOM.getDocument", { depth: 0 });
      const { nodeId } = await drv.send("DOM.querySelector", {
        nodeId: doc.root.nodeId,
        selector: options.selector,
      });
      if (nodeId) {
        const { node } = await drv.send("DOM.describeNode", { nodeId });
        rootBackendNodeId = node.backendNodeId;
      }
    } catch {
      // fall through to full tree
    }
  }

  const ax: any = await drv.send("Accessibility.getFullAXTree", { depth: 50 });
  const nodes: any[] = ax.nodes;

  // Index the flat node list: id → node, and parent id → ordered child ids.
  const nodeMap = new Map<string, any>();
  const children = new Map<string, string[]>();
  for (const node of nodes) {
    nodeMap.set(node.nodeId, node);
    if (node.parentId) {
      const kids = children.get(node.parentId) ?? [];
      kids.push(node.nodeId);
      children.set(node.parentId, kids);
    }
  }

  // Find the AX node that corresponds to the selector match, if any.
  let scopeNodeId: string | undefined;
  if (rootBackendNodeId) {
    for (const node of nodes) {
      if (node.backendDOMNodeId === rootBackendNodeId) {
        scopeNodeId = node.nodeId;
        break;
      }
    }
  }

  // Roles that are never printed themselves; their children are hoisted to the same depth.
  const skipRoles = new Set([
    "none", "InlineTextBox", "LineBreak",
    "StaticText", "RootWebArea", "ignored",
    ...(relaxed ? [] : ["generic"]),
  ]);
  const interactiveRoles = new Set([
    "button", "link", "textbox", "checkbox", "radio",
    "combobox", "menuitem", "tab", "switch", "slider",
    "searchbox", "spinbutton", "option", "menuitemcheckbox",
    "menuitemradio", "treeitem",
  ]);

  const lines: string[] = [];
  const lineLimit = interactiveOnly ? Infinity : MAX_SNAPSHOT_LINES;
  let truncated = false;

  function renderNode(nodeId: string, depth: number) {
    if (truncated) return;
    const node = nodeMap.get(nodeId);
    if (!node) return;
    const role = node.role?.value ?? "";
    const name = node.name?.value ?? "";
    const backendNodeId = node.backendDOMNodeId;
    if (skipRoles.has(role)) {
      // A named generic with a DOM node is still worth showing.
      const keepGeneric = role === "generic" && name && backendNodeId;
      if (!keepGeneric) {
        for (const kid of children.get(nodeId) ?? []) renderNode(kid, depth);
        return;
      }
    }
    const isInteractive = interactiveRoles.has(role);
    if (interactiveOnly && !isInteractive) {
      for (const kid of children.get(nodeId) ?? []) renderNode(kid, depth);
      return;
    }
    if (lines.length >= lineLimit) {
      truncated = true;
      return;
    }
    let ref = "";
    if (relaxed) {
      if (backendNodeId) {
        refCounter++;
        const refId = `e${refCounter}`;
        ref = ` [ref=${refId}]`;
        refMap.set(refId, { role, name, backendNodeId });
      }
    } else if (backendNodeId && (isInteractive || name)) {
      refCounter++;
      const refId = `e${refCounter}`;
      ref = ` [ref=${refId}]`;
      refMap.set(refId, { role, name, backendNodeId });
    }
    const nameStr = name ? ` "${name}"` : "";
    if (interactiveOnly) {
      lines.push(`- ${role}${nameStr}${ref}`);
    } else {
      lines.push(`${" ".repeat(depth)}- ${role}${nameStr}${ref}`);
    }
    if (!interactiveOnly) {
      for (const kid of children.get(nodeId) ?? []) renderNode(kid, depth + 1);
    }
  }

  const startNode = scopeNodeId
    ? nodeMap.get(scopeNodeId)
    : nodes.find((n: any) => !n.parentId || n.role?.value === "RootWebArea");
  if (startNode) {
    if (scopeNodeId) {
      // Scoped: the matched node itself is the first line.
      renderNode(scopeNodeId, 0);
    } else {
      // Unscoped: skip the document root and start at its children.
      for (const kid of children.get(startNode.nodeId) ?? []) renderNode(kid, 0);
    }
  }
  if (truncated) {
    lines.push(`\n[...truncated at ${lineLimit} lines — use snapshot with a CSS selector to see specific sections, e.g. selector: "main", "article", "#content"]`);
  }
  return { content: lines.join("\n"), truncated, refMap };
}
/**
 * Matches a ref-stripped, left-trimmed snapshot line whose role is interactive.
 * The trailing lookahead requires the role name to end there, so `tab` does not
 * match `table`/`tablist`/`tabpanel` and `radio` does not match `radiogroup`.
 */
const INTERACTIVE_LINE_RE =
  /^-?\s*- (?:button|link|textbox|checkbox|radio|combobox|menuitem|menuitemcheckbox|menuitemradio|tab|switch|slider|searchbox|spinbutton|option|treeitem)(?=\s|$)/;

/**
 * Snapshot the tab, refresh `drv.refMap`, and return the text to show.
 *
 * Steps:
 *  1. Build a snapshot; if it yields no refs at all, rebuild in relaxed mode.
 *  2. If there is still nothing, return a hint message instead of empty text.
 *  3. If a previous snapshot of the same URL is cached and no selector was
 *     given, and at least `INCREMENTAL_THRESHOLD` of the lines are unchanged,
 *     return a diff (added / removed / current interactive lines) instead of
 *     the full text.
 *
 * The cache is always updated with the current ref-stripped lines and URL.
 *
 * @param opts.interactiveOnly Passed through to the snapshot builder.
 * @param opts.selector Scopes the snapshot; also disables the incremental diff.
 */
async function takeSnapshot(
  drv: TabDriver,
  opts?: { interactiveOnly?: boolean; selector?: string },
): Promise<string> {
  const interactiveOnly = opts?.interactiveOnly ?? false;
  const selector = opts?.selector;

  let snap = await buildA11ySnapshot(drv, { interactiveOnly, selector });
  drv.refMap.clear();
  for (const [k, v] of snap.refMap) drv.refMap.set(k, v);
  if (drv.refMap.size === 0) {
    // Nothing addressable under the strict rules — retry with relaxed ones.
    snap = await buildA11ySnapshot(drv, { relaxed: true, interactiveOnly, selector });
    drv.refMap.clear();
    for (const [k, v] of snap.refMap) drv.refMap.set(k, v);
  }

  let content = snap.content;
  if (!content && drv.refMap.size === 0) {
    content = "[No interactive elements found in page accessibility tree. " +
      "Try: snapshot with mode 'full' to see all content, screenshot to see the page visually, " +
      "evaluate to inspect the DOM with JavaScript, or wait and snapshot again if the page is still loading.]";
  }

  // Incremental diff vs cache. Lines are compared with refs stripped because
  // ref numbers are reassigned on every snapshot.
  const currentLines = content.split("\n");
  const currentStripped = currentLines.map(stripRefs);
  const currentUrl = (await drv.info()).url;
  const prevLines = drv.snapshotCache.prevLines;
  if (prevLines && drv.snapshotCache.prevUrl === currentUrl && !selector) {
    const prevSet = new Set(prevLines);
    const currSet = new Set(currentStripped);
    const added = currentLines.filter((_, i) => !prevSet.has(currentStripped[i]!));
    const removed = prevLines.filter((line) => !currSet.has(line));
    const unchanged = currentLines.length - added.length;
    const isIncremental = unchanged / Math.max(currentLines.length, 1) >= INCREMENTAL_THRESHOLD;
    if (isIncremental && (added.length > 0 || removed.length > 0)) {
      const parts: string[] = [];
      parts.push(`[Incremental snapshot — ${unchanged} unchanged, ${added.length} added, ${removed.length} removed]`);
      if (added.length > 0) parts.push("", "Added:", ...added);
      if (removed.length > 0) parts.push("", "Removed:", ...removed);
      // Always repeat the interactive lines: they carry the refs that are valid now.
      const interactiveLines = currentLines.filter((_, i) =>
        INTERACTIVE_LINE_RE.test(currentStripped[i]!.trimStart()),
      );
      if (interactiveLines.length > 0) parts.push("", "Interactive elements:", ...interactiveLines);
      content = parts.join("\n");
    }
  }
  drv.snapshotCache.prevLines = currentStripped;
  drv.snapshotCache.prevUrl = currentUrl;
  return content;
}

/**
 * Take an interactive-only snapshot if the tab's URL differs from `urlBefore`.
 *
 * @returns The snapshot text, or `undefined` when the URL is unchanged.
 */
async function snapshotIfNavigated(drv: TabDriver, urlBefore: string): Promise<string | undefined> {
  const urlAfter = (await drv.info()).url;
  if (urlAfter !== urlBefore) return takeSnapshot(drv, { interactiveOnly: true });
  return undefined;
}
/** Message fragments that indicate a ref points at a node that no longer exists. */
const STALE_NODE_MARKERS = [
  "does not belong to the document",
  "No node with given id found",
  "Could not resolve node",
  "not found — it may have been removed",
];

/**
 * Whether an error looks like "the node behind this ref is gone", judged by its
 * message text. Non-Error values are stringified first.
 */
function isStaleNodeError(err: unknown): boolean {
  const msg = err instanceof Error ? err.message : String(err);
  return STALE_NODE_MARKERS.some((marker) => msg.includes(marker));
}

/**
 * Refresh the driver's ref table with a new interactive-only snapshot, then
 * look `ref` up again.
 *
 * @throws Error (from `resolveRef`) when the ref is absent from the new snapshot.
 */
async function reResolveRef(drv: TabDriver, ref: string): Promise<number> {
  await takeSnapshot(drv, { interactiveOnly: true });
  return resolveRef(drv, ref);
}

// ── Action dispatch ──

/** An incoming action frame: a `type` tag plus action-specific fields. */
type BrowserAction = { type: string; [k: string]: any };
/** The result payload sent back for an action. */
type BrowserResult = Record<string, any>;
Take a new snapshot to see current elements.`); + } + } else throw err; + } + await drv.once("Page.loadEventFired", 5_000); + const info = await drv.info(); + const snapshot = await snapshotIfNavigated(drv, urlBefore); + return { type: "done", ...info, message: `Clicked [${action.ref}]`, snapshot }; + } + case "type": { + const urlBefore = (await drv.info()).url; + let nodeId = resolveRef(drv, action.ref); + try { + await focusAndType(drv, nodeId, action.text); + } catch (err) { + if (isStaleNodeError(err)) { + try { + nodeId = await reResolveRef(drv, action.ref); + await focusAndType(drv, nodeId, action.text); + } catch { + throw new Error(`Element [${action.ref}] no longer exists on the page. Take a new snapshot to see current elements.`); + } + } else throw err; + } + if (action.submit) { + await drv.send("Input.dispatchKeyEvent", { type: "keyDown", key: "Enter", code: "Enter", windowsVirtualKeyCode: 13 }); + await drv.send("Input.dispatchKeyEvent", { type: "keyUp", key: "Enter", code: "Enter", windowsVirtualKeyCode: 13 }); + await drv.once("Page.loadEventFired", 5_000); + } + const info = await drv.info(); + const snapshot = await snapshotIfNavigated(drv, urlBefore); + return { type: "done", ...info, message: `Typed into [${action.ref}]`, snapshot }; + } + case "select": { + // Set