From 4f5fcd9acb65eec4195eb54334942731490abbc8 Mon Sep 17 00:00:00 2001 From: "J.Chen" Date: Thu, 14 May 2026 13:38:05 +0800 Subject: [PATCH 01/37] fix(extension): keep active daemon websocket Keep stale Browser Bridge WebSocket events from clobbering the active daemon connection.\n\nCo-authored-by: Jeff Chen --- extension/dist/background.js | 36 +++++++++----- extension/src/background.test.ts | 82 +++++++++++++++++++++++++++++++- extension/src/background.ts | 37 +++++++++----- 3 files changed, 131 insertions(+), 24 deletions(-) diff --git a/extension/dist/background.js b/extension/dist/background.js index fa085bb6f..54c0091c2 100644 --- a/extension/dist/background.js +++ b/extension/dist/background.js @@ -671,13 +671,21 @@ const _origLog = console.log.bind(console); const _origWarn = console.warn.bind(console); const _origError = console.error.bind(console); function forwardLog(level, args) { - if (!ws || ws.readyState !== WebSocket.OPEN) return; try { const msg = args.map((a) => typeof a === "string" ? 
a : JSON.stringify(a)).join(" "); - ws.send(JSON.stringify({ type: "log", level, msg, ts: Date.now() })); + safeSend(ws, { type: "log", level, msg, ts: Date.now() }); } catch { } } +function safeSend(socket, payload) { + if (!socket || socket.readyState !== WebSocket.OPEN) return false; + try { + socket.send(JSON.stringify(payload)); + return true; + } catch { + return false; + } +} console.log = (...args) => { _origLog(...args); forwardLog("info", args); @@ -698,44 +706,50 @@ async function connect() { } catch { return; } + let thisWs; try { const contextId = await getCurrentContextId(); - ws = new WebSocket(DAEMON_WS_URL); + thisWs = new WebSocket(DAEMON_WS_URL); + ws = thisWs; currentContextId = contextId; } catch { scheduleReconnect(); return; } - ws.onopen = () => { + thisWs.onopen = () => { + if (ws !== thisWs) return; console.log("[opencli] Connected to daemon"); reconnectAttempts = 0; if (reconnectTimer) { clearTimeout(reconnectTimer); reconnectTimer = null; } - ws?.send(JSON.stringify({ + safeSend(thisWs, { type: "hello", contextId: currentContextId, version: chrome.runtime.getManifest().version, compatRange: ">=1.7.0" - })); + }); }; - ws.onmessage = async (event) => { + thisWs.onmessage = async (event) => { + if (ws !== thisWs) return; try { const command = JSON.parse(event.data); const result = await handleCommand(command); - ws?.send(JSON.stringify(result)); + if (ws !== thisWs) return; + safeSend(thisWs, result); } catch (err) { console.error("[opencli] Message handling error:", err); } }; - ws.onclose = () => { + thisWs.onclose = () => { + if (ws !== thisWs) return; console.log("[opencli] Disconnected from daemon"); ws = null; scheduleReconnect(); }; - ws.onerror = () => { - ws?.close(); + thisWs.onerror = () => { + thisWs.close(); }; } const MAX_EAGER_ATTEMPTS = 6; diff --git a/extension/src/background.test.ts b/extension/src/background.test.ts index 551db2162..8d415ea45 100644 --- a/extension/src/background.test.ts +++ 
b/extension/src/background.test.ts @@ -31,15 +31,23 @@ const adapterKey = (session: string): string => leaseKey('adapter', session); class MockWebSocket { static OPEN = 1; static CONNECTING = 0; + static CLOSED = 3; + static instances: MockWebSocket[] = []; readyState = MockWebSocket.CONNECTING; + sent: string[] = []; onopen: (() => void) | null = null; onmessage: ((event: { data: string }) => void) | null = null; onclose: (() => void) | null = null; onerror: (() => void) | null = null; - constructor(_url: string) {} - send(_data: string): void {} + constructor(_url: string) { + MockWebSocket.instances.push(this); + } + send(data: string): void { + this.sent.push(data); + } close(): void { + this.readyState = MockWebSocket.CLOSED; this.onclose?.(); } } @@ -194,6 +202,7 @@ describe('background tab isolation', () => { beforeEach(() => { vi.resetModules(); vi.useRealTimers(); + MockWebSocket.instances = []; vi.stubGlobal('WebSocket', MockWebSocket); }); @@ -649,6 +658,75 @@ describe('background tab isolation', () => { }); }); + it('keeps the active daemon connection when a superseded WebSocket closes later', async () => { + const { chrome } = createChromeMock(); + vi.stubGlobal('chrome', chrome); + vi.stubGlobal('fetch', vi.fn(async () => ({ ok: true }))); + + await import('./background'); + await vi.waitFor(() => { + expect(MockWebSocket.instances).toHaveLength(1); + }); + const firstWs = MockWebSocket.instances[0]; + firstWs.readyState = 3; + + const onAlarmListener = chrome.alarms.onAlarm.addListener.mock.calls[0][0]; + await onAlarmListener({ name: 'keepalive' }); + await vi.waitFor(() => { + expect(MockWebSocket.instances).toHaveLength(2); + }); + const secondWs = MockWebSocket.instances[1]; + secondWs.readyState = MockWebSocket.OPEN; + + firstWs.onclose?.(); + secondWs.onmessage?.({ + data: JSON.stringify({ + id: 'sessions-after-stale-close', + action: 'tabs', + op: 'list', + session: 'work', + surface: 'browser', + }), + }); + + await vi.waitFor(() => { + 
expect(secondWs.sent.some((entry) => entry.includes('sessions-after-stale-close'))).toBe(true); + }); + }); + + it('ignores daemon commands delivered to a superseded WebSocket', async () => { + const { chrome } = createChromeMock(); + vi.stubGlobal('chrome', chrome); + vi.stubGlobal('fetch', vi.fn(async () => ({ ok: true }))); + + await import('./background'); + await vi.waitFor(() => { + expect(MockWebSocket.instances).toHaveLength(1); + }); + const firstWs = MockWebSocket.instances[0]; + firstWs.readyState = MockWebSocket.OPEN; + + const onAlarmListener = chrome.alarms.onAlarm.addListener.mock.calls[0][0]; + firstWs.readyState = MockWebSocket.CLOSED; + await onAlarmListener({ name: 'keepalive' }); + await vi.waitFor(() => { + expect(MockWebSocket.instances).toHaveLength(2); + }); + firstWs.readyState = MockWebSocket.OPEN; + + await firstWs.onmessage?.({ + data: JSON.stringify({ + id: 'stale-command', + action: 'tabs', + op: 'list', + session: 'work', + surface: 'browser', + }), + }); + + expect(firstWs.sent.some((entry) => entry.includes('stale-command'))).toBe(false); + }); + it('can execute concurrently on two pages in the same session', async () => { const { chrome, tabs } = createChromeMock(); tabs.push({ diff --git a/extension/src/background.ts b/extension/src/background.ts index 2925c13bc..c788131dc 100644 --- a/extension/src/background.ts +++ b/extension/src/background.ts @@ -70,13 +70,22 @@ const _origWarn = console.warn.bind(console); const _origError = console.error.bind(console); function forwardLog(level: 'info' | 'warn' | 'error', args: unknown[]): void { - if (!ws || ws.readyState !== WebSocket.OPEN) return; try { const msg = args.map(a => typeof a === 'string' ? 
a : JSON.stringify(a)).join(' '); - ws.send(JSON.stringify({ type: 'log', level, msg, ts: Date.now() })); + safeSend(ws, { type: 'log', level, msg, ts: Date.now() }); } catch { /* don't recurse */ } } +function safeSend(socket: WebSocket | null | undefined, payload: unknown): boolean { + if (!socket || socket.readyState !== WebSocket.OPEN) return false; + try { + socket.send(JSON.stringify(payload)); + return true; + } catch { + return false; + } +} + console.log = (...args: unknown[]) => { _origLog(...args); forwardLog('info', args); }; console.warn = (...args: unknown[]) => { _origWarn(...args); forwardLog('warn', args); }; console.error = (...args: unknown[]) => { _origError(...args); forwardLog('error', args); }; @@ -100,16 +109,19 @@ async function connect(): Promise { return; // daemon not running — skip WebSocket to avoid console noise } + let thisWs: WebSocket; try { const contextId = await getCurrentContextId(); - ws = new WebSocket(DAEMON_WS_URL); + thisWs = new WebSocket(DAEMON_WS_URL); + ws = thisWs; currentContextId = contextId; } catch { scheduleReconnect(); return; } - ws.onopen = () => { + thisWs.onopen = () => { + if (ws !== thisWs) return; console.log('[opencli] Connected to daemon'); reconnectAttempts = 0; // Reset on successful connection if (reconnectTimer) { @@ -117,32 +129,35 @@ async function connect(): Promise { reconnectTimer = null; } // Send version + compatibility range so the daemon can report mismatches to the CLI - ws?.send(JSON.stringify({ + safeSend(thisWs, { type: 'hello', contextId: currentContextId, version: chrome.runtime.getManifest().version, compatRange: __OPENCLI_COMPAT_RANGE__, - })); + }); }; - ws.onmessage = async (event) => { + thisWs.onmessage = async (event) => { + if (ws !== thisWs) return; try { const command = JSON.parse(event.data as string) as Command; const result = await handleCommand(command); - ws?.send(JSON.stringify(result)); + if (ws !== thisWs) return; + safeSend(thisWs, result); } catch (err) { 
console.error('[opencli] Message handling error:', err); } }; - ws.onclose = () => { + thisWs.onclose = () => { + if (ws !== thisWs) return; console.log('[opencli] Disconnected from daemon'); ws = null; scheduleReconnect(); }; - ws.onerror = () => { - ws?.close(); + thisWs.onerror = () => { + thisWs.close(); }; } From cddc84776ce0cf72a35c715b7b9f3ea6c3f68755 Mon Sep 17 00:00:00 2001 From: "J.Chen" Date: Thu, 14 May 2026 13:38:41 +0800 Subject: [PATCH 02/37] docs(browser): clarify named session lifecycle (#1542) * docs(browser): clarify named session lifecycle * docs(browser): clarify owned versus bound sessions --------- Co-authored-by: Jeff Chen Co-authored-by: jackwener --- docs/guide/browser-bridge.md | 18 ++++++++++++++++++ docs/zh/guide/browser-bridge.md | 18 ++++++++++++++++++ skills/opencli-browser/SKILL.md | 18 ++++++++++-------- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/docs/guide/browser-bridge.md b/docs/guide/browser-bridge.md index 36c09420f..927001e12 100644 --- a/docs/guide/browser-bridge.md +++ b/docs/guide/browser-bridge.md @@ -48,6 +48,24 @@ Key rules: - `tab select ` makes that tab the default target for later untargeted `opencli browser ...` commands. - `tab close ` removes the tab; if it was the current default target, the stored default is cleared. +## Session Lifecycle + +Use a stable session name when you want multiple `opencli browser` commands to keep operating on the same page: + +```bash +opencli browser my-session open https://example.com +opencli browser my-session state +opencli browser my-session extract "main" +``` + +Owned browser sessions use an interactive tab lease with a 10-minute idle timeout. Release it explicitly when done: + +```bash +opencli browser my-session close +``` + +Use `opencli browser bind` when you want to attach OpenCLI to a Chrome tab you already opened manually. 
Bound sessions do not have the owned-session idle close timer; they stay attached until `unbind`, tab close, window close, or daemon restart. For owned sessions, use `--window foreground` to watch OpenCLI work in a visible automation window, or `--window background` to keep that automation window out of the way. + ## How It Works ``` diff --git a/docs/zh/guide/browser-bridge.md b/docs/zh/guide/browser-bridge.md index 36e5e006a..9a2078dd7 100644 --- a/docs/zh/guide/browser-bridge.md +++ b/docs/zh/guide/browser-bridge.md @@ -46,6 +46,24 @@ opencli browser baidu tab close - `tab select ` 会把该 tab 设为后续未显式指定 target 的 `opencli browser ...` 命令默认目标。 - `tab close ` 会关闭该 tab;如果它正好是当前默认目标,会一并清掉这条默认绑定。 +## Session 生命周期 + +如果你希望多条 `opencli browser` 命令持续操作同一个页面,请使用稳定的 session 名称: + +```bash +opencli browser my-session open https://example.com +opencli browser my-session state +opencli browser my-session extract "main" +``` + +OpenCLI 拥有的 browser session 使用交互式 tab lease,默认空闲超时为 10 分钟。完成后可以显式释放: + +```bash +opencli browser my-session close +``` + +如果要把 OpenCLI 绑定到你已经手动打开的 Chrome tab,请使用 `opencli browser bind`。绑定 session 没有 owned session 的 idle close 计时器,会一直保持到 `unbind`、tab 关闭、窗口关闭或 daemon 重启。对于 OpenCLI 自己创建的 owned session,使用 `--window foreground` 可以在可见自动化窗口里观察 OpenCLI 操作;使用 `--window background` 可以让这个自动化窗口留在后台。 + ## Daemon 生命周期 Daemon 在首次运行浏览器命令时自动启动,之后保持常驻运行。 diff --git a/skills/opencli-browser/SKILL.md b/skills/opencli-browser/SKILL.md index 188724be9..657e40154 100644 --- a/skills/opencli-browser/SKILL.md +++ b/skills/opencli-browser/SKILL.md @@ -25,6 +25,7 @@ Until `doctor` is green, nothing else will work. Typical failures: Chrome not ru ## Session lifecycle - `opencli browser *` commands require a `` positional immediately after `browser`. Use the same session name for a multi-step flow; use a different name to isolate parallel browser work. +- Use a stable session name for any multi-command or human-paced browser workflow. 
Example: `opencli browser fb-yaya-warmup open https://example.com`, then reuse `opencli browser fb-yaya-warmup state`, `extract`, `click`, etc. - Owned browser sessions keep a tab lease alive between calls. Release it with `opencli browser close` or let the idle timeout expire. - `opencli browser bind` binds the Chrome tab you already have open to that session. Use this for logged-in pages, SSO flows, or pages you manually positioned before handing control to the agent. - `--window foreground|background` (or `OPENCLI_WINDOW=foreground|background`) chooses whether OpenCLI creates/focuses a foreground browser window or uses a background browser window for owned sessions. @@ -59,14 +60,15 @@ Bound sessions have no OpenCLI idle-close timer; the binding lasts until `unbind ## Critical rules 1. **Always inspect before you act.** Run `state` or `find` first. Never hard-code a ref or selector from memory across sessions — indices are per-snapshot. -2. **Prefer numeric ref over CSS once you have it.** Numeric refs survive mild DOM shifts because the CLI fingerprints each tagged element. A CSS selector written by hand will break the first time the site re-renders. -3. **Read `match_level` after every write.** `exact` = all good. `stable` = the element is the same but some soft attrs drifted — your action still applied. `reidentified` = the original ref was gone and the CLI found a unique replacement; double-check you hit the right element. -4. **Use the `compound` field for form controls.** Do not regex-guess a date format, do not `state` twice to get the full ``. -5. **Verify writes that matter.** After `type `, run `get value `. After `select`, run `get value`. Autocomplete widgets, React controlled inputs, and masked fields all silently eat characters. The CLI cannot detect this for you. -6. **`state` → action → `state` after a page change.** Navigations, form submits, and SPA route changes invalidate refs. Take a fresh snapshot. 
Do not reuse refs from before the transition. -7. **Chain with `&&`.** A chained sequence runs in one shell so refs acquired by the first command stay live for the second. Separate shell invocations lose the session context you just set up. -8. **`eval` is read-only.** Wrap the JS in an IIFE and return JSON. If you need to *change* the page, use the structured `click` / `type` / `select` / `keys` commands instead — they produce structured output and fingerprints, `eval` does not. -9. **Prefer `network` to screen-scraping.** If a page you care about fetches its data from a JSON API, the API is almost always more reliable than scraping the rendered DOM. Capture once, inspect the shape, then `--detail ` the body you need. +2. **Prefer site adapters before raw browser driving.** If `opencli ` already covers the task, use that adapter command first (`opencli facebook notifications`, `opencli reddit read`, etc.). Use `opencli browser ...` only for gaps, debugging, or one-off UI flows the adapter does not expose. +3. **Prefer numeric ref over CSS once you have it.** Numeric refs survive mild DOM shifts because the CLI fingerprints each tagged element. A CSS selector written by hand will break the first time the site re-renders. +4. **Read `match_level` after every write.** `exact` = all good. `stable` = the element is the same but some soft attrs drifted — your action still applied. `reidentified` = the original ref was gone and the CLI found a unique replacement; double-check you hit the right element. +5. **Use the `compound` field for form controls.** Do not regex-guess a date format, do not `state` twice to get the full ``. +6. **Verify writes that matter.** After `type `, run `get value `. After `select`, run `get value`. Autocomplete widgets, React controlled inputs, and masked fields all silently eat characters. The CLI cannot detect this for you. +7. 
**`state` → action → `state` after a page change.** Navigations, form submits, and SPA route changes invalidate refs. Take a fresh snapshot. Do not reuse refs from before the transition. +8. **Chain with `&&` when reusing freshly parsed refs.** A chained sequence runs in one shell so the ref you just read from output can be passed directly to the next command. Separate shell invocations keep the named browser session, but any shell-local variables or copied refs from the previous command can go stale after page changes. +9. **`eval` is read-only.** Wrap the JS in an IIFE and return JSON. If you need to *change* the page, use the structured `click` / `type` / `select` / `keys` commands instead — they produce structured output and fingerprints, `eval` does not. +10. **Prefer `network` to screen-scraping.** If a page you care about fetches its data from a JSON API, the API is almost always more reliable than scraping the rendered DOM. Capture once, inspect the shape, then `--detail ` the body you need. 
--- From 68b18cdbcd86d2ff41bbe74609bca85f93d668df Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 13:57:38 +0800 Subject: [PATCH 03/37] fix(extension): coalesce daemon websocket connects (#1554) --- extension/dist/background.js | 18 ++++++++++++++-- extension/src/background.test.ts | 35 ++++++++++++++++++++++++++++++++ extension/src/background.ts | 20 ++++++++++++++++-- 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/extension/dist/background.js b/extension/dist/background.js index 54c0091c2..e9c7f481b 100644 --- a/extension/dist/background.js +++ b/extension/dist/background.js @@ -626,6 +626,7 @@ let reconnectAttempts = 0; const CONTEXT_ID_KEY = "opencli_context_id_v1"; let currentContextId = "default"; let contextIdPromise = null; +let connectInFlight = null; async function getCurrentContextId() { if (contextIdPromise) return contextIdPromise; contextIdPromise = (async () => { @@ -698,17 +699,30 @@ console.error = (...args) => { _origError(...args); forwardLog("error", args); }; -async function connect() { - if (ws?.readyState === WebSocket.OPEN || ws?.readyState === WebSocket.CONNECTING) return; +function isDaemonSocketActive(socket = ws) { + return socket?.readyState === WebSocket.OPEN || socket?.readyState === WebSocket.CONNECTING; +} +function connect() { + if (isDaemonSocketActive()) return Promise.resolve(); + if (connectInFlight) return connectInFlight; + connectInFlight = connectAttempt().finally(() => { + connectInFlight = null; + }); + return connectInFlight; +} +async function connectAttempt() { + if (isDaemonSocketActive()) return; try { const res = await fetch(DAEMON_PING_URL, { signal: AbortSignal.timeout(1e3) }); if (!res.ok) return; } catch { return; } + if (isDaemonSocketActive()) return; let thisWs; try { const contextId = await getCurrentContextId(); + if (isDaemonSocketActive()) return; thisWs = new WebSocket(DAEMON_WS_URL); ws = thisWs; currentContextId = contextId; diff --git a/extension/src/background.test.ts 
b/extension/src/background.test.ts index 8d415ea45..5a8182cf2 100644 --- a/extension/src/background.test.ts +++ b/extension/src/background.test.ts @@ -52,6 +52,16 @@ class MockWebSocket { } } +function deferred() { + let resolve!: (value: T | PromiseLike) => void; + let reject!: (reason?: unknown) => void; + const promise = new Promise((res, rej) => { + resolve = res; + reject = rej; + }); + return { promise, resolve, reject }; +} + function createChromeMock() { let nextTabId = 10; let nextGroupId = 100; @@ -694,6 +704,31 @@ describe('background tab isolation', () => { }); }); + it('coalesces concurrent daemon connection attempts while the probe is in flight', async () => { + const { chrome } = createChromeMock(); + vi.stubGlobal('chrome', chrome); + const ping = deferred<{ ok: boolean }>(); + const fetchMock = vi.fn(() => ping.promise); + vi.stubGlobal('fetch', fetchMock); + + await import('./background'); + await vi.waitFor(() => { + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + const onAlarmListener = chrome.alarms.onAlarm.addListener.mock.calls[0][0]; + await onAlarmListener({ name: 'keepalive' }); + await onAlarmListener({ name: 'keepalive' }); + + expect(fetchMock).toHaveBeenCalledTimes(1); + expect(MockWebSocket.instances).toHaveLength(0); + + ping.resolve({ ok: true }); + await vi.waitFor(() => { + expect(MockWebSocket.instances).toHaveLength(1); + }); + }); + it('ignores daemon commands delivered to a superseded WebSocket', async () => { const { chrome } = createChromeMock(); vi.stubGlobal('chrome', chrome); diff --git a/extension/src/background.ts b/extension/src/background.ts index c788131dc..2d14b023e 100644 --- a/extension/src/background.ts +++ b/extension/src/background.ts @@ -18,6 +18,7 @@ let reconnectAttempts = 0; const CONTEXT_ID_KEY = 'opencli_context_id_v1'; let currentContextId = 'default'; let contextIdPromise: Promise | null = null; +let connectInFlight: Promise | null = null; async function getCurrentContextId(): Promise { if 
(contextIdPromise) return contextIdPromise; @@ -92,6 +93,10 @@ console.error = (...args: unknown[]) => { _origError(...args); forwardLog('error // ─── WebSocket connection ──────────────────────────────────────────── +function isDaemonSocketActive(socket: WebSocket | null | undefined = ws): boolean { + return socket?.readyState === WebSocket.OPEN || socket?.readyState === WebSocket.CONNECTING; +} + /** * Probe the daemon via its /ping HTTP endpoint before attempting a WebSocket * connection. fetch() failures are silently catchable; new WebSocket() is not @@ -99,8 +104,17 @@ console.error = (...args: unknown[]) => { _origError(...args); forwardLog('error * JS handler can intercept it. By keeping the probe inside connect() every * call site remains unchanged and the guard can never be accidentally skipped. */ -async function connect(): Promise { - if (ws?.readyState === WebSocket.OPEN || ws?.readyState === WebSocket.CONNECTING) return; +function connect(): Promise { + if (isDaemonSocketActive()) return Promise.resolve(); + if (connectInFlight) return connectInFlight; + connectInFlight = connectAttempt().finally(() => { + connectInFlight = null; + }); + return connectInFlight; +} + +async function connectAttempt(): Promise { + if (isDaemonSocketActive()) return; try { const res = await fetch(DAEMON_PING_URL, { signal: AbortSignal.timeout(1000) }); @@ -108,10 +122,12 @@ async function connect(): Promise { } catch { return; // daemon not running — skip WebSocket to avoid console noise } + if (isDaemonSocketActive()) return; let thisWs: WebSocket; try { const contextId = await getCurrentContextId(); + if (isDaemonSocketActive()) return; thisWs = new WebSocket(DAEMON_WS_URL); ws = thisWs; currentContextId = contextId; From 6b26aedd5637227b95f681acbf61ab53b3af3aa0 Mon Sep 17 00:00:00 2001 From: Ocean Date: Thu, 14 May 2026 14:01:34 +0800 Subject: [PATCH 04/37] feat(twitter/list-tweets): include media via extractMedia (parity with timeline/search) (#1464) * 
feat(twitter/list-tweets): include media via extractMedia (parity with timeline/search) list-tweets was the only X recall path that dropped media. timeline.js and search.js both call extractMedia(legacy) and emit has_media/media_urls; list-tweets returned only text fields, so downstream consumers (e.g. ml-scout's rate UI) couldn't render image/video thumbnails on tweets pulled from a list timeline. Changes: - Import extractMedia from ./shared.js - Spread extractMedia(legacy) into extractTimelineTweet return - Add has_media, media_urls to columns array (--format columns parity) - Update unit test to assert the new shape; add coverage for photo and video extraction * chore(manifest): rebuild cli-manifest.json for list-tweets media columns --------- Co-authored-by: ml-scout From af7b94152fb6ce0989cdc9787bb6b28fccc41a59 Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 14:24:31 +0800 Subject: [PATCH 05/37] feat(twitter): add extractMedia parity to bookmarks + bookmark-folder (#1555) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors PR #1464 (list-tweets) and the timeline/search/tweets/likes/thread family: spread `...extractMedia(legacy)` into the row and surface `has_media` + `media_urls` columns. Pure parity, no behavior change for existing callers — media keys do not collide with the original columns. - bookmarks.js: import `extractMedia` from ./shared.js, spread into extractBookmarkTweet row, append columns, export __test__. - bookmark-folder.js: same change on extractFolderTweet, export extractFolderTweet via __test__. - bookmarks.test.js (new): baseline + photo + video + entities-only fallback + dedup + envelope + empty-envelope (8 tests). - bookmark-folder.test.js: update existing baseline expectation with has_media/media_urls, add 3 new media tests (photo / mp4 / no-media). - cli-manifest.json: regenerated; only the two `columns` entries change. 
Reverse-validated: tests fail when extractMedia spread is removed. Audits unchanged: typed-error-lint 189/189, silent-column-drop 102/103 (pre-existing main resolution noted but not consumed here). --- cli-manifest.json | 8 +- clis/twitter/bookmark-folder.js | 8 +- clis/twitter/bookmark-folder.test.js | 60 +++++++- clis/twitter/bookmarks.js | 12 +- clis/twitter/bookmarks.test.js | 205 +++++++++++++++++++++++++++ 5 files changed, 284 insertions(+), 9 deletions(-) create mode 100644 clis/twitter/bookmarks.test.js diff --git a/cli-manifest.json b/cli-manifest.json index 2bc2ab99b..457fd43ab 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -22711,7 +22711,9 @@ "retweets", "bookmarks", "created_at", - "url" + "url", + "has_media", + "media_urls" ], "type": "js", "modulePath": "twitter/bookmark-folder.js", @@ -22772,7 +22774,9 @@ "retweets", "bookmarks", "created_at", - "url" + "url", + "has_media", + "media_urls" ], "type": "js", "modulePath": "twitter/bookmarks.js", diff --git a/clis/twitter/bookmark-folder.js b/clis/twitter/bookmark-folder.js index 3377b7d11..c984eb47b 100644 --- a/clis/twitter/bookmark-folder.js +++ b/clis/twitter/bookmark-folder.js @@ -1,7 +1,7 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; -import { resolveTwitterQueryId } from './shared.js'; +import { extractMedia, resolveTwitterQueryId } from './shared.js'; // Companion to bookmark-folders.js: reads tweets inside a single folder. 
// X exposes folder contents through a separate timeline operation @@ -54,7 +54,7 @@ function buildFolderTimelineUrl(queryId, folderId, count, cursor) { + `&features=${encodeURIComponent(JSON.stringify(FEATURES))}`; } -function extractFolderTweet(result, seen) { +export function extractFolderTweet(result, seen) { if (!result) return null; const tw = result.tweet || result; const legacy = tw.legacy || {}; @@ -72,6 +72,7 @@ function extractFolderTweet(result, seen) { bookmarks: legacy.bookmark_count || 0, created_at: legacy.created_at || '', url: screenName ? `https://x.com/${screenName}/status/${tw.rest_id}` : `https://x.com/i/status/${tw.rest_id}`, + ...extractMedia(legacy), }; } @@ -129,7 +130,7 @@ cli({ { name: 'limit', type: 'int', default: 20, help: 'Maximum number of bookmarks to return (default 20).' }, { name: 'top-by-engagement', type: 'int', default: 0, help: 'When set to N>0, re-rank the folder by weighted engagement (likes×1 + retweets×3 + replies×2 + bookmarks×5 + log10(views+1)×0.5) and return the top N. Default 0 keeps the API\'s native (saved-time) ordering.' 
}, ], - columns: ['id', 'author', 'text', 'likes', 'retweets', 'bookmarks', 'created_at', 'url'], + columns: ['id', 'author', 'text', 'likes', 'retweets', 'bookmarks', 'created_at', 'url', 'has_media', 'media_urls'], func: async (page, kwargs) => { const folderId = String(kwargs['folder-id'] || '').trim(); if (!folderId || !FOLDER_ID_PATTERN.test(folderId)) { @@ -184,6 +185,7 @@ cli({ export const __test__ = { parseBookmarkFolderTimeline, + extractFolderTweet, buildFolderTimelineUrl, FOLDER_ID_PATTERN, }; diff --git a/clis/twitter/bookmark-folder.test.js b/clis/twitter/bookmark-folder.test.js index cbbda7bb1..64b8c28d9 100644 --- a/clis/twitter/bookmark-folder.test.js +++ b/clis/twitter/bookmark-folder.test.js @@ -2,7 +2,7 @@ import { describe, expect, it, vi } from 'vitest'; import { getRegistry } from '@jackwener/opencli/registry'; import { __test__ } from './bookmark-folder.js'; -const { parseBookmarkFolderTimeline, buildFolderTimelineUrl, FOLDER_ID_PATTERN } = __test__; +const { parseBookmarkFolderTimeline, extractFolderTweet, buildFolderTimelineUrl, FOLDER_ID_PATTERN } = __test__; describe('twitter bookmark-folder URL builder', () => { it('embeds the folder id and count in the variables payload', () => { @@ -97,6 +97,8 @@ describe('twitter bookmark-folder timeline parser', () => { bookmarks: 3, created_at: 'Tue Mar 17 09:00:00 +0000 2026', url: 'https://x.com/alice/status/1', + has_media: false, + media_urls: [], }, ]); expect(nextCursor).toBe('NEXT_CURSOR'); @@ -247,6 +249,62 @@ describe('twitter bookmark-folder timeline parser', () => { it('returns empty array + null cursor for unknown envelope', () => { expect(parseBookmarkFolderTimeline({}, new Set())).toEqual({ tweets: [], nextCursor: null }); }); + + it('includes photo media URLs from extended_entities', () => { + const tweet = extractFolderTweet({ + rest_id: '101', + legacy: { + full_text: 'pic folder tweet', + extended_entities: { + media: [ + { type: 'photo', media_url_https: 
'https://pbs.twimg.com/media/abc.jpg' }, + { type: 'photo', media_url_https: 'https://pbs.twimg.com/media/def.jpg' }, + ], + }, + }, + core: { user_results: { result: { legacy: { screen_name: 'eve' } } } }, + }, new Set()); + expect(tweet?.has_media).toBe(true); + expect(tweet?.media_urls).toEqual([ + 'https://pbs.twimg.com/media/abc.jpg', + 'https://pbs.twimg.com/media/def.jpg', + ]); + }); + + it('extracts mp4 variant URL for video media', () => { + const tweet = extractFolderTweet({ + rest_id: '102', + legacy: { + full_text: 'video folder tweet', + extended_entities: { + media: [{ + type: 'video', + media_url_https: 'https://pbs.twimg.com/amplify_video_thumb/thumb.jpg', + video_info: { + variants: [ + { content_type: 'application/x-mpegURL', url: 'https://video.twimg.com/playlist.m3u8' }, + { content_type: 'video/mp4', bitrate: 832000, url: 'https://video.twimg.com/low.mp4' }, + { content_type: 'video/mp4', bitrate: 2176000, url: 'https://video.twimg.com/high.mp4' }, + ], + }, + }], + }, + }, + core: { user_results: { result: { legacy: { screen_name: 'frank' } } } }, + }, new Set()); + expect(tweet?.has_media).toBe(true); + expect(tweet?.media_urls?.[0]).toMatch(/\.mp4$/); + }); + + it('returns has_media false / media_urls empty when no media present', () => { + const tweet = extractFolderTweet({ + rest_id: '103', + legacy: { full_text: 'text only', favorite_count: 0, retweet_count: 0, bookmark_count: 0 }, + core: { user_results: { result: { legacy: { screen_name: 'gail' } } } }, + }, new Set()); + expect(tweet?.has_media).toBe(false); + expect(tweet?.media_urls).toEqual([]); + }); }); describe('twitter bookmark-folder id validation', () => { diff --git a/clis/twitter/bookmarks.js b/clis/twitter/bookmarks.js index 322f4f090..cd7644066 100644 --- a/clis/twitter/bookmarks.js +++ b/clis/twitter/bookmarks.js @@ -1,5 +1,6 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { AuthRequiredError, CommandExecutionError } from 
'@jackwener/opencli/errors'; +import { extractMedia } from './shared.js'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; const BOOKMARKS_QUERY_ID = 'Fy0QMy4q_aZCpkO0PnyLYw'; const MAX_PAGINATION_PAGES = 100; @@ -42,7 +43,7 @@ function buildBookmarksUrl(count, cursor) { + `?variables=${encodeURIComponent(JSON.stringify(vars))}` + `&features=${encodeURIComponent(JSON.stringify(FEATURES))}`; } -function extractBookmarkTweet(result, seen) { +export function extractBookmarkTweet(result, seen) { if (!result) return null; const tw = result.tweet || result; @@ -64,9 +65,10 @@ function extractBookmarkTweet(result, seen) { bookmarks: legacy.bookmark_count || 0, created_at: legacy.created_at || '', url: `https://x.com/${screenName}/status/${tw.rest_id}`, + ...extractMedia(legacy), }; } -function parseBookmarks(data, seen) { +export function parseBookmarks(data, seen) { const tweets = []; let nextCursor = null; const instructions = data?.data?.bookmark_timeline_v2?.timeline?.instructions @@ -111,7 +113,7 @@ cli({ { name: 'limit', type: 'int', default: 20, help: 'Maximum number of bookmarks to return (default 20).' }, { name: 'top-by-engagement', type: 'int', default: 0, help: 'When set to N>0, re-rank the bookmarks by weighted engagement (likes×1 + retweets×3 + replies×2 + bookmarks×5 + log10(views+1)×0.5) and return the top N. Default 0 keeps the API\'s native (saved-time) ordering.' 
}, ], - columns: ['id', 'author', 'text', 'likes', 'retweets', 'bookmarks', 'created_at', 'url'], + columns: ['id', 'author', 'text', 'likes', 'retweets', 'bookmarks', 'created_at', 'url', 'has_media', 'media_urls'], func: async (page, kwargs) => { const limit = kwargs.limit || 20; const cookies = await page.getCookies({ url: 'https://x.com' }); @@ -174,3 +176,7 @@ cli({ return applyTopByEngagement(trimmed, kwargs['top-by-engagement']); }, }); +export const __test__ = { + parseBookmarks, + extractBookmarkTweet, +}; diff --git a/clis/twitter/bookmarks.test.js b/clis/twitter/bookmarks.test.js new file mode 100644 index 000000000..ce7b926f8 --- /dev/null +++ b/clis/twitter/bookmarks.test.js @@ -0,0 +1,205 @@ +import { describe, expect, it } from 'vitest'; +import { __test__ } from './bookmarks.js'; + +const { parseBookmarks, extractBookmarkTweet } = __test__; + +describe('twitter bookmarks parser', () => { + it('extracts a baseline tweet with no media (has_media false, media_urls empty)', () => { + const tweet = extractBookmarkTweet({ + rest_id: '1', + legacy: { + full_text: 'plain bookmark', + favorite_count: 5, + retweet_count: 1, + bookmark_count: 2, + created_at: 'Wed Apr 16 10:00:00 +0000 2026', + }, + core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } }, + }, new Set()); + expect(tweet).toEqual({ + id: '1', + author: 'alice', + name: 'Alice', + text: 'plain bookmark', + likes: 5, + retweets: 1, + bookmarks: 2, + created_at: 'Wed Apr 16 10:00:00 +0000 2026', + url: 'https://x.com/alice/status/1', + has_media: false, + media_urls: [], + }); + }); + + it('includes photo media URLs from extended_entities', () => { + const tweet = extractBookmarkTweet({ + rest_id: '101', + legacy: { + full_text: 'pic bookmark', + extended_entities: { + media: [ + { type: 'photo', media_url_https: 'https://pbs.twimg.com/media/abc.jpg' }, + { type: 'photo', media_url_https: 'https://pbs.twimg.com/media/def.jpg' }, + ], + }, + }, + core: { 
user_results: { result: { legacy: { screen_name: 'bob' } } } }, + }, new Set()); + expect(tweet?.has_media).toBe(true); + expect(tweet?.media_urls).toEqual([ + 'https://pbs.twimg.com/media/abc.jpg', + 'https://pbs.twimg.com/media/def.jpg', + ]); + }); + + it('extracts mp4 variant URL for video media', () => { + const tweet = extractBookmarkTweet({ + rest_id: '102', + legacy: { + full_text: 'video bookmark', + extended_entities: { + media: [{ + type: 'video', + media_url_https: 'https://pbs.twimg.com/amplify_video_thumb/thumb.jpg', + video_info: { + variants: [ + { content_type: 'application/x-mpegURL', url: 'https://video.twimg.com/playlist.m3u8' }, + { content_type: 'video/mp4', bitrate: 832000, url: 'https://video.twimg.com/low.mp4' }, + { content_type: 'video/mp4', bitrate: 2176000, url: 'https://video.twimg.com/high.mp4' }, + ], + }, + }], + }, + }, + core: { user_results: { result: { legacy: { screen_name: 'carol' } } } }, + }, new Set()); + expect(tweet?.has_media).toBe(true); + expect(tweet?.media_urls?.[0]).toMatch(/\.mp4$/); + }); + + it('falls back to entities.media when extended_entities is absent', () => { + const tweet = extractBookmarkTweet({ + rest_id: '103', + legacy: { + full_text: 'entities-only media', + entities: { + media: [{ type: 'photo', media_url_https: 'https://pbs.twimg.com/media/legacy.jpg' }], + }, + }, + core: { user_results: { result: { legacy: { screen_name: 'dave' } } } }, + }, new Set()); + expect(tweet?.has_media).toBe(true); + expect(tweet?.media_urls).toEqual(['https://pbs.twimg.com/media/legacy.jpg']); + }); + + it('prefers note_tweet text over truncated full_text', () => { + const tweet = extractBookmarkTweet({ + rest_id: '2', + legacy: { full_text: 'short text…', favorite_count: 0, retweet_count: 0, bookmark_count: 0 }, + note_tweet: { note_tweet_results: { result: { text: 'full long-form text body' } } }, + core: { user_results: { result: { core: { screen_name: 'erin' } } } }, + }, new Set()); + 
expect(tweet?.text).toBe('full long-form text body'); + }); + + it('deduplicates tweets across the seen Set', () => { + const data = { + data: { + bookmark_timeline_v2: { + timeline: { + instructions: [{ + entries: [ + { + entryId: 'tweet-3', + content: { + itemContent: { + tweet_results: { + result: { + rest_id: '3', + legacy: { full_text: 'first', favorite_count: 0, retweet_count: 0, bookmark_count: 0 }, + core: { user_results: { result: { legacy: { screen_name: 'frank' } } } }, + }, + }, + }, + }, + }, + { + entryId: 'tweet-3-dup', + content: { + itemContent: { + tweet_results: { + result: { + rest_id: '3', + legacy: { full_text: 'duplicate' }, + core: { user_results: { result: { legacy: { screen_name: 'frank' } } } }, + }, + }, + }, + }, + }, + ], + }], + }, + }, + }, + }; + const seen = new Set(); + const { tweets } = parseBookmarks(data, seen); + expect(tweets).toHaveLength(1); + expect(tweets[0].text).toBe('first'); + }); + + it('extracts cursor + tweets from the bookmark_timeline_v2 envelope', () => { + const data = { + data: { + bookmark_timeline_v2: { + timeline: { + instructions: [ + { + type: 'TimelineAddEntries', + entries: [ + { + entryId: 'tweet-4', + content: { + itemContent: { + tweet_results: { + result: { + rest_id: '4', + legacy: { + full_text: 'envelope tweet', + favorite_count: 1, + retweet_count: 0, + bookmark_count: 0, + extended_entities: { + media: [{ type: 'photo', media_url_https: 'https://pbs.twimg.com/media/x.jpg' }], + }, + }, + core: { user_results: { result: { legacy: { screen_name: 'gina' } } } }, + }, + }, + }, + }, + }, + { + entryId: 'cursor-bottom-Y', + content: { __typename: 'TimelineTimelineCursor', cursorType: 'Bottom', value: 'NEXT' }, + }, + ], + }, + ], + }, + }, + }, + }; + const { tweets, nextCursor } = parseBookmarks(data, new Set()); + expect(tweets).toHaveLength(1); + expect(tweets[0].id).toBe('4'); + expect(tweets[0].has_media).toBe(true); + 
expect(tweets[0].media_urls).toEqual(['https://pbs.twimg.com/media/x.jpg']); + expect(nextCursor).toBe('NEXT'); + }); + + it('returns empty tweets + null cursor for unknown envelope', () => { + expect(parseBookmarks({}, new Set())).toEqual({ tweets: [], nextCursor: null }); + }); +}); From 7edf53783f47f5c86b593141e3ea57dacc9208e8 Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 14:30:13 +0800 Subject: [PATCH 06/37] fix(daemon): report unknown browser command results (#1558) --- src/browser/daemon-client.test.ts | 23 +++++++++++ src/browser/daemon-client.ts | 4 ++ src/daemon-utils.ts | 55 ++++++++++++++++++++++++++ src/daemon.test.ts | 54 +++++++++++++++++++++++++- src/daemon.ts | 64 +++++++++++++++++++++++-------- 5 files changed, 182 insertions(+), 18 deletions(-) create mode 100644 src/daemon-utils.ts diff --git a/src/browser/daemon-client.test.ts b/src/browser/daemon-client.test.ts index 10bfc234c..2a65f91d1 100644 --- a/src/browser/daemon-client.test.ts +++ b/src/browser/daemon-client.test.ts @@ -1,6 +1,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { + BrowserCommandError, fetchDaemonStatus, getDaemonHealth, requestDaemonShutdown, @@ -220,4 +221,26 @@ describe('daemon-client', () => { }); expect(ids[0]).not.toBe(ids[1]); }); + + it('sendCommand does not retry command_result_unknown even when the message looks transient', async () => { + const fetchMock = vi.mocked(fetch); + fetchMock.mockResolvedValue({ + ok: false, + status: 503, + json: () => Promise.resolve({ + id: 'server', + ok: false, + errorCode: 'command_result_unknown', + error: 'Extension disconnected after command timeout', + errorHint: 'Inspect state before retrying.', + }), + } as Response); + + await expect(sendCommand('exec', { code: 'window.__mutate = true' })).rejects.toMatchObject({ + name: 'BrowserCommandError', + code: 'command_result_unknown', + hint: 'Inspect state before retrying.', + } satisfies Partial); + 
expect(fetchMock).toHaveBeenCalledTimes(1); + }); }); diff --git a/src/browser/daemon-client.ts b/src/browser/daemon-client.ts index f2a2c0672..1a789a87f 100644 --- a/src/browser/daemon-client.ts +++ b/src/browser/daemon-client.ts @@ -94,6 +94,7 @@ export interface DaemonStatus { profileDisconnected?: boolean; profiles?: BrowserProfileStatus[]; pending: number; + commandResultUnknown?: number; memoryMB: number; port: number; } @@ -197,6 +198,9 @@ async function sendCommandRaw( const result = (await res.json()) as DaemonResult; if (!result.ok) { + if (result.errorCode === 'command_result_unknown') { + throw new BrowserCommandError(result.error ?? 'Browser command result is unknown', result.errorCode, result.errorHint); + } const isDuplicateCommandId = res.status === 409 || (result.error ?? '').includes('Duplicate command id'); if (isDuplicateCommandId && attempt < maxRetries) { diff --git a/src/daemon-utils.ts b/src/daemon-utils.ts new file mode 100644 index 000000000..93f177615 --- /dev/null +++ b/src/daemon-utils.ts @@ -0,0 +1,55 @@ +export const COMMAND_RESULT_UNKNOWN_CODE = 'command_result_unknown'; + +export const COMMAND_RESULT_UNKNOWN_HINT = + 'Inspect the browser/session state before retrying. 
Do not blindly retry write commands such as navigate, click, type, or eval.'; + +export const PROFILE_DISCONNECTED_HINT = + 'Open that Chrome profile and make sure the OpenCLI extension is enabled, or choose another profile with opencli profile use .'; + +export type DaemonFailureContract = { + message: string; + errorCode: string; + errorHint: string; + status: number; + countAsCommandResultUnknown: boolean; +}; + +export function commandResultUnknownMessage(action: string): string { + return `Browser connection dropped after the ${action} command was dispatched; it may have completed.`; +} + +export function buildExtensionDisconnectFailure(input: { + contextId: string; + action: string; + dispatched: boolean; +}): DaemonFailureContract { + if (input.dispatched) { + return { + message: commandResultUnknownMessage(input.action), + errorCode: COMMAND_RESULT_UNKNOWN_CODE, + errorHint: COMMAND_RESULT_UNKNOWN_HINT, + status: 503, + countAsCommandResultUnknown: true, + }; + } + return buildCommandDispatchFailure(input.contextId); +} + +export function buildCommandDispatchFailure(contextId: string): DaemonFailureContract { + return { + message: `Browser profile "${contextId}" disconnected before command dispatch`, + errorCode: 'profile_disconnected', + errorHint: PROFILE_DISCONNECTED_HINT, + status: 503, + countAsCommandResultUnknown: false, + }; +} + +export function getResponseCorsHeaders(pathname: string, origin?: string): Record | undefined { + if (pathname !== '/ping') return undefined; + if (!origin || !origin.startsWith('chrome-extension://')) return undefined; + return { + 'Access-Control-Allow-Origin': origin, + Vary: 'Origin', + }; +} diff --git a/src/daemon.test.ts b/src/daemon.test.ts index 008e314a9..cdac27bea 100644 --- a/src/daemon.test.ts +++ b/src/daemon.test.ts @@ -1,6 +1,13 @@ import { describe, expect, it } from 'vitest'; -import { getResponseCorsHeaders } from './daemon.js'; +import { + COMMAND_RESULT_UNKNOWN_CODE, + COMMAND_RESULT_UNKNOWN_HINT, + 
buildCommandDispatchFailure, + buildExtensionDisconnectFailure, + commandResultUnknownMessage, + getResponseCorsHeaders, +} from './daemon-utils.js'; describe('getResponseCorsHeaders', () => { it('allows the Browser Bridge extension origin to read /ping', () => { @@ -22,3 +29,48 @@ describe('getResponseCorsHeaders', () => { expect(getResponseCorsHeaders('/command', 'chrome-extension://abc123')).toBeUndefined(); }); }); + +describe('daemon command dispatch', () => { + it('uses a distinct command_result_unknown contract for ambiguous dispatched commands', () => { + expect(COMMAND_RESULT_UNKNOWN_CODE).toBe('command_result_unknown'); + expect(commandResultUnknownMessage('navigate')).toContain('navigate command was dispatched'); + expect(COMMAND_RESULT_UNKNOWN_HINT).toContain('Inspect the browser/session state'); + expect(COMMAND_RESULT_UNKNOWN_HINT).toContain('Do not blindly retry write commands'); + }); + + it('classifies dispatched extension disconnects as command_result_unknown', () => { + expect(buildExtensionDisconnectFailure({ + contextId: 'work', + action: 'navigate', + dispatched: true, + })).toEqual({ + message: 'Browser connection dropped after the navigate command was dispatched; it may have completed.', + errorCode: 'command_result_unknown', + errorHint: COMMAND_RESULT_UNKNOWN_HINT, + status: 503, + countAsCommandResultUnknown: true, + }); + }); + + it('classifies pre-dispatch extension disconnects as profile_disconnected', () => { + expect(buildExtensionDisconnectFailure({ + contextId: 'work', + action: 'navigate', + dispatched: false, + })).toMatchObject({ + message: 'Browser profile "work" disconnected before command dispatch', + errorCode: 'profile_disconnected', + status: 503, + countAsCommandResultUnknown: false, + }); + }); + + it('classifies ws.send dispatch failures as profile_disconnected', () => { + expect(buildCommandDispatchFailure('work')).toMatchObject({ + message: 'Browser profile "work" disconnected before command dispatch', + errorCode: 
'profile_disconnected', + status: 503, + countAsCommandResultUnknown: false, + }); + }); +}); diff --git a/src/daemon.ts b/src/daemon.ts index 803d3cba5..ed50ee27f 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -28,6 +28,11 @@ import { log } from './logger.js'; import { PKG_VERSION } from './version.js'; import { DEFAULT_CONTEXT_ID } from './browser/profile.js'; import { recordExtensionVersion } from './update-check.js'; +import { + buildCommandDispatchFailure, + buildExtensionDisconnectFailure, + getResponseCorsHeaders, +} from './daemon-utils.js'; const PORT = parseInt(process.env.OPENCLI_DAEMON_PORT ?? String(DEFAULT_DAEMON_PORT), 10); @@ -44,10 +49,13 @@ type ExtensionProfileConnection = { const extensionProfiles = new Map(); const pending = new Map void; reject: (error: Error) => void; timer: ReturnType; }>(); +let commandResultUnknownCount = 0; // Extension log ring buffer interface LogEntry { level: string; msg: string; ts: number; } const LOG_BUFFER_SIZE = 200; @@ -136,12 +144,16 @@ function unregisterExtensionConnection(ws: WebSocket): void { for (const [id, p] of pending) { if (p.contextId !== contextId) continue; clearTimeout(p.timer); - p.reject(new DaemonCommandFailure( - `Browser profile "${contextId}" disconnected`, - 'profile_disconnected', - 'Open that Chrome profile and make sure the OpenCLI extension is enabled, or choose another profile with opencli profile use .', - 503, - )); + const failure = buildExtensionDisconnectFailure({ + contextId, + action: p.action, + dispatched: p.dispatched, + }); + if (failure.countAsCommandResultUnknown) { + commandResultUnknownCount++; + log.warn(`[daemon] Command result unknown after extension disconnect (id=${id}, action=${p.action}, context=${contextId})`); + } + p.reject(new DaemonCommandFailure(failure.message, failure.errorCode, failure.errorHint, failure.status)); pending.delete(id); } } @@ -176,15 +188,6 @@ function jsonResponse( res.end(JSON.stringify(data)); } -export function 
getResponseCorsHeaders(pathname: string, origin?: string): Record | undefined { - if (pathname !== '/ping') return undefined; - if (!origin || !origin.startsWith('chrome-extension://')) return undefined; - return { - 'Access-Control-Allow-Origin': origin, - Vary: 'Origin', - }; -} - async function handleRequest(req: IncomingMessage, res: ServerResponse): Promise { // ─── Security: Origin & custom-header check ────────────────────── // Block browser-based CSRF: browsers always send an Origin header on @@ -257,6 +260,7 @@ async function handleRequest(req: IncomingMessage, res: ServerResponse): Promise profileDisconnected: route.errorCode === 'profile_disconnected', profiles, pending: pending.size, + commandResultUnknown: commandResultUnknownCount, memoryMB: Math.round(mem.rss / 1024 / 1024 * 10) / 10, port: PORT, }); @@ -321,8 +325,34 @@ async function handleRequest(req: IncomingMessage, res: ServerResponse): Promise pending.delete(body.id); reject(new Error(`Command timeout (${timeoutMs / 1000}s)`)); }, timeoutMs); - pending.set(body.id, { contextId: route.connection!.contextId, resolve, reject, timer }); - route.connection!.ws.send(JSON.stringify(body)); + const entry = { + contextId: route.connection!.contextId, + action: typeof body.action === 'string' ? body.action : 'unknown', + dispatched: false, + resolve, + reject, + timer, + }; + pending.set(body.id, entry); + const failBeforeDispatch = (err: unknown) => { + if (pending.get(body.id) !== entry) return; + const failure = buildCommandDispatchFailure(entry.contextId); + clearTimeout(timer); + pending.delete(body.id); + reject(new DaemonCommandFailure(failure.message, failure.errorCode, failure.errorHint, failure.status)); + log.warn(`[daemon] Failed to dispatch command ${body.id}: ${err instanceof Error ? 
err.message : String(err)}`); + }; + try { + route.connection!.ws.send(JSON.stringify(body), (err?: Error) => { + if (err && !entry.dispatched) failBeforeDispatch(err); + }); + // Once ws accepts the frame, the command may execute even if the + // result is later lost; do not downgrade later disconnects to a + // pre-dispatch failure just because no result/ack has arrived yet. + entry.dispatched = true; + } catch (err) { + failBeforeDispatch(err); + } }); jsonResponse(res, 200, result); From 29c135b656cac7a7728b7f9c0658f1cb8e7ba399 Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 16:12:17 +0800 Subject: [PATCH 07/37] refactor(notion): replace built-in CDP adapter with external ntn CLI (#1559) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor(notion): replace built-in CDP adapter with external ntn CLI Notion has shipped an official CLI at https://ntn.dev. It uses the public Notion API (blocks / databases / properties / comments) instead of reverse-engineering the Desktop UI, so it survives Notion app updates and exposes a wider command surface than the in-tree adapter could. 
Changes: - `src/external-clis.yaml` — register `ntn` as first-class external CLI (binary `ntn`, homepage ntn.dev, install via the shell-pipe script on mac/linux) - `clis/notion/` — entire directory removed (8 commands: status / search / read / new / write / sidebar / favorites / export) - `docs/adapters/desktop/notion.md` — removed - `docs/.vitepress/config.mts` — drop nav entry - `docs/adapters/index.md` — drop adapter row - `README.md` / `README.zh-CN.md` — drop notion from feature lines, drop adapter table row, add `ntn` to CLI hub examples - `docs/index.md` / `docs/zh/index.md` / `docs/guide/getting-started.md` — drop notion from electron-control feature copy - `skills/opencli-usage/SKILL.md` — drop notion from electron list - `cli-manifest.json` — rebuilt with --allow-removals=8 Migration for users: `curl -fsSL https://ntn.dev | bash` (or `opencli external install ntn`) Then use `opencli ntn ` in place of `opencli notion `. Rationale: the in-tree adapter was reverse-engineered against Notion Desktop CDP and shipped only 8 commands. The official CLI gives users the full Notion API surface and reduces our maintenance burden to zero. Same pattern as gh / obsidian / lark-cli / tg-cli / discord-cli / wx-cli. 
Verification: - `npx tsc --noEmit` clean - `npx vitest run --project unit` → 1091/1 skipped - `npm run build` (with --allow-removals=8) — manifest 809 entries - grep notion in user-facing docs (README / docs / skills) — only descriptive mentions remain in non-blocking places (comparison / site-recon / electron how-to / design doc), no broken adapter references * fix(notion): align ntn external migration * docs(notion): clarify ntn manual install --- CHANGELOG.md | 6 + README.md | 10 +- README.zh-CN.md | 9 +- cli-manifest.json | 175 ------------------ clis/notion/export.js | 32 ---- clis/notion/favorites.js | 85 --------- clis/notion/new.js | 35 ---- clis/notion/read.js | 31 ---- clis/notion/search.js | 47 ----- clis/notion/sidebar.js | 42 ----- clis/notion/status.js | 17 -- clis/notion/write.js | 41 ---- docs/.vitepress/config.mts | 1 - docs/adapters/desktop/notion.md | 29 --- docs/adapters/index.md | 1 - docs/advanced/electron.md | 5 +- docs/comparison.md | 8 +- docs/conventions/listing-detail-id-pairing.md | 2 +- docs/guide/electron-app-cli.md | 1 - docs/guide/getting-started.md | 2 +- docs/index.md | 2 +- docs/zh/guide/electron-app-cli.md | 1 - docs/zh/index.md | 2 +- skills/opencli-usage/SKILL.md | 6 +- src/electron-apps.test.ts | 1 + src/electron-apps.ts | 1 - src/external-clis.yaml | 6 + src/external.test.ts | 17 ++ 28 files changed, 54 insertions(+), 561 deletions(-) delete mode 100644 clis/notion/export.js delete mode 100644 clis/notion/favorites.js delete mode 100644 clis/notion/new.js delete mode 100644 clis/notion/read.js delete mode 100644 clis/notion/search.js delete mode 100644 clis/notion/sidebar.js delete mode 100644 clis/notion/status.js delete mode 100644 clis/notion/write.js delete mode 100644 docs/adapters/desktop/notion.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f11b5133..4101566c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +### ⚠ BREAKING CHANGES + +* **notion** — remove the in-tree 
`clis/notion/` CDP-on-Desktop adapter (8 commands: status / search / read / new / write / sidebar / favorites / export). Notion has shipped an official CLI at <https://ntn.dev>, which is registered as a first-class external CLI in `external-clis.yaml`. Migration: install `ntn` from <https://ntn.dev> (`curl -fsSL https://ntn.dev | bash`), then use `opencli ntn `. Auto-install is intentionally not configured because the official installer is a shell script while OpenCLI external installs run shell-free command strings. The official CLI uses the public Notion API rather than reverse-engineering the Desktop UI, so it survives Notion app updates and exposes a wider command surface (blocks / databases / properties / comments) than the reverse-engineered adapter could. + ## [1.7.19](https://github.com/jackwener/opencli/compare/v1.7.18...v1.7.19) (2026-05-14) Major hotfix + simplification batch. Extension bumped to 1.0.14. Node floor lowered to v20 so the long tail of Node v20–v21.6 users no longer crashes at module load. `opencli browser` user surface replaces required-flag `--session ` with a `` positional. `page.evaluate(fn, ...args)` adds a type-safe alternative to the implicit auto-IIFE string form. Twitter cursor pagination no longer silently caps at ~500 items. diff --git a/README.md b/README.md index 400f97dc2..b2b9063f8 100644 --- a/README.md +++ b/README.md @@ -14,11 +14,11 @@ OpenCLI gives you one surface for three different kinds of automation: - **Let AI Agents operate any website** — install the `opencli-adapter-author` skill in your AI agent (Claude Code, Cursor, etc.), and it can navigate, click, type/fill, extract, and inspect any page through your logged-in browser via `opencli browser` primitives. - **Write new adapters** end-to-end with `opencli browser` + the `opencli-adapter-author` skill, which guides from first recon through field decoding, code, and `opencli browser verify`.
-It also works as a **CLI hub** for local tools such as `gh`, `docker`, `tg`, `discord`, `wx`, and other binaries you register yourself, plus **desktop app adapters** for Electron apps like Cursor, Codex, Antigravity, ChatGPT, and Notion. +It also works as a **CLI hub** for local tools such as `gh`, `docker`, `tg`, `discord`, `wx`, `ntn` (Notion), and other binaries you register yourself, plus **desktop app adapters** for Electron apps like Cursor, Codex, Antigravity, and ChatGPT. ## Highlights -- **Desktop App Control** — Drive Electron apps (Cursor, Codex, ChatGPT, Notion, etc.) directly from the terminal via CDP. +- **Desktop App Control** — Drive Electron apps (Cursor, Codex, ChatGPT, etc.) directly from the terminal via CDP. - **Browser Automation for AI Agents** — Install the `opencli-adapter-author` skill, and your AI agent can operate any website: navigate, click, type/fill, extract, screenshot — all through your logged-in Chrome session. - **Multi-profile Browser Bridge** — Install the extension in each Chrome profile you want to use, then route commands with `--profile`, `OPENCLI_PROFILE`, or `opencli profile use`. - **Website → CLI** — Turn any website into a deterministic CLI: 100+ site surfaces are already registered, or write your own with the `opencli-adapter-author` skill + `opencli browser verify`. @@ -283,13 +283,14 @@ To load the source Browser Bridge extension: ## CLI Hub -OpenCLI acts as a universal hub for your existing command-line tools — unified discovery, pure passthrough execution, and auto-install (if a tool isn't installed, OpenCLI runs `brew install ` automatically before re-running the command). +OpenCLI acts as a universal hub for your existing command-line tools — unified discovery, pure passthrough execution, and auto-install when a safe package-manager command is configured. 
| External CLI | Description | Example | |--------------|-------------|---------| | **gh** | GitHub CLI | `opencli gh pr list --limit 5` | | **obsidian** | Obsidian vault management | `opencli obsidian search query="AI"` | | **docker** | Docker | `opencli docker ps` | +| **ntn** | Notion CLI — official Notion API CLI for pages, databases, blocks, search, comments | `opencli ntn pages list` | | **lark-cli** | Lark/Feishu — messages, docs, calendar, tasks, 200+ commands | `opencli lark-cli calendar +agenda` | | **dws** | DingTalk — cross-platform CLI for DingTalk's full suite, designed for humans and AI agents | `opencli dws msg send --to user "hello"` | | **wecom-cli** | WeCom/企业微信 — CLI for WeCom open platform, for humans and AI agents | `opencli wecom-cli msg send --to user "hello"` | @@ -304,6 +305,8 @@ OpenCLI acts as a universal hub for your existing command-line tools — unified opencli external register mycli ``` +**Manual install** — some external CLIs use official shell-script installers rather than shell-free package-manager commands. For `ntn`, install from <https://ntn.dev> first, then run `opencli ntn ...`. + ### Desktop App Adapters Control Electron desktop apps directly from the terminal. Each adapter has its own detailed documentation: @@ -315,7 +318,6 @@ Control Electron desktop apps directly from the terminal.
Each adapter has its o | **Antigravity** | Control Antigravity Ultra from terminal | [Doc](./docs/adapters/desktop/antigravity.md) | | **ChatGPT App** | Automate ChatGPT macOS desktop app | [Doc](./docs/adapters/desktop/chatgpt-app.md) | | **ChatWise** | Multi-LLM client (GPT-4, Claude, Gemini) | [Doc](./docs/adapters/desktop/chatwise.md) | -| **Notion** | Search, read, write Notion pages | [Doc](./docs/adapters/desktop/notion.md) | | **Discord** | Discord Desktop — messages, channels, servers | [Doc](./docs/adapters/desktop/discord.md) | | **Doubao** | Control Doubao AI desktop app via CDP | [Doc](./docs/adapters/desktop/doubao-app.md) | diff --git a/README.zh-CN.md b/README.zh-CN.md index d366a0590..8e77b81dd 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -14,11 +14,11 @@ OpenCLI 可以用同一套 CLI 做三类事情: - **让 AI Agent 操作任意网站**:在你的 AI Agent(Claude Code、Cursor 等)中安装 `opencli-adapter-author` skill,Agent 就能用你的已登录浏览器导航、点击、输入/填充、提取任意网页内容。 - **把新网站写成 CLI**:用 `opencli browser` 原语 + `opencli-adapter-author` skill,从站点侦察、API 发现、字段解码到 `opencli browser verify` 一条龙。 -除了网站能力,OpenCLI 还是一个 **CLI 枢纽**:你可以把 `gh`、`docker`、`tg`、`discord`、`wx` 等本地工具统一注册到 `opencli` 下,也可以通过桌面端适配器控制 Cursor、Codex、Antigravity、ChatGPT、Notion 等 Electron 应用。 +除了网站能力,OpenCLI 还是一个 **CLI 枢纽**:你可以把 `gh`、`docker`、`tg`、`discord`、`wx`、`ntn`(Notion)等本地工具统一注册到 `opencli` 下,也可以通过桌面端适配器控制 Cursor、Codex、Antigravity、ChatGPT 等 Electron 应用。 ## 亮点 -- **桌面应用控制** — 通过 CDP 直接在终端驱动 Electron 应用(Cursor、Codex、ChatGPT、Notion 等)。 +- **桌面应用控制** — 通过 CDP 直接在终端驱动 Electron 应用(Cursor、Codex、ChatGPT 等)。 - **AI Agent 浏览器自动化** — 安装 `opencli-adapter-author` skill,你的 AI Agent 就能操作任意网站:导航、点击、输入/填充、提取、截图——全部通过你的已登录 Chrome 会话完成。 - **网站 → CLI** — 把任何网站变成确定性 CLI:100+ 站点能力已注册,或用 `opencli-adapter-author` skill + `opencli browser verify` 自己写。 - **账号安全** — 复用 Chrome/Chromium 登录态,凭证永远不会离开浏览器。 @@ -241,7 +241,6 @@ npm link | **chatwise** | `status` `new` `send` `read` `ask` `model` `history` `export` `screenshot` | 桌面端 | | **doubao** | `status` `new` `send` 
`read` `ask` `history` `detail` `meeting-summary` `meeting-transcript` | 浏览器 | | **doubao-app** | `status` `new` `send` `read` `ask` `screenshot` `dump` | 桌面端 | -| **notion** | `status` `search` `read` `new` `write` `sidebar` `favorites` `export` | 桌面端 | | **discord-app** | `status` `send` `read` `channels` `servers` `search` `members` | 桌面端 | | **v2ex** | `hot` `latest` `topic` `node` `user` `member` `replies` `nodes` `daily` `me` `notifications` | 公开 / 浏览器 | | **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `comments` `watchlist` `earnings-date` `fund-holdings` `fund-snapshot` | 浏览器 | @@ -333,6 +332,7 @@ OpenCLI 也可以作为你现有命令行工具的统一入口,负责发现、 | **gh** | GitHub CLI | `opencli gh pr list --limit 5` | | **obsidian** | Obsidian 仓库管理 | `opencli obsidian search query="AI"` | | **docker** | Docker 命令行工具 | `opencli docker ps` | +| **ntn** | Notion CLI — 基于官方 Notion API 的页面、数据库、块、搜索、评论命令 | `opencli ntn pages list` | | **lark-cli** | 飞书 CLI — 消息、文档、日历、任务,200+ 命令 | `opencli lark-cli calendar +agenda` | | **dws** | 钉钉 CLI — 钉钉全套产品能力的跨平台命令行工具,支持人类和 AI Agent 使用 | `opencli dws msg send --to user "hello"` | | **wecom-cli** | 企业微信 CLI — 企业微信开放平台命令行工具,支持人类和 AI Agent 使用 | `opencli wecom-cli msg send --to user "hello"` | @@ -343,7 +343,7 @@ OpenCLI 也可以作为你现有命令行工具的统一入口,负责发现、 **零配置透传**:OpenCLI 会把你的输入原样转发给底层二进制,保留原生 stdout / stderr 行为。 -**自动安装**:如果你运行 `opencli gh ...` 时系统中还没有 `gh`,OpenCLI 会优先尝试通过系统包管理器安装,然后自动重试命令。 +**自动安装**:如果某个外部 CLI 配置了安全的包管理器安装命令,OpenCLI 会优先尝试安装后再执行;`ntn` 的官方安装方式是 shell 脚本,请先按 <https://ntn.dev> 手动安装。 **注册自定义本地 CLI**: @@ -362,7 +362,6 @@ opencli register mycli | **Antigravity** | 在终端直接控制 Antigravity Ultra | [Doc](./docs/adapters/desktop/antigravity.md) | | **ChatGPT App** | 自动化操作 ChatGPT macOS 桌面客户端 | [Doc](./docs/adapters/desktop/chatgpt-app.md) | | **ChatWise** | 多 LLM 客户端(GPT-4、Claude、Gemini) | [Doc](./docs/adapters/desktop/chatwise.md) | -| **Notion** | 搜索、读取、写入 Notion 页面 | [Doc](./docs/adapters/desktop/notion.md) | | **Discord** | Discord 桌面版 — 消息、频道、服务器 |
[Doc](./docs/adapters/desktop/discord.md) | | **Doubao** | 通过 CDP 控制豆包桌面应用 | [Doc](./docs/adapters/desktop/doubao-app.md) | diff --git a/cli-manifest.json b/cli-manifest.json index 457fd43ab..17b77d182 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -15753,181 +15753,6 @@ "sourceFile": "notebooklm/summary.js", "navigateBefore": false }, - { - "site": "notion", - "name": "export", - "description": "Export the current Notion page as Markdown", - "access": "read", - "domain": "localhost", - "strategy": "ui", - "browser": true, - "args": [ - { - "name": "output", - "type": "str", - "required": false, - "help": "Output file (default: /tmp/notion-export.md)" - } - ], - "columns": [ - "Status", - "File" - ], - "type": "js", - "modulePath": "notion/export.js", - "sourceFile": "notion/export.js", - "navigateBefore": true - }, - { - "site": "notion", - "name": "favorites", - "description": "List pages from the Notion Favorites section in the sidebar", - "access": "read", - "domain": "localhost", - "strategy": "ui", - "browser": true, - "args": [], - "columns": [ - "Index", - "Title", - "Icon" - ], - "type": "js", - "modulePath": "notion/favorites.js", - "sourceFile": "notion/favorites.js", - "navigateBefore": true - }, - { - "site": "notion", - "name": "new", - "description": "Create a new page in Notion", - "access": "write", - "domain": "localhost", - "strategy": "ui", - "browser": true, - "args": [ - { - "name": "title", - "type": "str", - "required": false, - "positional": true, - "help": "Page title (optional)" - } - ], - "columns": [ - "Status" - ], - "type": "js", - "modulePath": "notion/new.js", - "sourceFile": "notion/new.js", - "navigateBefore": true - }, - { - "site": "notion", - "name": "read", - "description": "Read the content of the currently open Notion page", - "access": "read", - "domain": "localhost", - "strategy": "ui", - "browser": true, - "args": [], - "columns": [ - "Title", - "Content" - ], - "type": "js", - "modulePath": "notion/read.js", - 
"sourceFile": "notion/read.js", - "navigateBefore": true - }, - { - "site": "notion", - "name": "search", - "description": "Search pages and databases in Notion via Quick Find (Cmd+P)", - "access": "read", - "domain": "localhost", - "strategy": "ui", - "browser": true, - "args": [ - { - "name": "query", - "type": "str", - "required": true, - "positional": true, - "help": "Search query" - } - ], - "columns": [ - "Index", - "Title" - ], - "type": "js", - "modulePath": "notion/search.js", - "sourceFile": "notion/search.js", - "navigateBefore": true - }, - { - "site": "notion", - "name": "sidebar", - "description": "List pages and databases from the Notion sidebar", - "access": "read", - "domain": "localhost", - "strategy": "ui", - "browser": true, - "args": [], - "columns": [ - "Index", - "Title" - ], - "type": "js", - "modulePath": "notion/sidebar.js", - "sourceFile": "notion/sidebar.js", - "navigateBefore": true - }, - { - "site": "notion", - "name": "status", - "description": "Check active CDP connection to Notion Desktop", - "access": "read", - "domain": "localhost", - "strategy": "ui", - "browser": true, - "args": [], - "columns": [ - "Status", - "Url", - "Title" - ], - "type": "js", - "modulePath": "notion/status.js", - "sourceFile": "notion/status.js", - "navigateBefore": true - }, - { - "site": "notion", - "name": "write", - "description": "Append text content to the currently open Notion page", - "access": "write", - "domain": "localhost", - "strategy": "ui", - "browser": true, - "args": [ - { - "name": "text", - "type": "str", - "required": true, - "positional": true, - "help": "Text to append to the page" - } - ], - "columns": [ - "Status" - ], - "type": "js", - "modulePath": "notion/write.js", - "sourceFile": "notion/write.js", - "navigateBefore": true - }, { "site": "nowcoder", "name": "companies", diff --git a/clis/notion/export.js b/clis/notion/export.js deleted file mode 100644 index 123e852f0..000000000 --- a/clis/notion/export.js +++ /dev/null @@ 
-1,32 +0,0 @@ -import * as fs from 'node:fs'; -import { cli, Strategy } from '@jackwener/opencli/registry'; -export const exportCommand = cli({ - site: 'notion', - name: 'export', - access: 'read', - description: 'Export the current Notion page as Markdown', - domain: 'localhost', - strategy: Strategy.UI, - browser: true, - args: [ - { name: 'output', required: false, help: 'Output file (default: /tmp/notion-export.md)' }, - ], - columns: ['Status', 'File'], - func: async (page, kwargs) => { - const outputPath = kwargs.output || '/tmp/notion-export.md'; - const result = await page.evaluate(` - (function() { - const titleEl = document.querySelector('[data-block-id] [placeholder="Untitled"], h1.notion-title, [class*="title"]'); - const title = titleEl ? (titleEl.textContent || '').trim() : document.title; - - const frame = document.querySelector('.notion-page-content, [class*="page-content"], main'); - const content = frame ? (frame.innerText || '').trim() : document.body.innerText; - - return { title, content }; - })() - `); - const md = `# ${result.title}\n\n${result.content}`; - fs.writeFileSync(outputPath, md); - return [{ Status: 'Success', File: outputPath }]; - }, -}); diff --git a/clis/notion/favorites.js b/clis/notion/favorites.js deleted file mode 100644 index ac6c3ec6f..000000000 --- a/clis/notion/favorites.js +++ /dev/null @@ -1,85 +0,0 @@ -import { cli, Strategy } from '@jackwener/opencli/registry'; -export const favoritesCommand = cli({ - site: 'notion', - name: 'favorites', - access: 'read', - description: 'List pages from the Notion Favorites section in the sidebar', - domain: 'localhost', - strategy: Strategy.UI, - browser: true, - args: [], - columns: ['Index', 'Title', 'Icon'], - func: async (page) => { - const items = await page.evaluate(` - (function() { - const results = []; - - // Strategy 1: Use Notion's own class 'notion-outliner-bookmarks-header-container' - const headerContainer = 
document.querySelector('.notion-outliner-bookmarks-header-container'); - if (headerContainer) { - // Walk up to the section parent that wraps header + items - let section = headerContainer.parentElement; - if (section && section.children.length === 1) section = section.parentElement; - - if (section) { - const treeItems = section.querySelectorAll('[role="treeitem"]'); - treeItems.forEach((item) => { - // Title text is in a div.notranslate sibling of the icon area - const titleEl = item.querySelector('div.notranslate:not(.notion-record-icon)'); - const title = titleEl - ? titleEl.textContent.trim() - : (item.textContent || '').trim().substring(0, 80); - - // Icon/emoji is in the notion-record-icon element - const iconEl = item.querySelector('.notion-record-icon'); - const icon = iconEl ? iconEl.textContent.trim().substring(0, 4) : ''; - - if (title && title.length > 0) { - results.push({ Index: results.length + 1, Title: title, Icon: icon || '📄' }); - } - }); - } - } - - // Strategy 2: Fallback — find "Favorites" text node and walk DOM - if (results.length === 0) { - const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null); - let node; - let favEl = null; - while (node = walker.nextNode()) { - const text = node.textContent.trim(); - if (text === 'Favorites' || text === '收藏' || text === '收藏夹') { - favEl = node.parentElement; - break; - } - } - - if (favEl) { - let section = favEl; - for (let i = 0; i < 6; i++) { - const p = section.parentElement; - if (!p || p === document.body) break; - const treeItems = p.querySelectorAll(':scope > [role="treeitem"]'); - if (treeItems.length > 0) { section = p; break; } - section = p; - } - - const treeItems = section.querySelectorAll('[role="treeitem"]'); - treeItems.forEach((item) => { - const text = (item.textContent || '').trim().substring(0, 120); - if (text && text.length > 1 && !text.match(/^(Favorites|收藏夹?)$/)) { - results.push({ Index: results.length + 1, Title: text, Icon: '📄' }); - } - }); - } 
- } - - return results; - })() - `); - if (items.length === 0) { - return [{ Index: 0, Title: 'No favorites found. Make sure sidebar is visible and you have favorites.', Icon: '⚠️' }]; - } - return items; - }, -}); diff --git a/clis/notion/new.js b/clis/notion/new.js deleted file mode 100644 index 56cd5889f..000000000 --- a/clis/notion/new.js +++ /dev/null @@ -1,35 +0,0 @@ -import { cli, Strategy } from '@jackwener/opencli/registry'; -export const newCommand = cli({ - site: 'notion', - name: 'new', - access: 'write', - description: 'Create a new page in Notion', - domain: 'localhost', - strategy: Strategy.UI, - browser: true, - args: [ - { name: 'title', required: false, positional: true, help: 'Page title (optional)' }, - ], - columns: ['Status'], - func: async (page, kwargs) => { - const title = kwargs.title; - // Cmd+N creates a new page in Notion - const isMac = process.platform === 'darwin'; - await page.pressKey(isMac ? 'Meta+N' : 'Control+N'); - await page.wait(1); - // If title is provided, type it into the title field - if (title) { - await page.evaluate(` - (function(t) { - const titleEl = document.querySelector('[placeholder="Untitled"], [data-content-editable-leaf] [placeholder]'); - if (titleEl) { - titleEl.focus(); - document.execCommand('insertText', false, t); - } - })(${JSON.stringify(title)}) - `); - await page.wait(0.5); - } - return [{ Status: title ? 
`Created page: ${title}` : 'New blank page created' }]; - }, -}); diff --git a/clis/notion/read.js b/clis/notion/read.js deleted file mode 100644 index 52e99922e..000000000 --- a/clis/notion/read.js +++ /dev/null @@ -1,31 +0,0 @@ -import { cli, Strategy } from '@jackwener/opencli/registry'; -export const readCommand = cli({ - site: 'notion', - name: 'read', - access: 'read', - description: 'Read the content of the currently open Notion page', - domain: 'localhost', - strategy: Strategy.UI, - browser: true, - args: [], - columns: ['Title', 'Content'], - func: async (page) => { - const result = await page.evaluate(` - (function() { - // Get the page title - const titleEl = document.querySelector('[data-block-id] [placeholder="Untitled"], .notion-page-block .notranslate, h1.notion-title, [class*="title"]'); - const title = titleEl ? (titleEl.textContent || '').trim() : document.title; - - // Get the page content — Notion renders blocks in a frame - const frame = document.querySelector('.notion-page-content, [class*="page-content"], .layout-content, main'); - const content = frame ? 
(frame.innerText || frame.textContent || '').trim() : ''; - - return { title, content }; - })() - `); - return [{ - Title: result.title || 'Untitled', - Content: result.content || '(empty page)', - }]; - }, -}); diff --git a/clis/notion/search.js b/clis/notion/search.js deleted file mode 100644 index 4239d6ff6..000000000 --- a/clis/notion/search.js +++ /dev/null @@ -1,47 +0,0 @@ -import { cli, Strategy } from '@jackwener/opencli/registry'; -export const searchCommand = cli({ - site: 'notion', - name: 'search', - access: 'read', - description: 'Search pages and databases in Notion via Quick Find (Cmd+P)', - domain: 'localhost', - strategy: Strategy.UI, - browser: true, - args: [{ name: 'query', required: true, positional: true, help: 'Search query' }], - columns: ['Index', 'Title'], - func: async (page, kwargs) => { - const query = kwargs.query; - // Open Quick Find - const isMac = process.platform === 'darwin'; - await page.pressKey(isMac ? 'Meta+P' : 'Control+P'); - await page.wait(0.5); - // Type the search query - await page.evaluate(` - (function(q) { - const input = document.querySelector('input[placeholder*="Search"], input[type="text"]'); - if (input) { - const setter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value').set; - setter.call(input, q); - input.dispatchEvent(new Event('input', { bubbles: true })); - } - })(${JSON.stringify(query)}) - `); - await page.wait(1.5); - // Scrape results - const results = await page.evaluate(` - (function() { - const items = document.querySelectorAll('[role="option"], [class*="searchResult"], [class*="quick-find"] [role="button"]'); - return Array.from(items).slice(0, 20).map((item, i) => ({ - Index: i + 1, - Title: (item.textContent || '').trim().substring(0, 120), - })); - })() - `); - // Close Quick Find - await page.pressKey('Escape'); - if (results.length === 0) { - return [{ Index: 0, Title: `No results for "${query}"` }]; - } - return results; - }, -}); diff --git 
a/clis/notion/sidebar.js b/clis/notion/sidebar.js deleted file mode 100644 index 6a9e077c7..000000000 --- a/clis/notion/sidebar.js +++ /dev/null @@ -1,42 +0,0 @@ -import { cli, Strategy } from '@jackwener/opencli/registry'; -export const sidebarCommand = cli({ - site: 'notion', - name: 'sidebar', - access: 'read', - description: 'List pages and databases from the Notion sidebar', - domain: 'localhost', - strategy: Strategy.UI, - browser: true, - args: [], - columns: ['Index', 'Title'], - func: async (page) => { - const items = await page.evaluate(` - (function() { - const results = []; - // Notion sidebar items - const selectors = [ - '[class*="sidebar"] [role="treeitem"]', - '[class*="sidebar"] a', - '.notion-sidebar [role="button"]', - 'nav [role="treeitem"]', - ]; - - for (const sel of selectors) { - const nodes = document.querySelectorAll(sel); - if (nodes.length > 0) { - nodes.forEach((n, i) => { - const text = (n.textContent || '').trim().substring(0, 100); - if (text && text.length > 1) results.push({ Index: i + 1, Title: text }); - }); - break; - } - } - return results; - })() - `); - if (items.length === 0) { - return [{ Index: 0, Title: 'No sidebar items found. Toggle the sidebar first.' 
}]; - } - return items; - }, -}); diff --git a/clis/notion/status.js b/clis/notion/status.js deleted file mode 100644 index 637f1b988..000000000 --- a/clis/notion/status.js +++ /dev/null @@ -1,17 +0,0 @@ -import { cli, Strategy } from '@jackwener/opencli/registry'; -export const statusCommand = cli({ - site: 'notion', - name: 'status', - access: 'read', - description: 'Check active CDP connection to Notion Desktop', - domain: 'localhost', - strategy: Strategy.UI, - browser: true, - args: [], - columns: ['Status', 'Url', 'Title'], - func: async (page) => { - const url = await page.evaluate('window.location.href'); - const title = await page.evaluate('document.title'); - return [{ Status: 'Connected', Url: url, Title: title }]; - }, -}); diff --git a/clis/notion/write.js b/clis/notion/write.js deleted file mode 100644 index 11eda6132..000000000 --- a/clis/notion/write.js +++ /dev/null @@ -1,41 +0,0 @@ -import { cli, Strategy } from '@jackwener/opencli/registry'; -export const writeCommand = cli({ - site: 'notion', - name: 'write', - access: 'write', - description: 'Append text content to the currently open Notion page', - domain: 'localhost', - strategy: Strategy.UI, - browser: true, - args: [{ name: 'text', required: true, positional: true, help: 'Text to append to the page' }], - columns: ['Status'], - func: async (page, kwargs) => { - const text = kwargs.text; - // Focus the page body and move to the end - await page.evaluate(` - (function(text) { - // Find the editable area in Notion - const editables = document.querySelectorAll('.notion-page-content [contenteditable="true"], [class*="page-content"] [contenteditable="true"]'); - let target = editables.length > 0 ? editables[editables.length - 1] : null; - - if (!target) { - // Fallback: just find any contenteditable - const all = document.querySelectorAll('[contenteditable="true"]'); - target = all.length > 0 ? 
all[all.length - 1] : null; - } - - if (!target) throw new Error('Could not find editable area in Notion page'); - - target.focus(); - // Move to end - const sel = window.getSelection(); - sel.selectAllChildren(target); - sel.collapseToEnd(); - - document.execCommand('insertText', false, text); - })(${JSON.stringify(text)}) - `); - await page.wait(0.5); - return [{ Status: 'Text appended successfully' }]; - }, -}); diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index 1c767bcb9..d6aef8812 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -148,7 +148,6 @@ export default defineConfig({ { text: 'Antigravity', link: '/adapters/desktop/antigravity' }, { text: 'ChatGPT', link: '/adapters/desktop/chatgpt' }, { text: 'ChatWise', link: '/adapters/desktop/chatwise' }, - { text: 'Notion', link: '/adapters/desktop/notion' }, { text: 'Discord', link: '/adapters/desktop/discord' }, { text: 'Doubao App', link: '/adapters/desktop/doubao-app' }, ], diff --git a/docs/adapters/desktop/notion.md b/docs/adapters/desktop/notion.md deleted file mode 100644 index 7ff21cd09..000000000 --- a/docs/adapters/desktop/notion.md +++ /dev/null @@ -1,29 +0,0 @@ -# Notion - -Control the **Notion Desktop App** from the terminal via Chrome DevTools Protocol (CDP). 
- -## Prerequisites - -Launch with remote debugging port: -```bash -/Applications/Notion.app/Contents/MacOS/Notion --remote-debugging-port=9230 -``` - -## Setup - -```bash -export OPENCLI_CDP_ENDPOINT="http://127.0.0.1:9230" -``` - -## Commands - -| Command | Description | -|---------|-------------| -| `opencli notion status` | Check CDP connection | -| `opencli notion search "query"` | Quick Find search (Cmd+P) | -| `opencli notion read` | Read the current page content | -| `opencli notion new "title"` | Create a new page (Cmd+N) | -| `opencli notion write "text"` | Append text to the current page | -| `opencli notion sidebar` | List pages from the sidebar | -| `opencli notion favorites` | List pages from the Favorites section | -| `opencli notion export` | Export page as Markdown | diff --git a/docs/adapters/index.md b/docs/adapters/index.md index 5d002f615..5360cbcec 100644 --- a/docs/adapters/index.md +++ b/docs/adapters/index.md @@ -148,6 +148,5 @@ Run `opencli list` for the live registry. 
| **[Antigravity](./desktop/antigravity.md)** | Control Antigravity Ultra | `status` `send` `read` `new` `dump` `extract-code` `model` `watch` | | **[ChatGPT App](./desktop/chatgpt-app.md)** | Automate ChatGPT macOS app | `status` `new` `send` `read` `ask` `model` | | **[ChatWise](./desktop/chatwise.md)** | Multi-LLM client | `status` `new` `send` `read` `ask` `model` `history` `export` `screenshot` | -| **[Notion](./desktop/notion.md)** | Search, read, write pages | `status` `search` `read` `new` `write` `sidebar` `favorites` `export` | | **[Discord](./desktop/discord.md)** | Desktop messages & channels | `status` `send` `read` `channels` `servers` `search` `members` | | **[Doubao App](./desktop/doubao-app.md)** | Doubao AI desktop app via CDP | `status` `new` `send` `read` `ask` `screenshot` `dump` | diff --git a/docs/advanced/electron.md b/docs/advanced/electron.md index 322b090a4..13dcaeae8 100644 --- a/docs/advanced/electron.md +++ b/docs/advanced/electron.md @@ -4,7 +4,7 @@ description: How to CLI-ify and automate any Electron Desktop Application via CD # CLI-ifying Electron Applications (Skill Guide) -Based on the successful automation of **Cursor**, **Codex**, **Antigravity**, **ChatWise**, **Notion**, and **Discord** desktop apps, this guide serves as the standard operating procedure (SOP) for adapting ANY Electron-based application into an OpenCLI adapter. +Based on the successful automation of **Cursor**, **Codex**, **Antigravity**, **ChatWise**, and **Discord** desktop apps, this guide serves as the standard operating procedure (SOP) for adapting ANY Electron-based application into an OpenCLI adapter. ## Core Concept @@ -108,7 +108,7 @@ Core techniques: ## Pitfalls & Gotchas -1. **Port conflicts (EADDRINUSE)**: Only one app per port. Use unique ports: Codex=9222, ChatGPT=9224, Cursor=9226, ChatWise=9228, Notion=9230, Discord=9232 +1. **Port conflicts (EADDRINUSE)**: Only one app per port. 
Use unique ports: Codex=9222, ChatGPT=9224, Cursor=9226, ChatWise=9228, Discord=9232 2. **IPage abstraction**: OpenCLI wraps the browser page as `IPage` (`src/types.ts`). Use `page.pressKey()` and `page.evaluate()`, NOT direct DOM APIs 3. **Timing**: Always add `await page.wait(0.5)` to `1.0` after DOM mutations. Returning too early disconnects prematurely 4. **AppleScript requires Accessibility**: Terminal app must be granted permission in System Settings → Privacy & Security → Accessibility @@ -121,5 +121,4 @@ Core techniques: | ChatGPT | 9224 | CDP / AppleScript | | Cursor | 9226 | CDP | | ChatWise | 9228 | CDP | -| Notion | 9230 | CDP | | Discord App | 9232 | CDP | diff --git a/docs/comparison.md b/docs/comparison.md index 917a509d8..888ccfe0e 100644 --- a/docs/comparison.md +++ b/docs/comparison.md @@ -71,14 +71,14 @@ OpenCLI occupies a specific niche in the browser automation ecosystem. This guid ### 5. Desktop App Control -> "I want to script Cursor, ChatGPT, Notion, or other Electron apps from the terminal." +> "I want to script Cursor, ChatGPT, or other Electron apps from the terminal." | Tool | Fit | Notes | |------|-----|-------| -| **opencli** | Best | 8 desktop adapters via CDP + AppleScript. The only CLI tool with this capability. | +| **opencli** | Best | 7 desktop adapters via CDP + AppleScript. The only CLI tool with this capability. | | All others | N/A | Browser automation tools cannot control desktop applications. | -**This is unique to opencli.** No other tool in this comparison can send a prompt to ChatGPT desktop, extract code from Cursor, or write to Notion pages via CLI. +**This is unique to opencli.** No other tool in this comparison can send a prompt to ChatGPT desktop or extract code from Cursor via CLI. ## Key Trade-offs @@ -88,7 +88,7 @@ OpenCLI occupies a specific niche in the browser automation ecosystem. This guid - **Deterministic output** — Same command always returns the same schema. Pipeable, scriptable, CI-friendly. 
- **Speed** — Adapter commands return in seconds, not minutes. - **Broad platform coverage** — 100+ registered site surfaces spanning global platforms (Reddit, HackerNews, Twitter, YouTube) and Chinese platforms (Bilibili, Zhihu, Xiaohongshu, Douban, Weibo) with adapters that understand local anti-bot patterns. -- **Desktop app control** — CDP adapters for Cursor, Codex, Notion, ChatGPT, Discord, and more. +- **Desktop app control** — CDP adapters for Cursor, Codex, ChatGPT, Discord, and more. - **Easy to extend** — Drop a `.js` adapter into the `clis/` folder for auto-registration. Contributing a new site adapter is straightforward. ### opencli's Limitations diff --git a/docs/conventions/listing-detail-id-pairing.md b/docs/conventions/listing-detail-id-pairing.md index 29ccdf570..c61903e48 100644 --- a/docs/conventions/listing-detail-id-pairing.md +++ b/docs/conventions/listing-detail-id-pairing.md @@ -63,7 +63,7 @@ call. Many legitimate listings genuinely don't pair: search keywords, not addressable entities. - **Profile-attribute listings** (`reddit user`, `lesswrong user`, `weibo user`) — rows are `[field, value]` pairs of one profile. -- **UI-only sessions** (`discord-app search`, `notion search` Quick Find) +- **UI-only sessions** (`discord-app search`, app quick find surfaces) — page ids aren't extractable from the rendered DOM. - **Comment / reply listings** — sub-resources of a parent thread; the detail command fetches the parent, not the comment. 
diff --git a/docs/guide/electron-app-cli.md b/docs/guide/electron-app-cli.md index 59809d169..956f7fdd1 100644 --- a/docs/guide/electron-app-cli.md +++ b/docs/guide/electron-app-cli.md @@ -153,7 +153,6 @@ When the adapter is ready, also add: Examples to study: - `docs/adapters/desktop/codex.md` - `docs/adapters/desktop/chatwise.md` -- `docs/adapters/desktop/notion.md` - `docs/adapters/desktop/discord.md` ## Common failure modes diff --git a/docs/guide/getting-started.md b/docs/guide/getting-started.md index 901fe178d..63131cfc1 100644 --- a/docs/guide/getting-started.md +++ b/docs/guide/getting-started.md @@ -11,7 +11,7 @@ OpenCLI turns **any website** or **Electron app** into a command-line interface ## Highlights -- **Desktop App Control** — Drive Electron apps (Cursor, Codex, ChatGPT, Notion, etc.) directly from the terminal via CDP. +- **Desktop App Control** — Drive Electron apps (Cursor, Codex, ChatGPT, etc.) directly from the terminal via CDP. - **Browser Automation** — `browser` gives AI agents direct browser control: click, type/fill, extract, screenshot — fully scriptable. - **Website → CLI** — Turn any website into a deterministic CLI: 100+ site surfaces are already registered, or author your own with the `opencli-adapter-author` skill. - **Account-safe** — Reuses Chrome's logged-in state; your credentials never leave the browser. diff --git a/docs/index.md b/docs/index.md index 45584ec3b..422bced25 100644 --- a/docs/index.md +++ b/docs/index.md @@ -16,7 +16,7 @@ hero: features: - icon: 🖥️ title: Desktop App Control - details: Drive Electron apps (Cursor, Codex, ChatGPT, Notion, etc.) directly from the terminal via CDP. + details: Drive Electron apps (Cursor, Codex, ChatGPT, etc.) directly from the terminal via CDP. - icon: 🌐 title: Browser Automation details: "AI agents get direct browser control: click, type/fill, extract, screenshot — any interaction, fully scriptable." 
diff --git a/docs/zh/guide/electron-app-cli.md b/docs/zh/guide/electron-app-cli.md index 4d151abb0..45c71e10a 100644 --- a/docs/zh/guide/electron-app-cli.md +++ b/docs/zh/guide/electron-app-cli.md @@ -141,7 +141,6 @@ clis//utils.ts 可以参考这些现成文档: - `docs/adapters/desktop/codex.md` - `docs/adapters/desktop/chatwise.md` -- `docs/adapters/desktop/notion.md` - `docs/adapters/desktop/discord.md` ## 常见问题 diff --git a/docs/zh/index.md b/docs/zh/index.md index 33898aac7..482d9ecda 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -16,7 +16,7 @@ hero: features: - icon: 🖥️ title: 桌面应用控制 - details: 通过 CDP 直接在终端驱动 Electron 应用(Cursor、Codex、ChatGPT、Notion 等)。 + details: 通过 CDP 直接在终端驱动 Electron 应用(Cursor、Codex、ChatGPT 等)。 - icon: 🌐 title: 浏览器自动化 details: AI Agent 直接控制浏览器:点击、输入、提取、截图 — 任何交互,完全可编程。 diff --git a/skills/opencli-usage/SKILL.md b/skills/opencli-usage/SKILL.md index 5592d3237..abf4d5661 100644 --- a/skills/opencli-usage/SKILL.md +++ b/skills/opencli-usage/SKILL.md @@ -40,7 +40,7 @@ npx tsx src/main.ts # same surface, no global install | `UI` | Same as COOKIE, full DOM interaction. | | `LOCAL` | No browser; talks to a local/dev endpoint. | -Electron desktop apps (cursor, codex, chatwise, notion, discord-app, doubao-app, antigravity, chatgpt-app) route through CDP against the running app — same cookie-less flow as a logged-in browser. Make sure the app is running before invoking. +Electron desktop apps (cursor, codex, chatwise, discord-app, doubao-app, antigravity, chatgpt-app) route through CDP against the running app — same cookie-less flow as a logged-in browser. Make sure the app is running before invoking. ## Discover what's installed — don't read this file, run a command @@ -134,7 +134,9 @@ opencli gh pr list --limit 5 # passthrough; stdio is inherited, exit code prop opencli docker ps ``` -Built-in entries live in `src/external-clis.yaml`; user overrides and additions in `~/.opencli/external-clis.yaml`. 
Commonly shipped: `gh`, `docker`, `vercel`, `lark-cli`, `dws`, `wecom-cli`, `obsidian`, `tg`, `discord`, `wx`. +Built-in entries live in `src/external-clis.yaml`; user overrides and additions in `~/.opencli/external-clis.yaml`. Commonly shipped: `gh`, `docker`, `vercel`, `lark-cli`, `dws`, `wecom-cli`, `obsidian`, `ntn`, `tg`, `discord`, `wx`. + +Some official CLIs use shell-script installers instead of a shell-free package-manager command. Entries without an `install` config, such as `ntn`, must be installed manually from their homepage before passthrough use. ## Shell completion diff --git a/src/electron-apps.test.ts b/src/electron-apps.test.ts index efe32c145..cbe01fce4 100644 --- a/src/electron-apps.test.ts +++ b/src/electron-apps.test.ts @@ -28,6 +28,7 @@ describe('electron-apps registry', () => { it('isElectronApp returns false for non-Electron sites', () => { expect(isElectronApp('bilibili')).toBe(false); + expect(isElectronApp('notion')).toBe(false); expect(isElectronApp('unknown-app')).toBe(false); }); diff --git a/src/electron-apps.ts b/src/electron-apps.ts index 92add1c37..9eb061de2 100644 --- a/src/electron-apps.ts +++ b/src/electron-apps.ts @@ -29,7 +29,6 @@ export const builtinApps: Record = { cursor: { port: 9226, processName: 'Cursor', bundleId: 'com.todesktop.runtime.Cursor', displayName: 'Cursor' }, codex: { port: 9222, processName: 'Codex', bundleId: 'com.openai.codex', displayName: 'Codex' }, chatwise: { port: 9228, processName: 'ChatWise', bundleId: 'com.chatwise.app', displayName: 'ChatWise' }, - notion: { port: 9230, processName: 'Notion', bundleId: 'notion.id', displayName: 'Notion' }, 'discord-app': { port: 9232, processName: 'Discord', bundleId: 'com.discord.app', displayName: 'Discord' }, 'doubao-app': { port: 9225, processName: 'Doubao', bundleId: 'com.volcengine.doubao', displayName: 'Doubao' }, antigravity: { diff --git a/src/external-clis.yaml b/src/external-clis.yaml index 13ae96da8..80acb3567 100644 --- a/src/external-clis.yaml +++ 
b/src/external-clis.yaml @@ -14,6 +14,12 @@ install: mac: "brew install --cask obsidian" +- name: ntn + binary: ntn + description: "Notion CLI — official Notion API CLI for pages, databases, blocks, search, comments" + homepage: "https://ntn.dev" + tags: [notion, notes, knowledge, productivity] + - name: docker binary: docker description: "Docker command-line interface" diff --git a/src/external.test.ts b/src/external.test.ts index 8645f87ff..9ebdfc5b9 100644 --- a/src/external.test.ts +++ b/src/external.test.ts @@ -1,4 +1,8 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import yaml from 'js-yaml'; const { mockExecFileSync, mockPlatform } = vi.hoisted(() => ({ mockExecFileSync: vi.fn(), @@ -20,6 +24,8 @@ vi.mock('node:os', async () => { import { installExternalCli, parseCommand, type ExternalCliConfig } from './external.js'; +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + describe('parseCommand', () => { it('splits binaries and quoted arguments without invoking a shell', () => { expect(parseCommand('npm install -g "@scope/tool name"')).toEqual({ @@ -42,6 +48,17 @@ describe('parseCommand', () => { 'Install command contains unsafe shell operators', ); }); + + it('keeps built-in install commands compatible with the shell-free parser', () => { + const raw = fs.readFileSync(path.join(__dirname, 'external-clis.yaml'), 'utf8'); + const entries = (yaml.load(raw) || []) as ExternalCliConfig[]; + + for (const entry of entries) { + for (const command of Object.values(entry.install ?? 
{})) { + if (command) expect(() => parseCommand(command)).not.toThrow(); + } + } + }); }); describe('installExternalCli', () => { From 9c4f4a3d3036fca46b0780663cf397fdc990c176 Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 16:28:18 +0800 Subject: [PATCH 08/37] fix(cli): show external CLI package aliases (#1560) --- CHANGELOG.md | 4 ++++ README.md | 6 +++--- README.zh-CN.md | 6 +++--- skills/opencli-usage/SKILL.md | 2 +- src/cli.test.ts | 1 + src/cli.ts | 11 ++++++++--- src/external-clis.yaml | 3 +++ src/external.test.ts | 12 +++++++++++- src/external.ts | 7 +++++++ src/help.test.ts | 6 +++++- src/help.ts | 14 ++++++++++---- 11 files changed, 56 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4101566c9..0e7714817 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ * **notion** — remove the in-tree `clis/notion/` CDP-on-Desktop adapter (8 commands: status / search / read / new / write / sidebar / favorites / export). Notion has shipped an official CLI at , which is registered as a first-class external CLI in `external-clis.yaml`. Migration: install `ntn` from (`curl -fsSL https://ntn.dev | bash`), then use `opencli ntn `. Auto-install is intentionally not configured because the official installer is a shell script while OpenCLI external installs run shell-free command strings. The official CLI uses the public Notion API rather than reverse-engineering the Desktop UI, so it survives Notion app updates and exposes a wider command surface (blocks / databases / properties / comments) than the reverse-engineered adapter could. +### Bug Fixes + +* **external** — distinguish external CLI executable names from distribution/project names in root help. Built-in aliases such as `tg`, `discord`, and `wx` remain the callable `opencli ...` entrypoints while help renders `tg(tg-cli)`, `discord(discord-cli)`, and `wx(wx-cli)` to show their package lineage. 
+ ## [1.7.19](https://github.com/jackwener/opencli/compare/v1.7.18...v1.7.19) (2026-05-14) Major hotfix + simplification batch. Extension bumped to 1.0.14. Node floor lowered to v20 so the long tail of Node v20–v21.6 users no longer crashes at module load. `opencli browser` user surface replaces required-flag `--session ` with a `` positional. `page.evaluate(fn, ...args)` adds a type-safe alternative to the implicit auto-IIFE string form. Twitter cursor pagination no longer silently caps at ~500 items. diff --git a/README.md b/README.md index b2b9063f8..3d2439975 100644 --- a/README.md +++ b/README.md @@ -294,9 +294,9 @@ OpenCLI acts as a universal hub for your existing command-line tools — unified | **lark-cli** | Lark/Feishu — messages, docs, calendar, tasks, 200+ commands | `opencli lark-cli calendar +agenda` | | **dws** | DingTalk — cross-platform CLI for DingTalk's full suite, designed for humans and AI agents | `opencli dws msg send --to user "hello"` | | **wecom-cli** | WeCom/企业微信 — CLI for WeCom open platform, for humans and AI agents | `opencli wecom-cli msg send --to user "hello"` | -| **tg** | Telegram — local-first sync, search, and export via MTProto for AI agents | `opencli tg search "AI news" -f json` | -| **discord** | Discord — local-first sync, search, and export via SQLite for AI agents | `opencli discord recent --channel general` | -| **wx** | WeChat — query local WeChat data: sessions, messages, search, contacts, export | `opencli wx search "OpenCLI"` | +| **tg(tg-cli)** | Telegram — local-first sync, search, and export via MTProto for AI agents | `opencli tg search "AI news" -f json` | +| **discord(discord-cli)** | Discord — local-first sync, search, and export via SQLite for AI agents | `opencli discord recent --channel general` | +| **wx(wx-cli)** | WeChat — query local WeChat data: sessions, messages, search, contacts, export | `opencli wx search "OpenCLI"` | | **vercel** | Vercel — deploy projects, manage domains, env vars, logs | `opencli 
vercel deploy --prod` | **Register your own** — add any local CLI so AI agents can discover it via `opencli list`: diff --git a/README.zh-CN.md b/README.zh-CN.md index 8e77b81dd..8143eba7d 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -336,9 +336,9 @@ OpenCLI 也可以作为你现有命令行工具的统一入口,负责发现、 | **lark-cli** | 飞书 CLI — 消息、文档、日历、任务,200+ 命令 | `opencli lark-cli calendar +agenda` | | **dws** | 钉钉 CLI — 钉钉全套产品能力的跨平台命令行工具,支持人类和 AI Agent 使用 | `opencli dws msg send --to user "hello"` | | **wecom-cli** | 企业微信 CLI — 企业微信开放平台命令行工具,支持人类和 AI Agent 使用 | `opencli wecom-cli msg send --to user "hello"` | -| **tg** | Telegram CLI — 基于 MTProto 的本地优先同步、搜索、导出,面向 AI Agent | `opencli tg search "AI news" -f json` | -| **discord** | Discord CLI — 基于 SQLite 的本地优先同步、搜索、导出,面向 AI Agent | `opencli discord recent --channel general` | -| **wx** | 微信本地数据 CLI — 会话、聊天记录、搜索、联系人、导出 | `opencli wx search "OpenCLI"` | +| **tg(tg-cli)** | Telegram CLI — 基于 MTProto 的本地优先同步、搜索、导出,面向 AI Agent | `opencli tg search "AI news" -f json` | +| **discord(discord-cli)** | Discord CLI — 基于 SQLite 的本地优先同步、搜索、导出,面向 AI Agent | `opencli discord recent --channel general` | +| **wx(wx-cli)** | 微信本地数据 CLI — 会话、聊天记录、搜索、联系人、导出 | `opencli wx search "OpenCLI"` | | **vercel** | Vercel — 部署项目、管理域名、环境变量、日志 | `opencli vercel deploy --prod` | **零配置透传**:OpenCLI 会把你的输入原样转发给底层二进制,保留原生 stdout / stderr 行为。 diff --git a/skills/opencli-usage/SKILL.md b/skills/opencli-usage/SKILL.md index abf4d5661..c14613de5 100644 --- a/skills/opencli-usage/SKILL.md +++ b/skills/opencli-usage/SKILL.md @@ -134,7 +134,7 @@ opencli gh pr list --limit 5 # passthrough; stdio is inherited, exit code prop opencli docker ps ``` -Built-in entries live in `src/external-clis.yaml`; user overrides and additions in `~/.opencli/external-clis.yaml`. Commonly shipped: `gh`, `docker`, `vercel`, `lark-cli`, `dws`, `wecom-cli`, `obsidian`, `ntn`, `tg`, `discord`, `wx`. 
+Built-in entries live in `src/external-clis.yaml`; user overrides and additions in `~/.opencli/external-clis.yaml`. Commonly shipped: `gh`, `docker`, `vercel`, `lark-cli`, `dws`, `wecom-cli`, `obsidian`, `ntn`, `tg(tg-cli)`, `discord(discord-cli)`, `wx(wx-cli)`. Some official CLIs use shell-script installers instead of a shell-free package-manager command. Entries without an `install` config, such as `ntn`, must be installed manually from their homepage before passthrough use. diff --git a/src/cli.test.ts b/src/cli.test.ts index 1e7c5f479..2576f761b 100644 --- a/src/cli.test.ts +++ b/src/cli.test.ts @@ -192,6 +192,7 @@ describe('createProgram root help descriptions', () => { expect(data.site_adapters.sites).toEqual(['bilibili']); expect(data.external_clis.count).toBeGreaterThanOrEqual(0); expect(Array.isArray(data.external_clis.clis)).toBe(true); + expect(Array.isArray(data.external_clis.display)).toBe(true); // Adapters must NOT leak into the core commands list const commandNames = data.commands.map((cmd: any) => cmd.name); expect(commandNames).not.toContain('bilibili'); diff --git a/src/cli.ts b/src/cli.ts index 4cdfd8ae3..ec5d27ff0 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -16,7 +16,7 @@ import { serializeCommand, formatArgSummary } from './serialization.js'; import { render as renderOutput } from './output.js'; import { PKG_VERSION } from './version.js'; import { printCompletionScript } from './completion.js'; -import { loadExternalClis, executeExternalCli, installExternalCli, registerExternalCli, isBinaryInstalled } from './external.js'; +import { loadExternalClis, executeExternalCli, installExternalCli, registerExternalCli, isBinaryInstalled, formatExternalCliLabel } from './external.js'; import { registerAllCommands } from './commanderAdapter.js'; import { classifyAdapter, formatRootAdapterHelpText, installCommanderNamespaceStructuredHelp, installStructuredHelp, leadingPositionalFromUsage, rootHelpData, type RootAdapterGroups } from './help.js'; import { 
EXIT_CODES, getErrorMessage, BrowserConnectError } from './errors.js'; @@ -3239,6 +3239,7 @@ cli({ .action((opts) => { const rows = loadExternalClis().map((ext) => ({ name: ext.name, + package: ext.package ?? '', binary: ext.binary, installed: isBinaryInstalled(ext.binary), description: ext.description ?? '', @@ -3247,7 +3248,7 @@ cli({ })); renderOutput(rows, { fmt: opts.format, - columns: ['name', 'binary', 'installed', 'description', 'homepage', 'tags'], + columns: ['name', 'package', 'binary', 'installed', 'description', 'homepage', 'tags'], title: 'opencli/external/list', source: 'opencli external list', }); @@ -3306,6 +3307,10 @@ cli({ // Classification derives from each adapter's `domain` field — see classifyAdapter. // External CLIs are taken from the externalClis registry (passthrough binaries). const externalNames = externalClis.map(ext => ext.name); + const externalHelpEntries = externalClis.map(ext => ({ + name: ext.name, + label: formatExternalCliLabel(ext), + })); const siteDomains = new Map(); for (const [, cmd] of getRegistry()) { if (!siteDomains.has(cmd.site)) siteDomains.set(cmd.site, cmd.domain); @@ -3316,7 +3321,7 @@ cli({ if (classifyAdapter(siteDomains.get(site)) === 'app') apps.push(site); else sites.push(site); } - const adapterGroups: RootAdapterGroups = { external: externalNames, apps, sites }; + const adapterGroups: RootAdapterGroups = { external: externalHelpEntries, apps, sites }; const adapterNameSet = new Set([...externalNames, ...siteNames]); installCommanderNamespaceStructuredHelp(browser, { globalCommand: program, description: originalBrowserDescription }); installCommanderNamespaceStructuredHelp(daemonCmd, { globalCommand: program, description: originalDaemonDescription }); diff --git a/src/external-clis.yaml b/src/external-clis.yaml index 80acb3567..4e0980bbf 100644 --- a/src/external-clis.yaml +++ b/src/external-clis.yaml @@ -63,6 +63,7 @@ - name: tg binary: tg + package: tg-cli description: "Telegram CLI — local-first sync, 
search, export via MTProto for AI agents" homepage: "https://github.com/jackwener/tg-cli" tags: [telegram, messaging, search, export, ai-agent] @@ -71,6 +72,7 @@ - name: discord binary: discord + package: discord-cli description: "Discord CLI — local-first sync, search, export via SQLite for AI agents" homepage: "https://github.com/jackwener/discord-cli" tags: [discord, messaging, search, export, ai-agent] @@ -79,6 +81,7 @@ - name: wx binary: wx + package: wx-cli description: "WeChat local data CLI — sessions, messages, search, contacts, export for AI agents" homepage: "https://github.com/jackwener/wx-cli" tags: [wechat, messaging, search, export, ai-agent] diff --git a/src/external.test.ts b/src/external.test.ts index 9ebdfc5b9..8d923eadd 100644 --- a/src/external.test.ts +++ b/src/external.test.ts @@ -22,7 +22,7 @@ vi.mock('node:os', async () => { }; }); -import { installExternalCli, parseCommand, type ExternalCliConfig } from './external.js'; +import { formatExternalCliLabel, installExternalCli, parseCommand, type ExternalCliConfig } from './external.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -61,6 +61,16 @@ describe('parseCommand', () => { }); }); +describe('formatExternalCliLabel', () => { + it('shows the package name when the executable name differs', () => { + expect(formatExternalCliLabel({ name: 'wx', binary: 'wx', package: 'wx-cli' })).toBe('wx(wx-cli)'); + }); + + it('keeps the label compact when package and name match', () => { + expect(formatExternalCliLabel({ name: 'docker', binary: 'docker', package: 'docker' })).toBe('docker'); + }); +}); + describe('installExternalCli', () => { const cli: ExternalCliConfig = { name: 'readwise', diff --git a/src/external.ts b/src/external.ts index 0a2e8dc73..5b168e84e 100644 --- a/src/external.ts +++ b/src/external.ts @@ -17,8 +17,11 @@ export interface ExternalCliInstall { } export interface ExternalCliConfig { + /** User-facing OpenCLI subcommand and, by default, the executable name. 
*/ name: string; binary: string; + /** Distribution/project name when it differs from the executable name. */ + package?: string; description?: string; homepage?: string; tags?: string[]; @@ -86,6 +89,10 @@ export function getInstallCmd(installConfig?: ExternalCliInstall): string | null return null; } +export function formatExternalCliLabel(cli: ExternalCliConfig): string { + return cli.package && cli.package !== cli.name ? `${cli.name}(${cli.package})` : cli.name; +} + /** * Safely parses a command string into a binary and argument list. * Rejects commands containing shell operators (&&, ||, |, ;, >, <, `) that diff --git a/src/help.test.ts b/src/help.test.ts index 95fc21758..3637fb685 100644 --- a/src/help.test.ts +++ b/src/help.test.ts @@ -25,13 +25,17 @@ describe('classifyAdapter', () => { describe('formatRootAdapterHelpText', () => { it('renders all three sections in External / App / Site order when populated', () => { const text = formatRootAdapterHelpText({ - external: ['gh', 'docker'], + external: [ + { name: 'gh', label: 'gh' }, + { name: 'wx', label: 'wx(wx-cli)' }, + ], apps: ['chatwise', 'codex'], sites: ['bilibili'], }); expect(text).toContain('External CLIs (2):'); expect(text).toContain('App adapters (2):'); expect(text).toContain('Site adapters (1):'); + expect(text).toContain('wx(wx-cli)'); expect(text.indexOf('External CLIs')).toBeLessThan(text.indexOf('App adapters')); expect(text.indexOf('App adapters')).toBeLessThan(text.indexOf('Site adapters')); }); diff --git a/src/help.ts b/src/help.ts index 418a82263..e1281550a 100644 --- a/src/help.ts +++ b/src/help.ts @@ -147,13 +147,18 @@ export function classifyAdapter(domain: string | undefined): AdapterKind { export interface RootAdapterGroups { /** Externally-registered CLIs (docker, gh, vercel, ...) — passthrough binaries */ - external: readonly string[]; + external: readonly RootExternalCli[]; /** Desktop-app adapters (chatgpt-app, chatwise, codex, ...) 
*/ apps: readonly string[]; /** Web-site adapters (bilibili, dianping, ...) */ sites: readonly string[]; } +export interface RootExternalCli { + name: string; + label: string; +} + function formatGroupSection(label: string, names: readonly string[]): string[] { if (names.length === 0) return []; return [ @@ -167,7 +172,7 @@ export function formatRootAdapterHelpText(groups: RootAdapterGroups): string { const total = groups.external.length + groups.apps.length + groups.sites.length; if (total === 0) return ''; const lines: string[] = ['']; - lines.push(...formatGroupSection('External CLIs', groups.external)); + lines.push(...formatGroupSection('External CLIs', groups.external.map(cli => cli.label))); lines.push(...formatGroupSection('App adapters', groups.apps)); lines.push(...formatGroupSection('Site adapters', groups.sites)); lines.push("Run 'opencli list' for full command details, or 'opencli --help' to inspect one site."); @@ -468,7 +473,7 @@ function compactCommand(cmd: CliCommand): Record { } export function rootHelpData(program: Command, groups: RootAdapterGroups): Record { - const adapterNames = new Set([...groups.external, ...groups.apps, ...groups.sites]); + const adapterNames = new Set([...groups.external.map(cli => cli.name), ...groups.apps, ...groups.sites]); const commands = program.commands .filter(command => !adapterNames.has(command.name())) .map(command => ({ @@ -483,7 +488,8 @@ export function rootHelpData(program: Command, groups: RootAdapterGroups): Recor commands, external_clis: { count: groups.external.length, - clis: [...groups.external].sort(sortLocale), + clis: groups.external.map(cli => cli.name).sort(sortLocale), + display: groups.external.map(cli => cli.label).sort(sortLocale), }, app_adapters: { count: groups.apps.length, From 8c88a3cbf3e02a1ce71bacbd16221cb23f424d1d Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 16:33:19 +0800 Subject: [PATCH 09/37] chore(release): 1.7.20 (#1562) --- CHANGELOG.md | 21 
++++++++++++++++++--- package.json | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e7714817..7fa84569c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,14 +1,29 @@ # Changelog -## Unreleased +## [1.7.20](https://github.com/jackwener/opencli/compare/v1.7.19...v1.7.20) (2026-05-14) + +External CLI surface cleanup + Browser Bridge WebSocket lifecycle hardening. Two BREAKING changes around external CLIs: built-in `tg`/`discord`/`wx` (was `tg-cli`/`discord-cli`/`wx-cli`) now match their real binary names, and Notion's in-tree CDP adapter is replaced by the official `ntn` external CLI. ### ⚠ BREAKING CHANGES -* **notion** — remove the in-tree `clis/notion/` CDP-on-Desktop adapter (8 commands: status / search / read / new / write / sidebar / favorites / export). Notion has shipped an official CLI at , which is registered as a first-class external CLI in `external-clis.yaml`. Migration: install `ntn` from (`curl -fsSL https://ntn.dev | bash`), then use `opencli ntn `. Auto-install is intentionally not configured because the official installer is a shell script while OpenCLI external installs run shell-free command strings. The official CLI uses the public Notion API rather than reverse-engineering the Desktop UI, so it survives Notion app updates and exposes a wider command surface (blocks / databases / properties / comments) than the reverse-engineered adapter could. +* **notion** — remove the in-tree `clis/notion/` CDP-on-Desktop adapter (8 commands: `status` / `search` / `read` / `new` / `write` / `sidebar` / `favorites` / `export`). Notion has shipped an official CLI at , registered as a first-class external CLI in `external-clis.yaml`. Migration: install `ntn` from (`curl -fsSL https://ntn.dev | bash`), then use `opencli ntn `. Auto-install is intentionally not configured because the official installer is a shell script while OpenCLI external installs run shell-free command strings. 
The official CLI uses the public Notion API rather than reverse-engineering the Desktop UI, so it survives Notion app updates and exposes a wider command surface (blocks / databases / properties / comments) than the reverse-engineered adapter could. ([#1559](https://github.com/jackwener/opencli/issues/1559)) +* **external** — drop the `-cli` suffix from built-in external CLI subcommand names. `opencli tg-cli`, `opencli discord-cli`, `opencli wx-cli` are now `opencli tg`, `opencli discord`, `opencli wx`, matching the real binary names that those tools install as. Root help still shows the package lineage as `tg(tg-cli)` / `discord(discord-cli)` / `wx(wx-cli)`. ([#1544](https://github.com/jackwener/opencli/issues/1544)) + +### Features + +* **twitter** — `bookmarks` and `bookmark-folder` now include media via `extractMedia`, reaching parity with `timeline` / `search`. ([#1555](https://github.com/jackwener/opencli/issues/1555)) +* **twitter/list-tweets** — include media via `extractMedia` (parity with `timeline` / `search`). ([#1464](https://github.com/jackwener/opencli/issues/1464)) ### Bug Fixes -* **external** — distinguish external CLI executable names from distribution/project names in root help. Built-in aliases such as `tg`, `discord`, and `wx` remain the callable `opencli ...` entrypoints while help renders `tg(tg-cli)`, `discord(discord-cli)`, and `wx(wx-cli)` to show their package lineage. +* **daemon** — report ambiguous browser command outcomes with a distinct `command_result_unknown` errorCode and `503` when the extension WebSocket drops between command dispatch and result delivery. `sendCommandRaw()` treats this code as hard non-retryable, so write-side commands (`navigate` / `click` / `type` / `eval`) won't be silently re-issued and double-executed. Daemon exposes a `commandResultUnknown` counter on `/status` for future observability. 
([#1558](https://github.com/jackwener/opencli/issues/1558)) +* **extension** — keep active daemon WebSocket; stale sockets no longer clobber active connection (`onopen` / `onclose` / `onmessage` are all gated by `ws !== thisWs` short-circuit), and `safeSend` only fires when `readyState === OPEN`. ([#1540](https://github.com/jackwener/opencli/issues/1540)) +* **extension** — coalesce concurrent daemon WebSocket connects via an in-flight promise. Startup / keepalive / reconnect triggering `connect()` during the daemon-probe or context-lookup async gap no longer creates duplicate real WebSocket connections. ([#1554](https://github.com/jackwener/opencli/issues/1554)) +* **external** — distinguish external CLI executable names from distribution/project names in root help. Built-in aliases such as `tg`, `discord`, `wx` remain the callable `opencli ...` entrypoints while help renders `tg(tg-cli)`, `discord(discord-cli)`, `wx(wx-cli)` to show their package lineage. ([#1560](https://github.com/jackwener/opencli/issues/1560)) + +### Docs + +* **browser** — clarify named session lifecycle in the Browser Bridge guide. 
([#1542](https://github.com/jackwener/opencli/issues/1542)) ## [1.7.19](https://github.com/jackwener/opencli/compare/v1.7.18...v1.7.19) (2026-05-14) diff --git a/package.json b/package.json index 42be46e1d..1a06f9d37 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@jackwener/opencli", - "version": "1.7.19", + "version": "1.7.20", "publishConfig": { "access": "public" }, From 9c25bc70095ea9a1e9bf36426ee2e8af5d6ebad1 Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 16:45:00 +0800 Subject: [PATCH 10/37] fix(ci): add Windows native binding lock entries (#1563) --- package-lock.json | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index f28afa6b6..be94be86e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@jackwener/opencli", - "version": "1.7.18", + "version": "1.7.20", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@jackwener/opencli", - "version": "1.7.18", + "version": "1.7.20", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { @@ -732,6 +732,23 @@ "glibc" ] }, + "node_modules/@rolldown/binding-win32-x64-msvc": { + "version": "1.0.0-rc.15", + "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.15.tgz", + "integrity": "sha512-3P2A8L+x75qavWLe/Dll3EYBJLQmtkJN8rfh+U/eR3MqMgL/h98PhYI+JFfXuDPgPeCB7iZAKiqii5vqOvnA0g==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, "node_modules/@rolldown/pluginutils": { "version": "1.0.0-rc.15", "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.15.tgz", @@ -770,6 +787,20 @@ "glibc" ] }, + "node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.59.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz", + "integrity": "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@shikijs/core": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/@shikijs/core/-/core-2.5.0.tgz", From 5af2ff1d6c16400905e3a6bbfe588cde28d8789c Mon Sep 17 00:00:00 2001 From: Iris Chen Date: Thu, 14 May 2026 02:05:58 -0700 Subject: [PATCH 11/37] fix(xiaohongshu,rednote): unwrap page.evaluate envelope in search adapter (#1561) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(xiaohongshu,rednote): unwrap page.evaluate envelope in search adapter `page.evaluate(...)` returns a `{ session, data }` envelope rather than the raw IIFE return value, but the search adapters were calling `Array.isArray(payload)` directly on the envelope. `Array.isArray` is always false on the envelope, so every search result was silently dropped — status=success, exit 0, empty array, no error. The rednote adapter had this same bug; both share `buildSearchExtractJs` from `xiaohongshu/search.js`. Introduces `unwrapEvaluateResult(payload)` as a shared helper in `clis/xiaohongshu/search.js` (re-exported via the existing import line from `rednote/search.js`). The helper is a defensive ternary: it unwraps when payload looks like an envelope with an array `.data`, otherwise it passes the value through unchanged. This keeps the change back-compat with bridge versions that return the raw value, and preserves the existing `Array.isArray(payload)` typecheck at each call site. 
Verified manually against `opencli xiaohongshu search "补墙洞"` (a query known to return 20+ results in a logged-in browser tab): previously `[]`, now returns the expected ranked rows with all declared columns (`rank, title, author, likes, published_at, url`) populated. Adds 5 unit tests for `unwrapEvaluateResult` covering raw array passthrough, envelope unwrap, non-envelope object passthrough, null/undefined safety, and the "data is not an array" guard. The existing 19 search tests in `clis/xiaohongshu/search.test.js` still pass — the unwrap is invisible to the existing mocks which already return raw arrays. * fix(xhs): unwrap search evaluate envelopes --------- Co-authored-by: jackwener --- clis/rednote/rednote.test.js | 65 ++++++++++++++++++++++++++++ clis/rednote/search.js | 16 ++++--- clis/xiaohongshu/search.js | 50 ++++++++++++++------- clis/xiaohongshu/search.test.js | 77 ++++++++++++++++++++++++++++----- 4 files changed, 176 insertions(+), 32 deletions(-) diff --git a/clis/rednote/rednote.test.js b/clis/rednote/rednote.test.js index 1462ff703..dddf0ef4e 100644 --- a/clis/rednote/rednote.test.js +++ b/clis/rednote/rednote.test.js @@ -31,6 +31,14 @@ function createPageMock(evaluateResult) { getCookies: vi.fn().mockResolvedValue([{ name: 'sid', value: 'secret', domain: 'www.rednote.com' }]), }; } +function createSearchPageMock(evaluateResults) { + const page = createPageMock(undefined); + page.evaluate = vi.fn(); + for (const result of evaluateResults) { + page.evaluate.mockResolvedValueOnce(result); + } + return page; +} describe('rednote note URL identity', () => { const download = getRegistry().get('rednote/download'); @@ -130,6 +138,63 @@ describe('rednote argument validation', () => { }); }); +describe('rednote search browser-bridge envelopes', () => { + const search = getRegistry().get('rednote/search'); + + it('unwraps login-wall wait result envelopes before auth handling', async () => { + const page = createSearchPageMock([ + { session: 'site:rednote', 
data: 'login_wall' }, + ]); + + await expect(search.func(page, { query: 'tesla', limit: 5 })).rejects.toMatchObject({ + code: 'AUTH_REQUIRED', + message: expect.stringContaining('blocked behind a login wall'), + }); + expect(page.evaluate).toHaveBeenCalledTimes(1); + }); + + it('unwraps search extraction envelopes and preserves rednote row shape', async () => { + const url = 'https://www.rednote.com/search_result/68e90be80000000004022e66?xsec_token=test-token'; + const page = createSearchPageMock([ + 'content', + 1, + { + session: 'site:rednote', + data: [{ + title: 'rednote result', + author: 'author', + likes: '12', + url, + author_url: 'https://www.rednote.com/user/profile/u1', + }], + }, + ]); + + await expect(search.func(page, { query: 'tesla', limit: 1 })).resolves.toEqual([{ + rank: 1, + title: 'rednote result', + author: 'author', + likes: '12', + published_at: '2025-10-10', + url, + author_url: 'https://www.rednote.com/user/profile/u1', + }]); + }); + + it('fails typed instead of silently returning [] for malformed extraction payloads', async () => { + const page = createSearchPageMock([ + 'content', + 1, + { session: 'site:rednote', data: { rows: [] } }, + ]); + + await expect(search.func(page, { query: 'tesla', limit: 1 })).rejects.toMatchObject({ + code: 'COMMAND_EXEC', + message: expect.stringContaining('payload shape'), + }); + }); +}); + describe('rednote Pinia store failures', () => { it('maps feed store read failure to CommandExecutionError', async () => { const command = getRegistry().get('rednote/feed'); diff --git a/clis/rednote/search.js b/clis/rednote/search.js index 2dff8d75c..d4706d307 100644 --- a/clis/rednote/search.js +++ b/clis/rednote/search.js @@ -6,8 +6,8 @@ * 1:1 comparison between the two frontends. 
*/ import { cli, Strategy } from '@jackwener/opencli/registry'; -import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors'; -import { buildScrollUntilJs, buildSearchExtractJs, noteIdToDate } from '../xiaohongshu/search.js'; +import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; +import { buildScrollUntilJs, buildSearchExtractJs, noteIdToDate, unwrapEvaluateResult } from '../xiaohongshu/search.js'; function parseLimit(raw) { const parsed = Number(raw); @@ -19,6 +19,13 @@ function parseLimit(raw) { } return parsed; } +function requireSearchRows(payload) { + const rows = unwrapEvaluateResult(payload); + if (!Array.isArray(rows)) { + throw new CommandExecutionError('Unexpected Rednote search extraction payload shape; expected an array of rows.'); + } + return rows; +} /** * Wait for search results or login wall using MutationObserver (max 5s). @@ -78,7 +85,7 @@ cli({ const limit = parseLimit(kwargs.limit ?? 20); const keyword = encodeURIComponent(kwargs.query); await page.goto(`https://www.rednote.com/search_result?keyword=${keyword}&source=web_search_result_notes`); - const waitResult = await page.evaluate(WAIT_FOR_CONTENT_JS); + const waitResult = unwrapEvaluateResult(await page.evaluate(WAIT_FOR_CONTENT_JS)); if (waitResult === 'login_wall') { throw new AuthRequiredError('www.rednote.com', 'Rednote search results are blocked behind a login wall'); } @@ -87,8 +94,7 @@ cli({ // `autoScroll({ times: 2 })` capped extraction at ~13 notes regardless // of `--limit`. await page.evaluate(buildScrollUntilJs(limit)); - const payload = await page.evaluate(buildSearchExtractJs('www.rednote.com')); - const data = Array.isArray(payload) ? 
payload : []; + const data = requireSearchRows(await page.evaluate(buildSearchExtractJs('www.rednote.com'))); return data .filter((item) => item.title) .slice(0, limit) diff --git a/clis/xiaohongshu/search.js b/clis/xiaohongshu/search.js index b578275dc..a56475bad 100644 --- a/clis/xiaohongshu/search.js +++ b/clis/xiaohongshu/search.js @@ -6,7 +6,7 @@ * Ref: https://github.com/jackwener/opencli/issues/10 */ import { cli, Strategy } from '@jackwener/opencli/registry'; -import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors'; +import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; /** * Wait for search results or login wall using MutationObserver (max 5s). * Returns 'content' if note items appeared, 'login_wall' if login gate @@ -60,6 +60,26 @@ export function stripXhsAuthorDateSuffix(value) { const stripped = text.replace(/\s*(?:\d{1,2}天前|\d+小时前|\d+分钟前|\d+秒前|刚刚|昨天|前天|\d+周前|\d+个月前|\d{1,2}-\d{1,2}|\d{4}-\d{1,2}-\d{1,2})$/u, '').trim(); return stripped || text; } +/** + * `page.evaluate` may return either the raw IIFE value or a + * `{ session, data }` envelope depending on the browser-bridge version. + * Adapter code that called `Array.isArray(payload)` directly on the + * envelope silently received [] for every search. This helper normalizes + * both shapes so callers can keep their Array.isArray checks unchanged. + */ +export function unwrapEvaluateResult(payload) { + if (payload && !Array.isArray(payload) && typeof payload === 'object' && 'session' in payload && 'data' in payload) { + return payload.data; + } + return payload; +} +function requireSearchRows(payload, phase) { + const rows = unwrapEvaluateResult(payload); + if (!Array.isArray(rows)) { + throw new CommandExecutionError(`Unexpected Xiaohongshu search ${phase} payload shape; expected an array of rows.`); + } + return rows; +} export function parseLimit(raw) { const parsed = Number(raw ?? 
20); if (!Number.isFinite(parsed) || !Number.isInteger(parsed)) { @@ -267,7 +287,7 @@ export const command = cli({ // Wait for search results to render (or login wall to appear). // Uses MutationObserver to resolve as soon as content appears, // instead of a fixed delay + blind retry. - const waitResult = await page.evaluate(WAIT_FOR_CONTENT_JS); + const waitResult = unwrapEvaluateResult(await page.evaluate(WAIT_FOR_CONTENT_JS)); if (waitResult === 'login_wall') { throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall'); } @@ -275,25 +295,23 @@ export const command = cli({ // layout, so scrolling to the bottom can evict the initially visible // note cards from the DOM and make extraction return [] even though the // browser rendered results correctly. - const initialPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')); - let payload = Array.isArray(initialPayload) ? initialPayload : []; + const initialPayload = requireSearchRows(await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')), 'initial extraction'); + const payload = [...initialPayload]; if (payload.length < limit) { // Scroll until enough rows are rendered or the lazy-load plateaus. // Replaces the previous fixed `autoScroll({ times: 2 })` which capped // extraction at ~13 notes regardless of `--limit` (#1471). 
await page.evaluate(buildScrollUntilJs(limit)); - const scrolledPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')); - if (Array.isArray(scrolledPayload)) { - const seen = new Set(payload.map((item) => item.url).filter(Boolean)); - for (const item of scrolledPayload) { - if (item?.url && seen.has(item.url)) - continue; - if (item?.url) - seen.add(item.url); - payload.push(item); - if (payload.length >= limit) - break; - } + const scrolledPayload = requireSearchRows(await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')), 'post-scroll extraction'); + const seen = new Set(payload.map((item) => item.url).filter(Boolean)); + for (const item of scrolledPayload) { + if (item?.url && seen.has(item.url)) + continue; + if (item?.url) + seen.add(item.url); + payload.push(item); + if (payload.length >= limit) + break; } } const data = payload; diff --git a/clis/xiaohongshu/search.test.js b/clis/xiaohongshu/search.test.js index 578dd674a..88b96ab67 100644 --- a/clis/xiaohongshu/search.test.js +++ b/clis/xiaohongshu/search.test.js @@ -1,7 +1,7 @@ import { describe, expect, it, vi } from 'vitest'; import { getRegistry } from '@jackwener/opencli/registry'; import { JSDOM } from 'jsdom'; -import { __test__, buildScrollUntilJs, noteIdToDate } from './search.js'; +import { __test__, buildScrollUntilJs, noteIdToDate, unwrapEvaluateResult } from './search.js'; function markVisible(el) { el.getBoundingClientRect = () => ({ width: 100, height: 100 }); @@ -57,24 +57,37 @@ describe('xiaohongshu search', () => { expect(page.evaluate).toHaveBeenCalledTimes(1); expect(page.autoScroll).not.toHaveBeenCalled(); }); + it('unwraps a browser-bridge envelope before handling login-wall wait result', async () => { + const cmd = getRegistry().get('xiaohongshu/search'); + const page = createPageMock([ + { session: 'site:xiaohongshu', data: 'login_wall' }, + ]); + + await expect(cmd.func(page, { query: '特斯拉', limit: 5 })).rejects.toMatchObject({ + code: 'AUTH_REQUIRED', + 
message: expect.stringContaining('blocked behind a login wall'), + }); + expect(page.evaluate).toHaveBeenCalledTimes(1); + }); it('returns ranked results with search_result url and author_url preserved', async () => { const cmd = getRegistry().get('xiaohongshu/search'); expect(cmd?.func).toBeTypeOf('function'); const detailUrl = 'https://www.xiaohongshu.com/search_result/68e90be80000000004022e66?xsec_token=test-token&xsec_source='; const authorUrl = 'https://www.xiaohongshu.com/user/profile/635a9c720000000018028b40?xsec_token=user-token&xsec_source=pc_search'; + const rows = [ + { + title: '某鱼买FSD被坑了4万', + author: '随风', + likes: '261', + url: detailUrl, + author_url: authorUrl, + }, + ]; const page = createPageMock([ // First evaluate: MutationObserver wait (content appeared) 'content', - // Second evaluate: initial DOM extraction (already enough results) - [ - { - title: '某鱼买FSD被坑了4万', - author: '随风', - likes: '261', - url: detailUrl, - author_url: authorUrl, - }, - ], + // Second evaluate: initial DOM extraction (already enough results) through Browser Bridge envelope. 
+ { session: 'site:xiaohongshu', data: rows }, ]); const result = await cmd.func(page, { query: '特斯拉', limit: 1 }); // Should only do one goto (the search page itself), no per-note detail navigation @@ -91,6 +104,18 @@ describe('xiaohongshu search', () => { }, ]); }); + it('fails typed instead of silently returning [] for malformed extraction payloads', async () => { + const cmd = getRegistry().get('xiaohongshu/search'); + const page = createPageMock([ + 'content', + { session: 'site:xiaohongshu', data: { rows: [] } }, + ]); + + await expect(cmd.func(page, { query: '测试', limit: 1 })).rejects.toMatchObject({ + code: 'COMMAND_EXEC', + message: expect.stringContaining('payload shape'), + }); + }); it('filters out results with no title and respects the limit', async () => { const cmd = getRegistry().get('xiaohongshu/search'); expect(cmd?.func).toBeTypeOf('function'); @@ -135,6 +160,10 @@ describe('xiaohongshu search', () => { 'content', // Second evaluate: initial extraction (no rows rendered) [], + // Third evaluate: scroll-until row count + 0, + // Fourth evaluate: post-scroll extraction (still no rows) + [], ]); const result = (await cmd.func(page, { query: '测试等待', limit: 5 })); expect(result).toHaveLength(0); @@ -268,3 +297,29 @@ describe('noteIdToDate (ObjectID timestamp parsing)', () => { expect(noteIdToDate('https://www.xiaohongshu.com/search_result/000000000000000000000000')).toBe(''); }); }); +describe('unwrapEvaluateResult (browser-bridge envelope normalization)', () => { + it('returns the raw array unchanged when payload is already an array', () => { + const arr = [{ title: 'a' }, { title: 'b' }]; + expect(unwrapEvaluateResult(arr)).toBe(arr); + }); + it('unwraps { session, data: [...] 
} envelope to the inner array', () => { + const arr = [{ title: 'a' }]; + const env = { session: 'site:xiaohongshu:abc', data: arr }; + expect(unwrapEvaluateResult(env)).toBe(arr); + }); + it('unwraps primitive data from Browser Bridge envelopes', () => { + expect(unwrapEvaluateResult({ session: 'site:xiaohongshu:abc', data: 'login_wall' })).toBe('login_wall'); + }); + it('passes non-envelope objects through unchanged', () => { + const obj = { results: [], loginWall: true }; + expect(unwrapEvaluateResult(obj)).toBe(obj); + }); + it('handles null and undefined safely', () => { + expect(unwrapEvaluateResult(null)).toBe(null); + expect(unwrapEvaluateResult(undefined)).toBe(undefined); + }); + it('unwraps non-array envelope data so callers can validate the payload shape', () => { + const env = { session: 'x', data: { not: 'an array' } }; + expect(unwrapEvaluateResult(env)).toEqual({ not: 'an array' }); + }); +}); From 16b02bcc58d36db39e2ea7304072a233c1ad328d Mon Sep 17 00:00:00 2001 From: "J.Chen" Date: Thu, 14 May 2026 17:15:39 +0800 Subject: [PATCH 12/37] fix(extension): reuse existing adapter tab group (#1541) * fix(extension): reuse existing adapter tab group * fix(extension): choose best existing adapter group --------- Co-authored-by: Jeff Chen Co-authored-by: jackwener --- extension/dist/background.js | 87 ++++++++++++++-- extension/src/background.test.ts | 174 ++++++++++++++++++++++++++++++- extension/src/background.ts | 104 ++++++++++++++++-- 3 files changed, 343 insertions(+), 22 deletions(-) diff --git a/extension/dist/background.js b/extension/dist/background.js index e9c7f481b..d569209de 100644 --- a/extension/dist/background.js +++ b/extension/dist/background.js @@ -787,6 +787,7 @@ const CONTAINER_TAB_GROUP_TITLE = { interactive: "OpenCLI Browser", automation: "OpenCLI Adapter" }; +const LEGACY_AUTOMATION_TAB_GROUP_TITLE = "OpenCLI"; const AUTOMATION_TAB_GROUP_COLOR = "orange"; let leaseMutationQueue = Promise.resolve(); const ownedContainers = { @@ 
-1022,11 +1023,70 @@ async function getOwnedContainerGroupId(role, windowId) { } container.groupId = null; } - const groups = await chrome.tabGroups.query({ windowId, title: CONTAINER_TAB_GROUP_TITLE[role] }); - const existing = groups[0]; - if (!existing) return null; - container.groupId = existing.id; - return existing.id; + for (const title of getOwnedContainerGroupTitles(role)) { + const groups = await chrome.tabGroups.query({ windowId, title }); + const existing = groups[0]; + if (existing) { + container.groupId = existing.id; + return existing.id; + } + } + return null; +} +function getOwnedContainerGroupTitles(role) { + return role === "automation" ? [CONTAINER_TAB_GROUP_TITLE.automation, LEGACY_AUTOMATION_TAB_GROUP_TITLE] : [CONTAINER_TAB_GROUP_TITLE.interactive]; +} +async function focusOwnedWindowIfRequested(windowId, mode) { + if (mode !== "foreground") return; + const updateWindow = chrome.windows.update; + if (typeof updateWindow === "function") await updateWindow(windowId, { focused: true }).catch(() => { + }); +} +async function toOwnedContainerDiscoveryCandidate(group) { + try { + const chromeWindow = await chrome.windows.get(group.windowId); + const reusableTabId = await findReusableOwnedContainerTab(group.windowId); + return { + windowId: group.windowId, + groupId: group.id, + focused: !!chromeWindow.focused, + hasReusableTab: reusableTabId !== void 0 + }; + } catch { + return null; + } +} +function selectOwnedContainerDiscoveryCandidate(candidates) { + if (candidates.length === 0) return null; + return [...candidates].sort((a, b) => { + if (a.focused !== b.focused) return a.focused ? -1 : 1; + if (a.hasReusableTab !== b.hasReusableTab) return a.hasReusableTab ? 
-1 : 1; + return a.groupId - b.groupId; + })[0]; +} +async function discoverOwnedContainerFromTabGroup(role) { + const container = ownedContainers[role]; + if (container.groupId !== null) { + try { + const group = await chrome.tabGroups.get(container.groupId); + await chrome.windows.get(group.windowId); + container.windowId = group.windowId; + return { windowId: group.windowId, groupId: group.id }; + } catch { + container.windowId = null; + container.groupId = null; + } + } + for (const title of getOwnedContainerGroupTitles(role)) { + const groups = await chrome.tabGroups.query({ title }); + const candidates = (await Promise.all(groups.map(toOwnedContainerDiscoveryCandidate))).filter((candidate) => candidate !== null); + const selected = selectOwnedContainerDiscoveryCandidate(candidates); + if (!selected) continue; + container.windowId = selected.windowId; + container.groupId = selected.groupId; + return { windowId: selected.windowId, groupId: selected.groupId }; + } + return null; } async function ensureOwnedContainerTabGroup(role, windowId, tabIds) { const ids = [...new Set(tabIds.filter((id) => id !== void 0))]; @@ -1066,11 +1126,7 @@ async function ensureOwnedContainerWindowUnlocked(role, initialUrl, mode = "back if (container.windowId !== null) { try { await chrome.windows.get(container.windowId); - if (mode === "foreground") { - const updateWindow = chrome.windows.update; - if (typeof updateWindow === "function") await updateWindow(container.windowId, { focused: true }).catch(() => { - }); - } + await focusOwnedWindowIfRequested(container.windowId, mode); const initialTabId2 = await findReusableOwnedContainerTab(container.windowId); await ensureOwnedContainerTabGroup(role, container.windowId, [initialTabId2]); return { @@ -1082,6 +1138,17 @@ async function ensureOwnedContainerWindowUnlocked(role, initialUrl, mode = "back container.groupId = null; } } + const discovered = await discoverOwnedContainerFromTabGroup(role); + if (discovered) { + await 
focusOwnedWindowIfRequested(discovered.windowId, mode); + const initialTabId2 = await findReusableOwnedContainerTab(discovered.windowId); + await ensureOwnedContainerTabGroup(role, discovered.windowId, [initialTabId2]); + await persistRuntimeState(); + return { + windowId: discovered.windowId, + initialTabId: initialTabId2 + }; + } const startUrl = initialUrl && isSafeNavigationUrl(initialUrl) ? initialUrl : BLANK_PAGE; const win = await chrome.windows.create({ url: startUrl, diff --git a/extension/src/background.test.ts b/extension/src/background.test.ts index 5a8182cf2..7fdac5f97 100644 --- a/extension/src/background.test.ts +++ b/extension/src/background.test.ts @@ -176,7 +176,7 @@ function createChromeMock() { onEvent: { addListener: vi.fn() } as Listener<(source: any, method: string, params: any) => void>, }, windows: { - get: vi.fn(async (windowId: number) => ({ id: windowId })), + get: vi.fn(async (windowId: number) => ({ id: windowId, focused: windowId === lastFocusedWindowId })), create: vi.fn(async ({ url, focused, width, height, type }: any) => ({ id: 1, url, focused, width, height, type })), remove: vi.fn(async (_windowId: number) => {}), onRemoved: { addListener: vi.fn() } as Listener<(windowId: number) => void>, @@ -205,7 +205,15 @@ function createChromeMock() { }, }; - return { chrome, tabs, groups, query, create, update }; + return { + chrome, + tabs, + groups, + query, + create, + update, + setLastFocusedWindowId: (windowId: number) => { lastFocusedWindowId = windowId; }, + }; } describe('background tab isolation', () => { @@ -1021,7 +1029,7 @@ describe('background tab isolation', () => { const { chrome } = createChromeMock(); chrome.windows.get = vi.fn(async (windowId: number) => { if (windowId === 90 || windowId === 91) throw new Error(`stale window ${windowId}`); - return { id: windowId }; + return { id: windowId, focused: false }; }); vi.stubGlobal('chrome', chrome); @@ -1163,6 +1171,166 @@ describe('background tab isolation', () => { 
expect(chrome.tabGroups.update).not.toHaveBeenCalled(); }); + it('discovers and reuses an existing OpenCLI Adapter group in another window before creating one', async () => { + const { chrome, tabs, groups } = createChromeMock(); + tabs.push({ + id: 77, + windowId: 7, + url: 'about:blank', + title: 'blank', + active: true, + status: 'complete', + groupId: -1, + }); + groups.push({ + id: 99, + windowId: 7, + title: 'OpenCLI Adapter', + color: 'orange', + collapsed: true, + }); + vi.stubGlobal('chrome', chrome); + + const mod = await import('./background'); + const tabId = await mod.__test__.resolveTabId(undefined, adapterKey('twitter')); + + expect(tabId).toBe(77); + expect(chrome.windows.create).not.toHaveBeenCalled(); + expect(mod.__test__.getAutomationWindowId(adapterKey('twitter'))).toBe(7); + expect(tabs.find((tab) => tab.id === 77)?.groupId).toBe(99); + expect(chrome.tabs.group).toHaveBeenCalledWith({ groupId: 99, tabIds: [77] }); + expect(chrome.tabGroups.update).not.toHaveBeenCalled(); + }); + + it('prefers a focused OpenCLI Adapter group when multiple matching groups exist', async () => { + const { chrome, tabs, groups, setLastFocusedWindowId } = createChromeMock(); + setLastFocusedWindowId(8); + tabs.push({ + id: 77, + windowId: 7, + url: 'about:blank', + title: 'blank', + active: true, + status: 'complete', + groupId: -1, + }); + tabs.push({ + id: 78, + windowId: 8, + url: 'about:blank', + title: 'blank', + active: true, + status: 'complete', + groupId: -1, + }); + groups.push( + { + id: 99, + windowId: 7, + title: 'OpenCLI Adapter', + color: 'orange', + collapsed: true, + }, + { + id: 98, + windowId: 8, + title: 'OpenCLI Adapter', + color: 'orange', + collapsed: true, + }, + ); + vi.stubGlobal('chrome', chrome); + + const mod = await import('./background'); + const tabId = await mod.__test__.resolveTabId(undefined, adapterKey('twitter')); + + expect(tabId).toBe(78); + expect(chrome.windows.create).not.toHaveBeenCalled(); + 
expect(mod.__test__.getAutomationWindowId(adapterKey('twitter'))).toBe(8); + expect(tabs.find((tab) => tab.id === 78)?.groupId).toBe(98); + expect(chrome.tabs.group).toHaveBeenCalledWith({ groupId: 98, tabIds: [78] }); + }); + + it('prefers an OpenCLI Adapter group with a reusable debuggable tab when none are focused', async () => { + const { chrome, tabs, groups, setLastFocusedWindowId } = createChromeMock(); + setLastFocusedWindowId(2); + tabs.push({ + id: 77, + windowId: 7, + url: 'chrome://settings', + title: 'settings', + active: true, + status: 'complete', + groupId: -1, + }); + tabs.push({ + id: 78, + windowId: 8, + url: 'about:blank', + title: 'blank', + active: true, + status: 'complete', + groupId: -1, + }); + groups.push( + { + id: 97, + windowId: 7, + title: 'OpenCLI Adapter', + color: 'orange', + collapsed: true, + }, + { + id: 98, + windowId: 8, + title: 'OpenCLI Adapter', + color: 'orange', + collapsed: true, + }, + ); + vi.stubGlobal('chrome', chrome); + + const mod = await import('./background'); + const tabId = await mod.__test__.resolveTabId(undefined, adapterKey('twitter')); + + expect(tabId).toBe(78); + expect(chrome.windows.create).not.toHaveBeenCalled(); + expect(mod.__test__.getAutomationWindowId(adapterKey('twitter'))).toBe(8); + expect(tabs.find((tab) => tab.id === 78)?.groupId).toBe(98); + expect(chrome.tabs.group).toHaveBeenCalledWith({ groupId: 98, tabIds: [78] }); + }); + + it('discovers and reuses a legacy OpenCLI automation group before creating a duplicate', async () => { + const { chrome, tabs, groups } = createChromeMock(); + tabs.push({ + id: 78, + windowId: 8, + url: 'about:blank', + title: 'blank', + active: true, + status: 'complete', + groupId: -1, + }); + groups.push({ + id: 98, + windowId: 8, + title: 'OpenCLI', + color: 'orange', + collapsed: true, + }); + vi.stubGlobal('chrome', chrome); + + const mod = await import('./background'); + const tabId = await mod.__test__.resolveTabId(undefined, adapterKey('twitter')); + + 
expect(tabId).toBe(78); + expect(chrome.windows.create).not.toHaveBeenCalled(); + expect(mod.__test__.getAutomationWindowId(adapterKey('twitter'))).toBe(8); + expect(tabs.find((tab) => tab.id === 78)?.groupId).toBe(98); + expect(chrome.tabs.group).toHaveBeenCalledWith({ groupId: 98, tabIds: [78] }); + expect(chrome.tabGroups.update).not.toHaveBeenCalled(); + }); + it('reuses a persisted automation group id after service worker restart even if the user renamed it', async () => { const { chrome, tabs, groups } = createChromeMock(); groups.push({ diff --git a/extension/src/background.ts b/extension/src/background.ts index 2d14b023e..18d673d8b 100644 --- a/extension/src/background.ts +++ b/extension/src/background.ts @@ -235,6 +235,7 @@ const CONTAINER_TAB_GROUP_TITLE: Record = { interactive: 'OpenCLI Browser', automation: 'OpenCLI Adapter', }; +const LEGACY_AUTOMATION_TAB_GROUP_TITLE = 'OpenCLI'; const AUTOMATION_TAB_GROUP_COLOR: chrome.tabGroups.ColorEnum = 'orange'; let leaseMutationQueue: Promise = Promise.resolve(); const ownedContainers: Record { + if (mode !== 'foreground') return; + const updateWindow = (chrome.windows as unknown as { update?: (windowId: number, updateInfo: { focused?: boolean }) => Promise }).update; + if (typeof updateWindow === 'function') await updateWindow(windowId, { focused: true }).catch(() => {}); +} + +async function toOwnedContainerDiscoveryCandidate(group: chrome.tabGroups.TabGroup): Promise { + try { + const chromeWindow = await chrome.windows.get(group.windowId); + const reusableTabId = await findReusableOwnedContainerTab(group.windowId); + return { + windowId: group.windowId, + groupId: group.id, + focused: !!chromeWindow.focused, + hasReusableTab: reusableTabId !== undefined, + }; + } catch { + // Ignore stale browser-session group/window state and keep looking. 
+ return null; + } +} + +function selectOwnedContainerDiscoveryCandidate(candidates: OwnedContainerDiscoveryCandidate[]): OwnedContainerDiscoveryCandidate | null { + if (candidates.length === 0) return null; + return [...candidates].sort((a, b) => { + if (a.focused !== b.focused) return a.focused ? -1 : 1; + if (a.hasReusableTab !== b.hasReusableTab) return a.hasReusableTab ? -1 : 1; + return a.groupId - b.groupId; + })[0]; +} + +async function discoverOwnedContainerFromTabGroup(role: OwnedWindowRole): Promise<{ windowId: number; groupId: number } | null> { + const container = ownedContainers[role]; + if (container.groupId !== null) { + try { + const group = await chrome.tabGroups.get(container.groupId); + await chrome.windows.get(group.windowId); + container.windowId = group.windowId; + return { windowId: group.windowId, groupId: group.id }; + } catch { + container.windowId = null; + container.groupId = null; + } + } + + for (const title of getOwnedContainerGroupTitles(role)) { + const groups = await chrome.tabGroups.query({ title }); + const candidates = (await Promise.all(groups.map(toOwnedContainerDiscoveryCandidate))) + .filter((candidate): candidate is OwnedContainerDiscoveryCandidate => candidate !== null); + const selected = selectOwnedContainerDiscoveryCandidate(candidates); + if (!selected) continue; + container.windowId = selected.windowId; + container.groupId = selected.groupId; + return { windowId: selected.windowId, groupId: selected.groupId }; + } + + return null; } async function ensureOwnedContainerTabGroup(role: OwnedWindowRole, windowId: number, tabIds: Array): Promise { @@ -591,10 +668,7 @@ async function ensureOwnedContainerWindowUnlocked( if (container.windowId !== null) { try { await chrome.windows.get(container.windowId); - if (mode === 'foreground') { - const updateWindow = (chrome.windows as unknown as { update?: (windowId: number, updateInfo: { focused?: boolean }) => Promise }).update; - if (typeof updateWindow === 'function') await 
updateWindow(container.windowId, { focused: true }).catch(() => {}); - } + await focusOwnedWindowIfRequested(container.windowId, mode); const initialTabId = await findReusableOwnedContainerTab(container.windowId); await ensureOwnedContainerTabGroup(role, container.windowId, [initialTabId]); return { @@ -607,6 +681,18 @@ async function ensureOwnedContainerWindowUnlocked( } } + const discovered = await discoverOwnedContainerFromTabGroup(role); + if (discovered) { + await focusOwnedWindowIfRequested(discovered.windowId, mode); + const initialTabId = await findReusableOwnedContainerTab(discovered.windowId); + await ensureOwnedContainerTabGroup(role, discovered.windowId, [initialTabId]); + await persistRuntimeState(); + return { + windowId: discovered.windowId, + initialTabId, + }; + } + const startUrl = (initialUrl && isSafeNavigationUrl(initialUrl)) ? initialUrl : BLANK_PAGE; // Note: Do NOT set `state` parameter here. Chrome 146+ rejects 'normal' as an invalid From edfa5f0da3c224365fee5f468e1e490c1b73ca8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=83=A1=E5=A4=A7=E5=A4=B4?= Date: Thu, 14 May 2026 17:54:34 +0800 Subject: [PATCH 13/37] feat: add DuckDuckGo, Brave, and Yahoo web search adapters (#1546) * feat: add DuckDuckGo, Brave, and Yahoo web search adapters Add three new search engine adapters with browser-based DOM extraction: - duckduckgo/search: Search DuckDuckGo via html.duckduckgo.com Supports region, time filters, and XHR-based pagination (--offset) - duckduckgo/suggest: Search suggestion autocomplete (no browser needed) - brave/search: Search Brave Search via search.brave.com Supports GET-based pagination (--offset) - yahoo/search: Search Yahoo (Bing-powered) via search.yahoo.com Supports GET-based pagination (--page) All search adapters use Strategy.PUBLIC with browser:true, navigating the target site and extracting results via page.evaluate() DOM queries. Includes full test coverage (16 tests). 
* fix: use clampInt from shared utils and add adapter docs - Replace Math.max/Math.min patterns with clampInt() from _shared/common.js to pass the typed-error-lint gate (4 silent-clamp violations resolved) - Add adapter documentation for duckduckgo, brave, and yahoo to fix the doc-coverage CI check - Regenerate cli-manifest.json and typed-error-lint-baseline.json * fix: avoid silent-column-drop overlap in brave/yahoo extractors Change buildExtractorJs to return arrays instead of objects whose keys matched columns. This prevents silent-column-drop audit false positives as per opencli-adapter-author conventions. * fix(search): tighten browser search adapters * chore(search): drop baseline churn * fix(duckduckgo): execute search extractor safely * fix(yahoo): reject unsafe redirect targets --------- Co-authored-by: huzekang Co-authored-by: jackwener --- cli-manifest.json | 169 ++++++++++++++++++++++++++++ clis/_shared/search-adapter.js | 70 ++++++++++++ clis/brave/search.js | 80 +++++++++++++ clis/brave/search.test.js | 76 +++++++++++++ clis/duckduckgo/search.js | 131 +++++++++++++++++++++ clis/duckduckgo/search.test.js | 128 +++++++++++++++++++++ clis/duckduckgo/suggest.js | 45 ++++++++ clis/duckduckgo/suggest.test.js | 66 +++++++++++ clis/yahoo/search.js | 92 +++++++++++++++ clis/yahoo/search.test.js | 94 ++++++++++++++++ docs/adapters/browser/brave.md | 47 ++++++++ docs/adapters/browser/duckduckgo.md | 60 ++++++++++ docs/adapters/browser/yahoo.md | 49 ++++++++ 13 files changed, 1107 insertions(+) create mode 100644 clis/_shared/search-adapter.js create mode 100644 clis/brave/search.js create mode 100644 clis/brave/search.test.js create mode 100644 clis/duckduckgo/search.js create mode 100644 clis/duckduckgo/search.test.js create mode 100644 clis/duckduckgo/suggest.js create mode 100644 clis/duckduckgo/suggest.test.js create mode 100644 clis/yahoo/search.js create mode 100644 clis/yahoo/search.test.js create mode 100644 docs/adapters/browser/brave.md create mode 
100644 docs/adapters/browser/duckduckgo.md create mode 100644 docs/adapters/browser/yahoo.md diff --git a/cli-manifest.json b/cli-manifest.json index 17b77d182..7e59454e2 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -4009,6 +4009,47 @@ "sourceFile": "boss/stats.js", "navigateBefore": false }, + { + "site": "brave", + "name": "search", + "description": "Search Brave Search", + "access": "read", + "domain": "search.brave.com", + "strategy": "public", + "browser": true, + "args": [ + { + "name": "keyword", + "type": "str", + "required": true, + "positional": true, + "help": "Search query" + }, + { + "name": "limit", + "type": "int", + "default": 10, + "required": false, + "help": "Number of results per page (max 18)" + }, + { + "name": "offset", + "type": "int", + "default": 0, + "required": false, + "help": "Page offset (0, 1, 2...). Brave returns ~18 results per page" + } + ], + "columns": [ + "rank", + "title", + "url", + "snippet" + ], + "type": "js", + "modulePath": "brave/search.js", + "sourceFile": "brave/search.js" + }, { "site": "chaoxing", "name": "assignments", @@ -8704,6 +8745,93 @@ "sourceFile": "douyin/videos.js", "navigateBefore": "https://creator.douyin.com" }, + { + "site": "duckduckgo", + "name": "search", + "description": "Search DuckDuckGo", + "access": "read", + "domain": "html.duckduckgo.com", + "strategy": "public", + "browser": true, + "args": [ + { + "name": "keyword", + "type": "str", + "required": true, + "positional": true, + "help": "Search query" + }, + { + "name": "limit", + "type": "int", + "default": 10, + "required": false, + "help": "Number of results per page (1-10). For multi-page, use --offset" + }, + { + "name": "offset", + "type": "int", + "default": 0, + "required": false, + "help": "Result offset for pagination (0, 10, 20...). Uses XHR POST internally" + }, + { + "name": "region", + "type": "str", + "required": false, + "help": "Region code (e.g. jp-jp, us-en, cn-zh). 
Default: all regions" + }, + { + "name": "time", + "type": "str", + "required": false, + "help": "Time range: d (day), w (week), m (month), y (year)" + } + ], + "columns": [ + "rank", + "title", + "url", + "snippet", + "displayUrl", + "icon", + "resultType" + ], + "type": "js", + "modulePath": "duckduckgo/search.js", + "sourceFile": "duckduckgo/search.js" + }, + { + "site": "duckduckgo", + "name": "suggest", + "description": "DuckDuckGo search suggestions", + "access": "read", + "domain": "duckduckgo.com", + "strategy": "public", + "browser": false, + "args": [ + { + "name": "keyword", + "type": "str", + "required": true, + "positional": true, + "help": "Search query prefix" + }, + { + "name": "limit", + "type": "int", + "default": 8, + "required": false, + "help": "Max number of suggestions" + } + ], + "columns": [ + "phrase" + ], + "type": "js", + "modulePath": "duckduckgo/suggest.js", + "sourceFile": "duckduckgo/suggest.js" + }, { "site": "eastmoney", "name": "announcement", @@ -26699,6 +26827,47 @@ "sourceFile": "xueqiu/watchlist.js", "navigateBefore": "https://xueqiu.com" }, + { + "site": "yahoo", + "name": "search", + "description": "Search Yahoo (powered by Bing)", + "access": "read", + "domain": "search.yahoo.com", + "strategy": "public", + "browser": true, + "args": [ + { + "name": "keyword", + "type": "str", + "required": true, + "positional": true, + "help": "Search query" + }, + { + "name": "limit", + "type": "int", + "default": 7, + "required": false, + "help": "Number of results per page (max 7)" + }, + { + "name": "page", + "type": "int", + "default": 1, + "required": false, + "help": "Page number (1, 2, 3...). 
Yahoo returns ~7 results per page" + } + ], + "columns": [ + "rank", + "title", + "url", + "snippet" + ], + "type": "js", + "modulePath": "yahoo/search.js", + "sourceFile": "yahoo/search.js" + }, { "site": "yahoo-finance", "name": "quote", diff --git a/clis/_shared/search-adapter.js b/clis/_shared/search-adapter.js new file mode 100644 index 000000000..b9e985342 --- /dev/null +++ b/clis/_shared/search-adapter.js @@ -0,0 +1,70 @@ +import { ArgumentError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; + +export function requireSearchQuery(value, label = 'keyword') { + const query = String(value ?? '').trim(); + if (!query) { + throw new ArgumentError(`${label} cannot be empty`); + } + return query; +} + +export function requireBoundedInteger(value, defaultValue, min, max, label) { + const raw = value ?? defaultValue; + const parsed = typeof raw === 'number' ? raw : Number(raw); + if (!Number.isInteger(parsed)) { + throw new ArgumentError(`${label} must be an integer between ${min} and ${max}, got ${JSON.stringify(value)}`); + } + if (parsed < min || parsed > max) { + throw new ArgumentError(`${label} must be between ${min} and ${max}, got ${parsed}`); + } + return parsed; +} + +export function requireNonNegativeInteger(value, defaultValue, label) { + const raw = value ?? defaultValue; + const parsed = typeof raw === 'number' ? 
raw : Number(raw); + if (!Number.isInteger(parsed) || parsed < 0) { + throw new ArgumentError(`${label} must be a non-negative integer, got ${JSON.stringify(value)}`); + } + return parsed; +} + +export function unwrapBrowserResult(value) { + if (value && typeof value === 'object' && !Array.isArray(value) && 'session' in value && 'data' in value) { + return value.data; + } + return value; +} + +export function requireRows(value, label) { + const rows = unwrapBrowserResult(value); + if (!Array.isArray(rows)) { + throw new CommandExecutionError(`${label} returned an unexpected payload shape; expected an array of result rows.`); + } + return rows; +} + +export function toHttpsUrl(value, baseUrl) { + const raw = String(value ?? '').trim(); + if (!raw) return ''; + try { + const url = new URL(raw, baseUrl); + if (url.protocol !== 'http:' && url.protocol !== 'https:') return ''; + return url.href; + } catch { + return ''; + } +} + +export function emptySearchResults(site, query) { + return new EmptyResultError(`${site} search`, `No ${site} results matched "${query}".`); +} + +export async function runBrowserStep(label, fn) { + try { + return await fn(); + } catch (error) { + if (error?.code || error?.name === 'ArgumentError') throw error; + throw new CommandExecutionError(`${label} failed: ${error?.message ?? 
error}`); + } +} diff --git a/clis/brave/search.js b/clis/brave/search.js new file mode 100644 index 000000000..178a65b06 --- /dev/null +++ b/clis/brave/search.js @@ -0,0 +1,80 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { + emptySearchResults, + requireBoundedInteger, + requireNonNegativeInteger, + requireRows, + requireSearchQuery, + runBrowserStep, + toHttpsUrl, +} from '../_shared/search-adapter.js'; + +function buildExtractorJs(limit) { + return ` +(function() { + var results = []; + var seen = {}; + var items = document.querySelectorAll('.snippet'); + for (var i = 0; i < items.length; i++) { + if (results.length >= ${limit}) break; + var el = items[i]; + if (el.classList.contains('standalone') || el.classList.contains('ad')) continue; + var titleEl = el.querySelector('.search-snippet-title'); + var snippetEl = el.querySelector('.generic-snippet .content'); + var linkEl = el.querySelector('.result-content a'); + if (!titleEl) continue; + var title = titleEl.textContent.trim(); + var href = linkEl ? linkEl.getAttribute('href') || '' : ''; + var snippet = snippetEl ? snippetEl.textContent.trim() : ''; + if (!title || !href || seen[href]) continue; + if (href.indexOf('/') === 0) continue; + seen[href] = true; + results.push([title, href, snippet]); + } + return results; +})()`; +} + +const command = cli({ + site: 'brave', + name: 'search', + access: 'read', + description: 'Search Brave Search', + domain: 'search.brave.com', + strategy: Strategy.PUBLIC, + browser: true, + args: [ + { name: 'keyword', positional: true, required: true, help: 'Search query' }, + { name: 'limit', type: 'int', default: 10, help: 'Number of results per page (max 18)' }, + { name: 'offset', type: 'int', default: 0, help: 'Page offset (0, 1, 2...). 
Brave returns ~18 results per page' }, + ], + columns: ['rank', 'title', 'url', 'snippet'], + func: async (page, kwargs) => { + const limit = requireBoundedInteger(kwargs.limit, 10, 1, 18, '--limit'); + const query = requireSearchQuery(kwargs.keyword); + const keyword = encodeURIComponent(query); + const offset = requireNonNegativeInteger(kwargs.offset, 0, '--offset'); + let url = `https://search.brave.com/search?q=${keyword}`; + if (offset > 0) url += `&offset=${offset}`; + await runBrowserStep('brave search navigation', () => page.goto(url)); + try { + await page.wait({ selector: '.snippet', timeout: 10 }); + } catch { + await page.wait(3).catch(function() {}); + } + const raw = await runBrowserStep('brave search extraction', () => page.evaluate(buildExtractorJs(limit))); + const results = requireRows(raw, 'brave search'); + if (results.length === 0) { + throw emptySearchResults('Brave', query); + } + const rows = results + .map(function(r, index) { + return { rank: index + 1 + offset * 18, title: r[0], url: toHttpsUrl(r[1], 'https://search.brave.com'), snippet: r[2] }; + }) + .filter((row) => row.url); + if (rows.length === 0) throw emptySearchResults('Brave', query); + return rows; + }, +}); + +export const __test__ = { command }; diff --git a/clis/brave/search.test.js b/clis/brave/search.test.js new file mode 100644 index 000000000..66747ee0f --- /dev/null +++ b/clis/brave/search.test.js @@ -0,0 +1,76 @@ +import { describe, it, expect, vi } from 'vitest'; + +const { __test__ } = await import('./search.js'); +const command = __test__.command; + +function createPageMock(evaluateResult = []) { + return { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(evaluateResult), + }; +} + +describe('brave search', () => { + it('should register as a valid command', () => { + expect(command).toBeDefined(); + expect(command.site).toBe('brave'); + expect(command.name).toBe('search'); + 
expect(command.access).toBe('read'); + expect(command.browser).toBe(true); + expect(command.strategy).toBe('public'); + expect(command.domain).toBe('search.brave.com'); + }); + + it('should define keyword positional arg', () => { + const kwArg = command.args.find(a => a.name === 'keyword'); + expect(kwArg).toBeDefined(); + expect(kwArg.positional).toBe(true); + expect(kwArg.required).toBe(true); + }); + + it('should define limit arg with default 10', () => { + const limitArg = command.args.find(a => a.name === 'limit'); + expect(limitArg).toBeDefined(); + expect(limitArg.type).toBe('int'); + expect(limitArg.default).toBe(10); + }); + + it('should define output columns', () => { + expect(command.columns).toContain('rank'); + expect(command.columns).toContain('title'); + expect(command.columns).toContain('url'); + expect(command.columns).toContain('snippet'); + }); + + it('rejects empty query, invalid limit, and invalid offset before navigation', async () => { + const page = createPageMock(); + await expect(command.func(page, { keyword: '', limit: 5 })).rejects.toMatchObject({ code: 'ARGUMENT' }); + await expect(command.func(page, { keyword: 'opencli', limit: 19 })).rejects.toMatchObject({ code: 'ARGUMENT' }); + await expect(command.func(page, { keyword: 'opencli', limit: 5, offset: -1 })).rejects.toMatchObject({ code: 'ARGUMENT' }); + expect(page.goto).not.toHaveBeenCalled(); + }); + + it('unwraps browser envelopes and returns ranked HTTPS rows', async () => { + const page = createPageMock({ + session: 'site:brave', + data: [['OpenCLI', 'https://github.com/jackwener/OpenCLI', 'CLI browser tooling']], + }); + + await expect(command.func(page, { keyword: 'opencli', limit: 1, offset: 1 })).resolves.toEqual([{ + rank: 19, + title: 'OpenCLI', + url: 'https://github.com/jackwener/OpenCLI', + snippet: 'CLI browser tooling', + }]); + }); + + it('fails typed instead of silently returning [] for malformed extraction payloads', async () => { + const page = createPageMock({ 
rows: [] }); + + await expect(command.func(page, { keyword: 'opencli', limit: 1 })).rejects.toMatchObject({ + code: 'COMMAND_EXEC', + message: expect.stringContaining('payload shape'), + }); + }); +}); diff --git a/clis/duckduckgo/search.js b/clis/duckduckgo/search.js new file mode 100644 index 000000000..a6678d5b6 --- /dev/null +++ b/clis/duckduckgo/search.js @@ -0,0 +1,131 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { ArgumentError } from '@jackwener/opencli/errors'; +import { + emptySearchResults, + requireBoundedInteger, + requireNonNegativeInteger, + requireRows, + requireSearchQuery, + runBrowserStep, + toHttpsUrl, +} from '../_shared/search-adapter.js'; + +function decodeDdgUrl(href) { + if (!href) return ''; + try { + const url = new URL(href, 'https://duckduckgo.com'); + const uddg = url.searchParams.get('uddg'); + return toHttpsUrl(uddg || href, 'https://duckduckgo.com'); + } catch { + return ''; + } +} + +function buildExtractFn(limit) { + return 'function(doc){' + + 'var r=[];var seen={};var items=doc.querySelectorAll(".result");' + + 'for(var i=0;i=' + limit + ')break;' + + 'var el=items[i];var te=el.querySelector(".result__a");' + + 'var se=el.querySelector(".result__snippet");' + + 'var ue=el.querySelector(".result__url");' + + 'var ie=el.querySelector(".result__icon__img");' + + 'var cls=el.className||"";var rt="web";' + + 'if(cls.indexOf("result--ad")!==-1||cls.indexOf("result--ads")!==-1||cls.indexOf("badge--ad")!==-1)continue;' + + 'if(!te)continue;' + + 'var t=(te.textContent||"").trim();' + + 'var h=te.getAttribute("href")||"";' + + 'var sn=se?(se.textContent||"").trim():"";' + + 'var du=ue?(ue.textContent||"").trim():"";' + + 'var ic=ie?(ie.getAttribute("src")||""):"";' + + 'if(cls.indexOf("news-result")!==-1)rt="news";' + + 'else if(cls.indexOf("video-result")!==-1)rt="video";' + + 'else if(cls.indexOf("image-result")!==-1)rt="image";' + + 'if(!t||!h||seen[h])continue;seen[h]=true;' + + 
'r.push([t,h,sn,du,ic,rt]);' + + '}return r;}'; +} + +function buildExtractorJs(limit) { + return '(' + buildExtractFn(limit) + '(document))'; +} + +function buildPaginateJs(limit, keyword, offset, region) { + var params = 'q=' + encodeURIComponent(keyword) + '&s=' + offset + '&v=l&o=json'; + if (region) params += '&kl=' + encodeURIComponent(region); + return ( + 'new Promise(function($r){' + + 'var x=new XMLHttpRequest();' + + 'x.open("POST","/html/",true);' + + 'x.setRequestHeader("Content-Type","application/x-www-form-urlencoded");' + + 'x.onload=function(){' + + 'try{var d=new DOMParser().parseFromString(x.responseText,"text/html");' + + '$r(' + buildExtractFn(limit) + '(d));' + + '}catch(e){$r({error:"parse",message:String(e&&e.message||e)})}' + + '};' + + 'x.onerror=function(){$r({error:"network"})};' + + 'x.send("' + params + '");' + + '})' + ); +} + +const command = cli({ + site: 'duckduckgo', + name: 'search', + access: 'read', + description: 'Search DuckDuckGo', + domain: 'html.duckduckgo.com', + strategy: Strategy.PUBLIC, + browser: true, + args: [ + { name: 'keyword', positional: true, required: true, help: 'Search query' }, + { name: 'limit', type: 'int', default: 10, help: 'Number of results per page (1-10). For multi-page, use --offset' }, + { name: 'offset', type: 'int', default: 0, help: 'Result offset for pagination (0, 10, 20...). Uses XHR POST internally' }, + { name: 'region', help: 'Region code (e.g. jp-jp, us-en, cn-zh). 
Default: all regions' }, + { name: 'time', help: 'Time range: d (day), w (week), m (month), y (year)' }, + ], + columns: ['rank', 'title', 'url', 'snippet', 'displayUrl', 'icon', 'resultType'], + func: async (page, kwargs) => { + const limit = requireBoundedInteger(kwargs.limit, 10, 1, 10, '--limit'); + const keyword = requireSearchQuery(kwargs.keyword); + const offset = requireNonNegativeInteger(kwargs.offset, 0, '--offset'); + if (offset % 10 !== 0) { + throw new ArgumentError('--offset must be a multiple of 10 for DuckDuckGo HTML pagination'); + } + if (kwargs.time && !/^(d|w|m|y)$/.test(String(kwargs.time))) { + throw new ArgumentError('--time must be one of d, w, m, or y'); + } + let url = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(keyword)}`; + if (kwargs.region) url += `&kl=${encodeURIComponent(String(kwargs.region))}`; + if (kwargs.time) url += `&df=${encodeURIComponent(String(kwargs.time))}`; + await runBrowserStep('duckduckgo search navigation', () => page.goto(url)); + try { + await page.wait({ selector: '.result', timeout: 8 }); + } catch { + await page.wait(3).catch(function() {}); + } + var raw; + if (offset === 0) { + raw = await runBrowserStep('duckduckgo search extraction', () => page.evaluate(buildExtractorJs(limit))); + } else { + raw = await runBrowserStep('duckduckgo search pagination extraction', () => page.evaluate(buildPaginateJs(limit, keyword, offset, kwargs.region))); + } + const rows = requireRows(raw, 'duckduckgo search'); + if (rows.length === 0) { + throw emptySearchResults('DuckDuckGo', keyword); + } + return rows.map(function(r, index) { + return { + rank: index + 1 + offset, + title: r[0], + url: decodeDdgUrl(r[1]), + snippet: r[2], + displayUrl: r[3], + icon: r[4], + resultType: r[5], + }; + }).filter((row) => row.url); + }, +}); + +export const __test__ = { command }; diff --git a/clis/duckduckgo/search.test.js b/clis/duckduckgo/search.test.js new file mode 100644 index 000000000..24b8c9130 --- /dev/null +++ 
b/clis/duckduckgo/search.test.js @@ -0,0 +1,128 @@ +import { describe, it, expect, vi } from 'vitest'; +import { JSDOM } from 'jsdom'; + +const { __test__ } = await import('./search.js'); +const command = __test__.command; + +function createPageMock(evaluateResult = []) { + return { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(evaluateResult), + }; +} + +describe('duckduckgo search', () => { + it('should register as a valid command', () => { + expect(command).toBeDefined(); + expect(command.site).toBe('duckduckgo'); + expect(command.name).toBe('search'); + expect(command.access).toBe('read'); + expect(command.browser).toBe(true); + expect(command.strategy).toBe('public'); + expect(command.domain).toBe('html.duckduckgo.com'); + }); + + it('should define keyword positional arg', () => { + const kwArg = command.args.find(a => a.name === 'keyword'); + expect(kwArg).toBeDefined(); + expect(kwArg.positional).toBe(true); + expect(kwArg.required).toBe(true); + }); + + it('should define limit arg with default 10', () => { + const limitArg = command.args.find(a => a.name === 'limit'); + expect(limitArg).toBeDefined(); + expect(limitArg.type).toBe('int'); + expect(limitArg.default).toBe(10); + }); + + it('should define columns for output', () => { + expect(command.columns).toContain('rank'); + expect(command.columns).toContain('title'); + expect(command.columns).toContain('url'); + expect(command.columns).toContain('snippet'); + expect(command.columns).toContain('displayUrl'); + expect(command.columns).toContain('icon'); + expect(command.columns).toContain('resultType'); + }); + + it('rejects empty query and out-of-range pagination before navigation', async () => { + const page = createPageMock(); + await expect(command.func(page, { keyword: ' ', limit: 5 })).rejects.toMatchObject({ code: 'ARGUMENT' }); + await expect(command.func(page, { keyword: 'opencli', limit: 11 
})).rejects.toMatchObject({ code: 'ARGUMENT' }); + await expect(command.func(page, { keyword: 'opencli', limit: 5, offset: 5 })).rejects.toMatchObject({ code: 'ARGUMENT' }); + expect(page.goto).not.toHaveBeenCalled(); + }); + + it('decodes DuckDuckGo redirect URLs and assigns listing rank', async () => { + const page = createPageMock([ + [ + 'OpenCLI', + '/l/?uddg=https%3A%2F%2Fgithub.com%2Fjackwener%2FOpenCLI', + 'CLI browser tooling', + 'github.com/jackwener/OpenCLI', + '', + 'web', + ], + ]); + + await expect(command.func(page, { keyword: 'opencli', limit: 1 })).resolves.toEqual([{ + rank: 1, + title: 'OpenCLI', + url: 'https://github.com/jackwener/OpenCLI', + snippet: 'CLI browser tooling', + displayUrl: 'github.com/jackwener/OpenCLI', + icon: '', + resultType: 'web', + }]); + }); + + it('executes the DOM extractor, filters ads, and returns canonical rows', async () => { + const dom = new JSDOM(` + +
+ Organic result + Organic snippet + example.com/article + +
+ `); + const page = { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn(async (source) => Function('document', `return ${source};`)(dom.window.document)), + }; + + await expect(command.func(page, { keyword: 'opencli', limit: 5 })).resolves.toEqual([{ + rank: 1, + title: 'Organic result', + url: 'https://example.com/article', + snippet: 'Organic snippet', + displayUrl: 'example.com/article', + icon: 'https://icons.duckduckgo.com/ip3/example.com.ico', + resultType: 'web', + }]); + }); + + it('unwraps browser envelopes for paginated extraction', async () => { + const page = createPageMock({ session: 'site:duckduckgo', data: [ + ['Result', 'https://example.com/', 'snippet', 'example.com', '', 'web'], + ] }); + + const result = await command.func(page, { keyword: 'opencli', limit: 1, offset: 10 }); + + expect(result[0]).toMatchObject({ rank: 11, url: 'https://example.com/' }); + }); + + it('fails typed instead of returning [] for malformed extraction payloads', async () => { + const page = createPageMock({ rows: [] }); + + await expect(command.func(page, { keyword: 'opencli', limit: 1 })).rejects.toMatchObject({ + code: 'COMMAND_EXEC', + message: expect.stringContaining('payload shape'), + }); + }); +}); diff --git a/clis/duckduckgo/suggest.js b/clis/duckduckgo/suggest.js new file mode 100644 index 000000000..d48575835 --- /dev/null +++ b/clis/duckduckgo/suggest.js @@ -0,0 +1,45 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { CommandExecutionError } from '@jackwener/opencli/errors'; +import { requireBoundedInteger, requireSearchQuery } from '../_shared/search-adapter.js'; + +const command = cli({ + site: 'duckduckgo', + name: 'suggest', + access: 'read', + description: 'DuckDuckGo search suggestions', + domain: 'duckduckgo.com', + strategy: Strategy.PUBLIC, + browser: false, + args: [ + { name: 'keyword', positional: true, required: true, help: 'Search query prefix' }, + { name: 
'limit', type: 'int', default: 8, help: 'Max number of suggestions' }, + ], + columns: ['phrase'], + func: async (kwargs) => { + const limit = requireBoundedInteger(kwargs.limit, 8, 1, 20, '--limit'); + const keyword = encodeURIComponent(requireSearchQuery(kwargs.keyword)); + const url = `https://duckduckgo.com/ac/?q=${keyword}&type=list`; + let resp; + try { + resp = await fetch(url); + } catch (err) { + throw new CommandExecutionError(`DuckDuckGo suggest request failed: ${err instanceof Error ? err.message : String(err)}`); + } + if (!resp.ok) { + throw new CommandExecutionError(`DuckDuckGo suggest returned HTTP ${resp.status}`); + } + let data; + try { + data = await resp.json(); + } catch (err) { + throw new CommandExecutionError(`DuckDuckGo suggest returned malformed JSON: ${err?.message ?? err}`); + } + const phrases = Array.isArray(data) && data.length > 1 && Array.isArray(data[1]) ? data[1] : []; + return phrases + .filter((phrase) => typeof phrase === 'string' && phrase.trim()) + .slice(0, limit) + .map(function(p) { return { phrase: p }; }); + }, +}); + +export const __test__ = { command }; diff --git a/clis/duckduckgo/suggest.test.js b/clis/duckduckgo/suggest.test.js new file mode 100644 index 000000000..f02549fe1 --- /dev/null +++ b/clis/duckduckgo/suggest.test.js @@ -0,0 +1,66 @@ +import { afterEach, describe, it, expect, vi } from 'vitest'; + +const { __test__ } = await import('./suggest.js'); +const command = __test__.command; + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('duckduckgo suggest', () => { + it('should register as a valid command', () => { + expect(command).toBeDefined(); + expect(command.site).toBe('duckduckgo'); + expect(command.name).toBe('suggest'); + expect(command.access).toBe('read'); + expect(command.browser).toBe(false); + expect(command.strategy).toBe('public'); + }); + + it('should define keyword positional arg', () => { + const kwArg = command.args.find(a => a.name === 'keyword'); + expect(kwArg).toBeDefined(); 
+ expect(kwArg.positional).toBe(true); + expect(kwArg.required).toBe(true); + }); + + it('should define limit arg with default 8', () => { + const limitArg = command.args.find(a => a.name === 'limit'); + expect(limitArg).toBeDefined(); + expect(limitArg.default).toBe(8); + }); + + it('should define phrase column', () => { + expect(command.columns).toEqual(['phrase']); + }); + + it('rejects empty query and invalid limit before fetch', async () => { + const fetchSpy = vi.spyOn(globalThis, 'fetch'); + await expect(command.func({ keyword: '', limit: 5 })).rejects.toMatchObject({ code: 'ARGUMENT' }); + await expect(command.func({ keyword: 'opencli', limit: 21 })).rejects.toMatchObject({ code: 'ARGUMENT' }); + expect(fetchSpy).not.toHaveBeenCalled(); + }); + + it('returns filtered suggestion rows from the public API payload', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValue({ + ok: true, + json: async () => ['open', ['opencli', '', 'open source']], + }); + + await expect(command.func({ keyword: 'open', limit: 3 })).resolves.toEqual([ + { phrase: 'opencli' }, + { phrase: 'open source' }, + ]); + }); + + it('maps fetch and malformed JSON failures to typed command errors', async () => { + vi.spyOn(globalThis, 'fetch').mockRejectedValueOnce(new Error('offline')); + await expect(command.func({ keyword: 'open', limit: 3 })).rejects.toMatchObject({ code: 'COMMAND_EXEC' }); + + vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce({ + ok: true, + json: async () => { throw new Error('bad json'); }, + }); + await expect(command.func({ keyword: 'open', limit: 3 })).rejects.toMatchObject({ code: 'COMMAND_EXEC' }); + }); +}); diff --git a/clis/yahoo/search.js b/clis/yahoo/search.js new file mode 100644 index 000000000..f72624411 --- /dev/null +++ b/clis/yahoo/search.js @@ -0,0 +1,92 @@ +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { + emptySearchResults, + requireBoundedInteger, + requireRows, + requireSearchQuery, + runBrowserStep, + toHttpsUrl, 
+} from '../_shared/search-adapter.js'; + +function decodeYahooUrl(href) { + if (!href) return ''; + if (href.indexOf('RU=') !== -1 && href.indexOf('/RK=') !== -1) { + var match = href.match(/RU=([^/]+)\/RK=/); + if (match && match[1]) { + try { + return toHttpsUrl(decodeURIComponent(match[1]), 'https://search.yahoo.com'); + } catch { + return toHttpsUrl(href, 'https://search.yahoo.com'); + } + } + } + return toHttpsUrl(href, 'https://search.yahoo.com'); +} + +function buildExtractorJs(limit) { + return ` +(function() { + var results = []; + var seen = {}; + var items = document.querySelectorAll('.algo'); + for (var i = 0; i < items.length; i++) { + if (results.length >= ${limit}) break; + var el = items[i]; + var h3 = el.querySelector('h3'); + var linkEl = el.querySelector('.compTitle a'); + var snippetEl = el.querySelector('.compText'); + if (!h3 || !linkEl) continue; + var title = h3.textContent.trim(); + var href = linkEl.getAttribute('href') || ''; + var snippet = snippetEl ? snippetEl.textContent.trim() : ''; + if (!title || !href || seen[href]) continue; + seen[href] = true; + results.push([title, href, snippet]); + } + return results; +})()`; +} + +const command = cli({ + site: 'yahoo', + name: 'search', + access: 'read', + description: 'Search Yahoo (powered by Bing)', + domain: 'search.yahoo.com', + strategy: Strategy.PUBLIC, + browser: true, + args: [ + { name: 'keyword', positional: true, required: true, help: 'Search query' }, + { name: 'limit', type: 'int', default: 7, help: 'Number of results per page (max 7)' }, + { name: 'page', type: 'int', default: 1, help: 'Page number (1, 2, 3...). 
Yahoo returns ~7 results per page' }, + ], + columns: ['rank', 'title', 'url', 'snippet'], + func: async (page, kwargs) => { + const limit = requireBoundedInteger(kwargs.limit, 7, 1, 7, '--limit'); + const query = requireSearchQuery(kwargs.keyword); + const keyword = encodeURIComponent(query); + const pageNum = requireBoundedInteger(kwargs.page, 1, 1, 100, '--page'); + var url = `https://search.yahoo.com/search?p=${keyword}`; + if (pageNum > 1) url += `&b=${(pageNum - 1) * 7 + 1}`; + await runBrowserStep('yahoo search navigation', () => page.goto(url)); + try { + await page.wait({ selector: '.algo', timeout: 10 }); + } catch { + await page.wait(3).catch(function() {}); + } + const raw = await runBrowserStep('yahoo search extraction', () => page.evaluate(buildExtractorJs(limit))); + const results = requireRows(raw, 'yahoo search'); + if (results.length === 0) { + throw emptySearchResults('Yahoo', query); + } + const rows = results + .map(function(r, index) { + return { rank: index + 1 + (pageNum - 1) * 7, title: r[0], url: decodeYahooUrl(r[1]), snippet: r[2] }; + }) + .filter((row) => row.url); + if (rows.length === 0) throw emptySearchResults('Yahoo', query); + return rows; + }, +}); + +export const __test__ = { command }; diff --git a/clis/yahoo/search.test.js b/clis/yahoo/search.test.js new file mode 100644 index 000000000..0c215ac6c --- /dev/null +++ b/clis/yahoo/search.test.js @@ -0,0 +1,94 @@ +import { describe, it, expect, vi } from 'vitest'; + +const { __test__ } = await import('./search.js'); +const command = __test__.command; + +function createPageMock(evaluateResult = []) { + return { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(evaluateResult), + }; +} + +describe('yahoo search', () => { + it('should register as a valid command', () => { + expect(command).toBeDefined(); + expect(command.site).toBe('yahoo'); + expect(command.name).toBe('search'); + 
expect(command.access).toBe('read'); + expect(command.browser).toBe(true); + expect(command.strategy).toBe('public'); + expect(command.domain).toBe('search.yahoo.com'); + }); + + it('should define keyword positional arg', () => { + const kwArg = command.args.find(a => a.name === 'keyword'); + expect(kwArg).toBeDefined(); + expect(kwArg.positional).toBe(true); + expect(kwArg.required).toBe(true); + }); + + it('should define limit arg with default 7', () => { + const limitArg = command.args.find(a => a.name === 'limit'); + expect(limitArg).toBeDefined(); + expect(limitArg.type).toBe('int'); + expect(limitArg.default).toBe(7); + }); + + it('should define output columns', () => { + expect(command.columns).toContain('rank'); + expect(command.columns).toContain('title'); + expect(command.columns).toContain('url'); + expect(command.columns).toContain('snippet'); + }); + + it('rejects empty query, invalid limit, and invalid page before navigation', async () => { + const page = createPageMock(); + await expect(command.func(page, { keyword: ' ', limit: 5 })).rejects.toMatchObject({ code: 'ARGUMENT' }); + await expect(command.func(page, { keyword: 'opencli', limit: 8 })).rejects.toMatchObject({ code: 'ARGUMENT' }); + await expect(command.func(page, { keyword: 'opencli', limit: 5, page: 0 })).rejects.toMatchObject({ code: 'ARGUMENT' }); + expect(page.goto).not.toHaveBeenCalled(); + }); + + it('decodes Yahoo redirect URLs and assigns listing rank', async () => { + const page = createPageMock({ + session: 'site:yahoo', + data: [[ + 'OpenCLI', + 'https://r.search.yahoo.com/_ylt=x/RU=https%3A%2F%2Fgithub.com%2Fjackwener%2FOpenCLI/RK=2/RS=x', + 'CLI browser tooling', + ]], + }); + + await expect(command.func(page, { keyword: 'opencli', limit: 1, page: 2 })).resolves.toEqual([{ + rank: 8, + title: 'OpenCLI', + url: 'https://github.com/jackwener/OpenCLI', + snippet: 'CLI browser tooling', + }]); + }); + + it('drops decoded Yahoo redirect targets that are not http(s) URLs', async () 
=> { + const page = createPageMock([ + [ + 'Bad redirect', + 'https://r.search.yahoo.com/_ylt=x/RU=javascript%3Aalert(1)/RK=2/RS=x', + 'should not be emitted', + ], + ]); + + await expect(command.func(page, { keyword: 'opencli', limit: 1 })).rejects.toMatchObject({ + code: 'EMPTY_RESULT', + }); + }); + + it('fails typed instead of silently returning [] for malformed extraction payloads', async () => { + const page = createPageMock({ rows: [] }); + + await expect(command.func(page, { keyword: 'opencli', limit: 1 })).rejects.toMatchObject({ + code: 'COMMAND_EXEC', + message: expect.stringContaining('payload shape'), + }); + }); +}); diff --git a/docs/adapters/browser/brave.md b/docs/adapters/browser/brave.md new file mode 100644 index 000000000..d36464600 --- /dev/null +++ b/docs/adapters/browser/brave.md @@ -0,0 +1,47 @@ +# Brave Search + +**Mode**: 🌐 Public · **Domain**: `search.brave.com` + +## Commands + +| Command | Description | +|---------|-------------| +| `opencli brave search ` | Search Brave Search and extract results from the page | + +## What works today + +- Uses browser mode to search `search.brave.com` and extract ranked results via DOM queries. +- Supports `--offset` for GET-based pagination. Brave returns approximately 18 results per page. +- Results include rank, title, URL, and snippet. +- `--limit` must be between 1 and 18; `--offset` must be a non-negative page offset. + +## Current limitations + +- Requires browser mode. Brave Search does not offer a public, no-auth search API. +- DOM structure uses Svelte-generated class names that may change with updates. +- Some results may have empty snippets depending on Brave's layout. 
+ +## Usage Examples + +```bash +# Basic search +opencli brave search "machine learning" + +# Limit results +opencli brave search "machine learning" --limit 5 + +# Pagination (second page) +opencli brave search "machine learning" --offset 1 + +# JSON output +opencli brave search "machine learning" -f json +``` + +## Prerequisites + +- Requires Chrome running (Standalone mode will auto-launch) or the [Browser Bridge extension](/guide/browser-bridge). + +## Notes + +- Brave Search renders results server-side; all results are present in the initial HTML (no lazy loading). +- Brave also shows an AI-generated summary box as the first result. The adapter filters this out via the `.standalone` class check. diff --git a/docs/adapters/browser/duckduckgo.md b/docs/adapters/browser/duckduckgo.md new file mode 100644 index 000000000..8fd42b099 --- /dev/null +++ b/docs/adapters/browser/duckduckgo.md @@ -0,0 +1,60 @@ +# DuckDuckGo + +**Mode**: 🌐 Public · **Domains**: `html.duckduckgo.com`, `duckduckgo.com` + +## Commands + +| Command | Description | +|---------|-------------| +| `opencli duckduckgo search ` | Search DuckDuckGo and extract results from the page | +| `opencli duckduckgo suggest ` | Get DuckDuckGo search suggestions | + +## What works today + +- `duckduckgo search` uses browser mode to search `html.duckduckgo.com` and extract ranked results. +- `duckduckgo suggest` uses the public JSON API at `duckduckgo.com/ac/` — no browser needed. +- `search` supports `--region` (e.g. `jp-jp`, `us-en`, `cn-zh`) and `--time` (`d`, `w`, `m`, `y`) filters. +- `search` supports `--offset` for pagination via XHR POST (avoids page navigation issues with `form.submit()`). + +## Current limitations + +- `duckduckgo search` requires browser mode due to anti-bot protections on DuckDuckGo. +- The HTML version returns a maximum of 10 results per page; `--limit` must be between 1 and 10. +- Pagination uses POST-based navigation; results may have some overlap at page boundaries. 
+- Snippet extraction is based on the HTML version's DOM structure (`.result__snippet`). + +## Usage Examples + +```bash +# Basic search +opencli duckduckgo search "machine learning" + +# Limit results +opencli duckduckgo search "machine learning" --limit 5 + +# Region-specific search +opencli duckduckgo search "machine learning" --region jp-jp + +# Time filter (past week) +opencli duckduckgo search "machine learning" --time w + +# Pagination (second page) +opencli duckduckgo search "machine learning" --offset 10 + +# JSON output +opencli duckduckgo search "machine learning" -f json + +# Search suggestions +opencli duckduckgo suggest "machine" --limit 5 +``` + +## Prerequisites + +- `suggest` does not require Chrome. +- `search` requires Chrome running (Standalone mode will auto-launch) or the [Browser Bridge extension](/guide/browser-bridge). + +## Notes + +- DuckDuckGo uses `uddg=` URL redirects; the adapter automatically decodes them to return clean URLs. +- The `ac/` suggest API returns phonetic suggestions for CJK queries, which may not always match expected results. +- Region codes follow DuckDuckGo's format (e.g. `jp-jp`, `us-en`, `uk-en`). Default is all regions. diff --git a/docs/adapters/browser/yahoo.md b/docs/adapters/browser/yahoo.md new file mode 100644 index 000000000..61603939f --- /dev/null +++ b/docs/adapters/browser/yahoo.md @@ -0,0 +1,49 @@ +# Yahoo Search + +**Mode**: 🌐 Public · **Domain**: `search.yahoo.com` + +## Commands + +| Command | Description | +|---------|-------------| +| `opencli yahoo search ` | Search Yahoo (powered by Bing) and extract results from the page | + +## What works today + +- Uses browser mode to search `search.yahoo.com` and extract ranked results via DOM queries. +- Supports `--page` for pagination. Yahoo returns approximately 7 results per page. +- Results include rank, title, URL, and snippet. +- `--limit` must be between 1 and 7; `--page` must be a positive integer. 
+ +## Current limitations + +- Requires browser mode. +- Yahoo returns fewer results per page (7) compared to other engines. +- Page 2+ results may include lower-quality or less relevant matches. +- Yahoo wraps URLs in a `RU=.../RK=` redirect structure; the adapter automatically decodes them. + +## Usage Examples + +```bash +# Basic search +opencli yahoo search "machine learning" + +# Limit results +opencli yahoo search "machine learning" --limit 5 + +# Pagination (second page) +opencli yahoo search "machine learning" --page 2 + +# JSON output +opencli yahoo search "machine learning" -f json +``` + +## Prerequisites + +- Requires Chrome running (Standalone mode will auto-launch) or the [Browser Bridge extension](/guide/browser-bridge). + +## Notes + +- Yahoo Search is powered by Bing. Results are rendered server-side in the initial HTML. +- Yahoo uses `RU=` redirect URLs to wrap search results; the adapter extracts the real URLs automatically. +- Region/language filtering is not currently exposed as a parameter. Results depend on the browser session's locale. 
From bccd275d666f70eb34081a2c44717b461f681e54 Mon Sep 17 00:00:00 2001 From: jakevin Date: Thu, 14 May 2026 18:04:07 +0800 Subject: [PATCH 14/37] test(extension): cover adapter group tiebreaker (#1566) --- extension/src/background.test.ts | 49 ++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/extension/src/background.test.ts b/extension/src/background.test.ts index 7fdac5f97..633df510d 100644 --- a/extension/src/background.test.ts +++ b/extension/src/background.test.ts @@ -1300,6 +1300,55 @@ describe('background tab isolation', () => { expect(chrome.tabs.group).toHaveBeenCalledWith({ groupId: 98, tabIds: [78] }); }); + it('uses the lowest group id as the final OpenCLI Adapter group tiebreaker', async () => { + const { chrome, tabs, groups, setLastFocusedWindowId } = createChromeMock(); + setLastFocusedWindowId(2); + tabs.push({ + id: 77, + windowId: 7, + url: 'about:blank', + title: 'blank', + active: true, + status: 'complete', + groupId: -1, + }); + tabs.push({ + id: 78, + windowId: 8, + url: 'about:blank', + title: 'blank', + active: true, + status: 'complete', + groupId: -1, + }); + groups.push( + { + id: 99, + windowId: 7, + title: 'OpenCLI Adapter', + color: 'orange', + collapsed: true, + }, + { + id: 98, + windowId: 8, + title: 'OpenCLI Adapter', + color: 'orange', + collapsed: true, + }, + ); + vi.stubGlobal('chrome', chrome); + + const mod = await import('./background'); + const tabId = await mod.__test__.resolveTabId(undefined, adapterKey('twitter')); + + expect(tabId).toBe(78); + expect(chrome.windows.create).not.toHaveBeenCalled(); + expect(mod.__test__.getAutomationWindowId(adapterKey('twitter'))).toBe(8); + expect(tabs.find((tab) => tab.id === 78)?.groupId).toBe(98); + expect(chrome.tabs.group).toHaveBeenCalledWith({ groupId: 98, tabIds: [78] }); + }); + it('discovers and reuses a legacy OpenCLI automation group before creating a duplicate', async () => { const { chrome, tabs, groups } = createChromeMock(); tabs.push({ 
From 42b5a4e68dfcef2484e51905eda7b86797742890 Mon Sep 17 00:00:00 2001 From: "J.Chen" Date: Thu, 14 May 2026 18:41:02 +0800 Subject: [PATCH 15/37] feat(boss): support job-seeker chatlist and chatmsg (#1539) * feat(boss): support job-seeker chatlist and chatmsg * fix(boss): type chat-side failure boundaries * fix(boss): guard malformed chat API payloads --------- Co-authored-by: Jeff Chen Co-authored-by: jackwener --- cli-manifest.json | 32 ++++- clis/boss/chatlist.js | 110 +++++++++++++--- clis/boss/chatlist.test.js | 211 +++++++++++++++++++++++++++++++ clis/boss/chatmsg.js | 122 ++++++++++++++---- clis/boss/chatmsg.test.js | 230 +++++++++++++++++++++++++++++++++ clis/boss/utils.js | 251 +++++++++++++++++++++++++++++++++++-- 6 files changed, 904 insertions(+), 52 deletions(-) create mode 100644 clis/boss/chatlist.test.js create mode 100644 clis/boss/chatmsg.test.js diff --git a/cli-manifest.json b/cli-manifest.json index 7e59454e2..d8293007b 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -3484,7 +3484,7 @@ { "site": "boss", "name": "chatlist", - "description": "BOSS直聘查看聊天列表(招聘端)", + "description": "BOSS直聘查看聊天列表(招聘端/求职端)", "access": "read", "domain": "www.zhipin.com", "strategy": "cookie", @@ -3509,12 +3509,26 @@ "type": "str", "default": "0", "required": false, - "help": "Filter by job ID (0=all)" + "help": "Filter by job ID (0=all, boss side only)" + }, + { + "name": "side", + "type": "str", + "default": "auto", + "required": false, + "help": "Identity side: auto (default), boss (recruiter), or geek (job-seeker)", + "choices": [ + "auto", + "boss", + "geek" + ] } ], "columns": [ "name", + "company", "job", + "title", "last_msg", "last_time", "uid", @@ -3528,7 +3542,7 @@ { "site": "boss", "name": "chatmsg", - "description": "BOSS直聘查看与候选人的聊天消息", + "description": "BOSS直聘查看聊天消息历史(招聘端/求职端)", "access": "read", "domain": "www.zhipin.com", "strategy": "cookie", @@ -3547,6 +3561,18 @@ "default": 1, "required": false, "help": "Page number" + }, + { + "name": 
"side", + "type": "str", + "default": "auto", + "required": false, + "help": "Identity side: auto (default), boss (recruiter), or geek (job-seeker)", + "choices": [ + "auto", + "boss", + "geek" + ] } ], "columns": [ diff --git a/clis/boss/chatlist.js b/clis/boss/chatlist.js index aaadec883..d0105451f 100644 --- a/clis/boss/chatlist.js +++ b/clis/boss/chatlist.js @@ -1,10 +1,60 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; -import { requirePage, navigateToChat, fetchFriendList } from './utils.js'; +import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { + requirePage, navigateToChat, navigateToGeekChat, + fetchFriendList, fetchGeekFriendLabelList, fetchGeekFriendInfoList, + readEncryptSystemId, assertOk, IDENTITY_MISMATCH_CODE, + readPositiveInteger, +} from './utils.js'; + +function formatMsgTime(ms) { + if (!ms) return ''; + return new Date(ms).toLocaleString('zh-CN'); +} + +function mapBossRow(f) { + return { + name: f.name || '', + company: '', + job: f.jobName || '', + title: '', + last_msg: f.lastMessageInfo?.text || '', + last_time: f.lastTime || '', + uid: f.encryptUid || '', + security_id: f.securityId || '', + }; +} + +async function buildGeekRows(page, limit) { + const encryptSystemId = await readEncryptSystemId(page); + const labelList = await fetchGeekFriendLabelList(page, { encryptSystemId }); + if (labelList.length === 0) { + return []; + } + const slicedLabels = labelList.slice(0, limit); + const friendIds = slicedLabels.map((f) => f.friendId).filter(Boolean); + const enriched = await fetchGeekFriendInfoList(page, friendIds); + const enrichMap = new Map(enriched.map((f) => [String(f.friendId ?? 
f.uid), f])); + return slicedLabels.map((f) => { + const e = enrichMap.get(String(f.friendId)) || {}; + return { + name: e.name || f.name || '', + company: e.brandName || f.brandName || '', + job: e.jobName || f.jobName || '', + title: e.bossTitle || f.bossTitle || '', + last_msg: e.lastMessageInfo?.showText || e.lastMsg || f.lastMsg || '', + last_time: e.lastTime || formatMsgTime(e.lastMessageInfo?.msgTime) || formatMsgTime(f.updateTime) || '', + uid: e.encryptUid || f.encryptFriendId || String(e.uid ?? e.friendId ?? f.friendId ?? ''), + security_id: e.securityId || '', + }; + }); +} + cli({ site: 'boss', name: 'chatlist', access: 'read', - description: 'BOSS直聘查看聊天列表(招聘端)', + description: 'BOSS直聘查看聊天列表(招聘端/求职端)', domain: 'www.zhipin.com', strategy: Strategy.COOKIE, navigateBefore: false, @@ -12,23 +62,55 @@ cli({ args: [ { name: 'page', type: 'int', default: 1, help: 'Page number' }, { name: 'limit', type: 'int', default: 20, help: 'Number of results' }, - { name: 'job-id', default: '0', help: 'Filter by job ID (0=all)' }, + { name: 'job-id', default: '0', help: 'Filter by job ID (0=all, boss side only)' }, + { name: 'side', default: 'auto', choices: ['auto', 'boss', 'geek'], help: 'Identity side: auto (default), boss (recruiter), or geek (job-seeker)' }, ], - columns: ['name', 'job', 'last_msg', 'last_time', 'uid', 'security_id'], + columns: ['name', 'company', 'job', 'title', 'last_msg', 'last_time', 'uid', 'security_id'], func: async (page, kwargs) => { requirePage(page); + const limit = readPositiveInteger(kwargs.limit, 'chatlist --limit', 20, 100); + const pageNum = readPositiveInteger(kwargs.page, 'chatlist --page', 1); + const side = kwargs.side || 'auto'; + + if (side === 'boss') { + await navigateToChat(page); + const friends = await fetchFriendList(page, { + pageNum, + jobId: kwargs['job-id'] || '0', + }); + if (friends.length === 0) + throw new EmptyResultError('boss chatlist', 'No recruiter-side chat sessions were returned.'); + return friends.slice(0, 
limit).map(mapBossRow); + } + + if (side === 'geek') { + await navigateToGeekChat(page); + const rows = await buildGeekRows(page, limit); + if (rows.length === 0) + throw new EmptyResultError('boss chatlist', 'No job-seeker-side chat sessions were returned.'); + return rows; + } + + // auto: try recruiter first, fall back to geek on identity mismatch await navigateToChat(page); - const friends = await fetchFriendList(page, { - pageNum: kwargs.page || 1, + const bossResult = await fetchFriendList(page, { + pageNum, jobId: kwargs['job-id'] || '0', + allowNonZero: true, }); - return friends.slice(0, kwargs.limit || 20).map((f) => ({ - name: f.name || '', - job: f.jobName || '', - last_msg: f.lastMessageInfo?.text || '', - last_time: f.lastTime || '', - uid: f.encryptUid || '', - security_id: f.securityId || '', - })); + if (Array.isArray(bossResult)) { + if (bossResult.length === 0) + throw new EmptyResultError('boss chatlist', 'No recruiter-side chat sessions were returned.'); + return bossResult.slice(0, limit).map(mapBossRow); + } + if (bossResult.code === IDENTITY_MISMATCH_CODE) { + await navigateToGeekChat(page); + const rows = await buildGeekRows(page, limit); + if (rows.length === 0) + throw new EmptyResultError('boss chatlist', 'No job-seeker-side chat sessions were returned.'); + return rows; + } + assertOk(bossResult); + throw new CommandExecutionError('Boss chatlist returned an unexpected response'); }, }); diff --git a/clis/boss/chatlist.test.js b/clis/boss/chatlist.test.js new file mode 100644 index 000000000..856f9fda7 --- /dev/null +++ b/clis/boss/chatlist.test.js @@ -0,0 +1,211 @@ +import { describe, expect, it, vi } from 'vitest'; +import { getRegistry } from '@jackwener/opencli/registry'; +import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import './chatlist.js'; + +const BOSS_FRIEND = { + name: '张三', + jobName: '后端工程师', + lastMessageInfo: { text: '你好' }, + lastTime: '2024-01-01 
10:00', + encryptUid: 'enc-boss-uid', + securityId: 'boss-sec-id', +}; + +const GEEK_LABEL_FRIEND = { + friendId: 12345, + name: '李四', + brandName: '字节跳动', + jobName: '产品经理', + bossTitle: 'HR', + lastMsg: '感谢投递', + updateTime: 1704067200000, + encryptFriendId: 'enc-geek-uid', +}; + +const GEEK_ENRICHED = { + friendId: 12345, + uid: 99999, + name: '李四', + brandName: '字节跳动', + jobName: '产品经理', + bossTitle: 'HR总监', + encryptUid: 'enc-geek-uid', + securityId: 'geek-sec-id', + lastMessageInfo: { showText: '感谢投递', msgTime: 1704067200000 }, + lastTime: '2024-01-01', +}; + +function createPageMock(evaluateImpl) { + return { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockImplementation(evaluateImpl), + }; +} + +describe('boss chatlist', () => { + const command = getRegistry().get('boss/chatlist'); + + it('--side boss preserves existing behavior with 8-column output', async () => { + const page = createPageMock(async (script) => { + if (script.includes('getBossFriendListV2')) { + return { code: 0, zpData: { friendList: [BOSS_FRIEND] } }; + } + return {}; + }); + const rows = await command.func(page, { page: 1, limit: 20, 'job-id': '0', side: 'boss' }); + expect(page.goto).toHaveBeenCalledWith(expect.stringContaining('/web/chat/index')); + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ + name: '张三', + company: '', + job: '后端工程师', + title: '', + last_msg: '你好', + uid: 'enc-boss-uid', + security_id: 'boss-sec-id', + }); + }); + + it('--side geek maps enriched getGeekFriendList data into 8 columns', async () => { + const page = createPageMock(async (script) => { + if (script.includes('document.cookie')) return 'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, zpData: { friendList: [GEEK_LABEL_FRIEND] } }; + } + if (script.includes('getGeekFriendList.json')) { + return { code: 0, zpData: { result: [GEEK_ENRICHED] } }; + } + return {}; + }); + const rows = await 
command.func(page, { page: 1, limit: 20, 'job-id': '0', side: 'geek' }); + expect(page.goto).toHaveBeenCalledWith(expect.stringContaining('/web/geek/chat')); + expect(rows).toHaveLength(1); + expect(rows[0]).toMatchObject({ + name: '李四', + company: '字节跳动', + job: '产品经理', + title: 'HR总监', + uid: 'enc-geek-uid', + security_id: 'geek-sec-id', + }); + }); + + it('--side geek falls back to label fields when enrichment has no match', async () => { + const page = createPageMock(async (script) => { + if (script.includes('document.cookie')) return 'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, zpData: { friendList: [GEEK_LABEL_FRIEND] } }; + } + if (script.includes('getGeekFriendList.json')) { + return { code: 0, zpData: { result: [] } }; + } + return {}; + }); + const rows = await command.func(page, { page: 1, limit: 20, 'job-id': '0', side: 'geek' }); + expect(rows).toHaveLength(1); + expect(rows[0].name).toBe('李四'); + expect(rows[0].company).toBe('字节跳动'); + expect(rows[0].security_id).toBe(''); + }); + + it('rejects invalid --limit before navigating', async () => { + const page = createPageMock(async () => ({})); + await expect( + command.func(page, { page: 1, limit: 0, 'job-id': '0', side: 'geek' }) + ).rejects.toBeInstanceOf(ArgumentError); + expect(page.goto).not.toHaveBeenCalled(); + }); + + it('--side geek reports a true empty chat list as EmptyResultError', async () => { + const page = createPageMock(async (script) => { + if (script.includes('document.cookie')) return 'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, zpData: { friendList: [] } }; + } + return {}; + }); + await expect( + command.func(page, { page: 1, limit: 20, 'job-id': '0', side: 'geek' }) + ).rejects.toBeInstanceOf(EmptyResultError); + }); + + it('treats malformed geek enrichment payload as CommandExecutionError', async () => { + const page = createPageMock(async (script) => { + if (script.includes('document.cookie')) return 
'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, zpData: { friendList: [GEEK_LABEL_FRIEND] } }; + } + if (script.includes('getGeekFriendList.json')) { + return { code: 0, zpData: {} }; + } + return {}; + }); + await expect( + command.func(page, { page: 1, limit: 20, 'job-id': '0', side: 'geek' }) + ).rejects.toBeInstanceOf(CommandExecutionError); + }); + + it('treats null Boss API payload as CommandExecutionError', async () => { + const page = createPageMock(async (script) => { + if (script.includes('getBossFriendListV2')) return null; + return {}; + }); + await expect( + command.func(page, { page: 1, limit: 20, 'job-id': '0', side: 'boss' }) + ).rejects.toBeInstanceOf(CommandExecutionError); + }); + + it('maps expired Boss cookies to AuthRequiredError', async () => { + const page = createPageMock(async (script) => { + if (script.includes('getBossFriendListV2')) { + return { code: 7, message: 'Cookie 已过期' }; + } + return {}; + }); + await expect( + command.func(page, { page: 1, limit: 20, 'job-id': '0', side: 'boss' }) + ).rejects.toBeInstanceOf(AuthRequiredError); + }); + + it('--side auto falls back to geek when recruiter returns code 24', async () => { + const page = createPageMock(async (script) => { + if (script.includes('getBossFriendListV2')) { + return { code: 24, message: '请切换身份后再试' }; + } + if (script.includes('document.cookie')) return 'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, zpData: { friendList: [GEEK_LABEL_FRIEND] } }; + } + if (script.includes('getGeekFriendList.json')) { + return { code: 0, zpData: { result: [GEEK_ENRICHED] } }; + } + return {}; + }); + const rows = await command.func(page, { page: 1, limit: 20, 'job-id': '0', side: 'auto' }); + expect(rows).toHaveLength(1); + expect(rows[0].company).toBe('字节跳动'); + expect(page.goto).toHaveBeenCalledWith(expect.stringContaining('/web/geek/chat')); + }); + + it('--side auto uses recruiter results when code 0 and does not 
call geek API', async () => { + const page = createPageMock(async (script) => { + if (script.includes('getBossFriendListV2')) { + return { code: 0, zpData: { friendList: [BOSS_FRIEND] } }; + } + return {}; + }); + const rows = await command.func(page, { page: 1, limit: 20, 'job-id': '0', side: 'auto' }); + expect(rows).toHaveLength(1); + expect(rows[0].name).toBe('张三'); + const evaluateCalls = page.evaluate.mock.calls.map((c) => c[0]); + expect(evaluateCalls.some((s) => s.includes('geekFilterByLabel'))).toBe(false); + }); + + it('registers --side as a choices-constrained arg defaulting to auto', () => { + const sideArg = command.args.find((a) => a.name === 'side'); + expect(sideArg?.choices).toEqual(['auto', 'boss', 'geek']); + expect(sideArg?.default).toBe('auto'); + }); +}); diff --git a/clis/boss/chatmsg.js b/clis/boss/chatmsg.js index a9ae945cb..d422b4f70 100644 --- a/clis/boss/chatmsg.js +++ b/clis/boss/chatmsg.js @@ -1,10 +1,72 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; -import { requirePage, navigateToChat, bossFetch, findFriendByUid } from './utils.js'; +import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { + requirePage, navigateToChat, navigateToGeekChat, + bossFetch, findFriendByUid, findGeekFriendByUid, + fetchGeekHistoryMsg, readEncryptSystemId, + assertOk, IDENTITY_MISMATCH_CODE, + readPositiveInteger, readRequiredString, +} from './utils.js'; + +const TYPE_MAP = { + 1: '文本', 2: '图片', 3: '招呼', 4: '简历', 5: '系统', + 6: '名片', 7: '语音', 8: '视频', 9: '表情', +}; + +function mapBossMsg(m, friend) { + const fromObj = m.from || {}; + const isSelf = typeof fromObj === 'object' ? fromObj.uid !== friend.uid : false; + return { + from: isSelf ? '我' : (typeof fromObj === 'object' ? fromObj.name : friend.name), + type: TYPE_MAP[m.type] || `其他(${m.type})`, + text: m.text || m.body?.text || '', + time: m.time ? 
new Date(m.time).toLocaleString('zh-CN') : '', + }; +} + +function mapGeekMsg(m, friend) { + const fromUid = m.from && m.from.uid; + const isFromBoss = fromUid != null && String(fromUid) === String(friend.uid); + return { + from: isFromBoss ? '对方' : '我', + type: TYPE_MAP[m.type] || `其他(${m.type})`, + text: m.text || m.body?.text || m.body?.content || m.body?.showText || + JSON.stringify(m.body || {}).slice(0, 120), + time: m.time ? new Date(m.time).toLocaleString('zh-CN') : '', + }; +} + +async function bossChatMsg(page, kwargs, existingFriend) { + const friend = existingFriend ?? await findFriendByUid(page, kwargs.uid); + if (!friend) throw new EmptyResultError('boss chatmsg', '未找到该候选人'); + if (!friend.securityId) throw new CommandExecutionError('该聊天缺少 securityId,无法获取历史消息'); + const gid = friend.uid; + const securityId = encodeURIComponent(friend.securityId); + const msgUrl = `https://www.zhipin.com/wapi/zpchat/boss/historyMsg?gid=${gid}&securityId=${securityId}&page=${kwargs.page}&c=20&src=0`; + const msgData = await bossFetch(page, msgUrl); + const messages = msgData.zpData?.messages ?? 
msgData.zpData?.historyMsgList; + if (!Array.isArray(messages)) { + throw new CommandExecutionError('Boss recruiter history response did not include a message list'); + } + if (messages.length === 0) { + throw new EmptyResultError('boss chatmsg', 'Boss returned no messages for this chat.'); + } + return messages.map((m) => mapBossMsg(m, friend)); +} + +async function geekChatMsg(page, kwargs, encryptSystemId) { + const friend = await findGeekFriendByUid(page, kwargs.uid, { encryptSystemId }); + if (!friend) throw new EmptyResultError('boss chatmsg', '未找到该聊天(geek 侧)'); + if (!friend.securityId) throw new CommandExecutionError('该聊天缺少 securityId,无法获取历史消息'); + const messages = await fetchGeekHistoryMsg(page, friend, { page: kwargs.page }); + return messages.map((m) => mapGeekMsg(m, friend)); +} + cli({ site: 'boss', name: 'chatmsg', access: 'read', - description: 'BOSS直聘查看与候选人的聊天消息', + description: 'BOSS直聘查看聊天消息历史(招聘端/求职端)', domain: 'www.zhipin.com', strategy: Strategy.COOKIE, navigateBefore: false, @@ -12,32 +74,44 @@ cli({ args: [ { name: 'uid', required: true, positional: true, help: 'Encrypted UID (from chatlist)' }, { name: 'page', type: 'int', default: 1, help: 'Page number' }, + { name: 'side', default: 'auto', choices: ['auto', 'boss', 'geek'], help: 'Identity side: auto (default), boss (recruiter), or geek (job-seeker)' }, ], columns: ['from', 'type', 'text', 'time'], func: async (page, kwargs) => { requirePage(page); + const uid = readRequiredString(kwargs.uid, 'chatmsg uid'); + const pageNum = readPositiveInteger(kwargs.page, 'chatmsg --page', 1); + const normalizedKwargs = { ...kwargs, uid, page: pageNum }; + const side = kwargs.side || 'auto'; + + if (side === 'boss') { + await navigateToChat(page); + return await bossChatMsg(page, normalizedKwargs); + } + + if (side === 'geek') { + await navigateToGeekChat(page); + const encryptSystemId = await readEncryptSystemId(page); + return await geekChatMsg(page, normalizedKwargs, encryptSystemId); + } + + // auto: 
try recruiter first, fall back to geek when not found or identity mismatch await navigateToChat(page); - const friend = await findFriendByUid(page, kwargs.uid); - if (!friend) - throw new Error('未找到该候选人'); - const gid = friend.uid; - const securityId = encodeURIComponent(friend.securityId); - const msgUrl = `https://www.zhipin.com/wapi/zpchat/boss/historyMsg?gid=${gid}&securityId=${securityId}&page=${kwargs.page}&c=20&src=0`; - const msgData = await bossFetch(page, msgUrl); - const TYPE_MAP = { - 1: '文本', 2: '图片', 3: '招呼', 4: '简历', 5: '系统', - 6: '名片', 7: '语音', 8: '视频', 9: '表情', - }; - const messages = msgData.zpData?.messages || msgData.zpData?.historyMsgList || []; - return messages.map((m) => { - const fromObj = m.from || {}; - const isSelf = typeof fromObj === 'object' ? fromObj.uid !== friend.uid : false; - return { - from: isSelf ? '我' : (typeof fromObj === 'object' ? fromObj.name : friend.name), - type: TYPE_MAP[m.type] || '其他(' + m.type + ')', - text: m.text || m.body?.text || '', - time: m.time ? 
new Date(m.time).toLocaleString('zh-CN') : '', - }; - }); + const bossResult = await findFriendByUid(page, uid, { allowNonZero: true }); + if (bossResult?.friend) { + return await bossChatMsg(page, normalizedKwargs, bossResult.friend); + } + // Not found or identity mismatch — check for hard errors before falling back + if (bossResult?.code && bossResult.code !== 0 && bossResult.code !== IDENTITY_MISMATCH_CODE) { + assertOk(bossResult); + } + // Fall back to geek side + await navigateToGeekChat(page); + const encryptSystemId = await readEncryptSystemId(page); + const geekFriend = await findGeekFriendByUid(page, uid, { encryptSystemId }); + if (!geekFriend) throw new EmptyResultError('boss chatmsg', 'uid 在招聘端与求职端聊天列表中均未找到'); + if (!geekFriend.securityId) throw new CommandExecutionError('该聊天缺少 securityId,无法获取历史消息'); + const messages = await fetchGeekHistoryMsg(page, geekFriend, { page: pageNum }); + return messages.map((m) => mapGeekMsg(m, geekFriend)); }, }); diff --git a/clis/boss/chatmsg.test.js b/clis/boss/chatmsg.test.js new file mode 100644 index 000000000..bf4c0b38b --- /dev/null +++ b/clis/boss/chatmsg.test.js @@ -0,0 +1,230 @@ +import { describe, expect, it, vi } from 'vitest'; +import { getRegistry } from '@jackwener/opencli/registry'; +import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import './chatmsg.js'; + +const BOSS_FRIEND = { + uid: 12345, + encryptUid: 'enc-boss-uid', + securityId: 'boss-sec-id', + name: '候选人甲', +}; +const BOSS_MSGS = [ + { type: 1, text: 'Hello', from: { uid: 99999, name: 'HR' }, time: 1704067200000 }, + { type: 1, text: '感谢', from: { uid: 12345, name: '候选人甲' }, time: 1704067201000 }, +]; + +const GEEK_FRIEND_LABEL = { + friendId: 11111, + encryptFriendId: 'enc-geek-uid', + name: 'Boss张', + brandName: '公司A', +}; +const GEEK_FRIEND_ENRICHED = { + friendId: 11111, + uid: 67890, + encryptUid: 'enc-geek-uid', + securityId: 'geek-sec-id', + name: 'Boss张', +}; +const 
GEEK_MSGS = [ + { type: 1, text: '欢迎投递', received: true, time: 1704067200000, from: { uid: 67890, name: 'Boss张' } }, + { type: 1, text: '谢谢', received: true, time: 1704067201000, from: { uid: 99999, name: '我' } }, +]; + +function createPageMock(evaluateImpl) { + return { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockImplementation(evaluateImpl), + }; +} + +describe('boss chatmsg', () => { + const command = getRegistry().get('boss/chatmsg'); + + it('rejects empty uid before navigating', async () => { + const page = createPageMock(async () => ({})); + await expect( + command.func(page, { uid: ' ', page: 1, side: 'geek' }) + ).rejects.toBeInstanceOf(ArgumentError); + expect(page.goto).not.toHaveBeenCalled(); + }); + + it('rejects invalid --page before navigating', async () => { + const page = createPageMock(async () => ({})); + await expect( + command.func(page, { uid: 'enc-geek-uid', page: 0, side: 'geek' }) + ).rejects.toBeInstanceOf(ArgumentError); + expect(page.goto).not.toHaveBeenCalled(); + }); + + it('--side boss preserves existing behavior', async () => { + const page = createPageMock(async (script) => { + if (script.includes('getBossFriendListV2')) { + return { code: 0, zpData: { friendList: [BOSS_FRIEND] } }; + } + if (script.includes('boss/historyMsg')) { + return { code: 0, zpData: { messages: BOSS_MSGS } }; + } + return {}; + }); + const rows = await command.func(page, { uid: 'enc-boss-uid', page: 1, side: 'boss' }); + expect(page.goto).toHaveBeenCalledWith(expect.stringContaining('/web/chat/index')); + expect(rows).toHaveLength(2); + expect(rows[0].from).toBe('我'); + expect(rows[1].from).toBe('候选人甲'); + }); + + it('--side geek calls historyMsg with bossId, securityId, page, c=20, src=0', async () => { + const page = createPageMock(async (script) => { + if (script.includes('document.cookie')) return 'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, 
zpData: { friendList: [GEEK_FRIEND_LABEL] } }; + } + if (script.includes('getGeekFriendList.json')) { + return { code: 0, zpData: { result: [GEEK_FRIEND_ENRICHED] } }; + } + if (script.includes('geek/historyMsg')) { + return { code: 0, zpData: { messages: GEEK_MSGS } }; + } + return {}; + }); + await command.func(page, { uid: 'enc-geek-uid', page: 1, side: 'geek' }); + const historyScript = page.evaluate.mock.calls.find((c) => c[0].includes('geek/historyMsg'))?.[0]; + expect(historyScript).toBeDefined(); + expect(historyScript).toContain('bossId=67890'); + expect(historyScript).toContain('securityId='); + expect(historyScript).toContain('page=1'); + expect(historyScript).toContain('c=20'); + expect(historyScript).toContain('src=0'); + }); + + it('--side geek uses from.uid to determine direction, not received flag', async () => { + // Both messages have received:true (mirrors real geek historyMsg API behaviour) + // Direction is determined by whether m.from.uid matches the boss's uid (67890) + const msgsAllReceived = [ + { type: 1, text: '欢迎投递', received: true, time: 1704067200000, from: { uid: 67890, name: 'Boss张' } }, + { type: 1, text: '谢谢', received: true, time: 1704067201000, from: { uid: 99999, name: '我' } }, + ]; + const page = createPageMock(async (script) => { + if (script.includes('document.cookie')) return 'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, zpData: { friendList: [GEEK_FRIEND_LABEL] } }; + } + if (script.includes('getGeekFriendList.json')) { + return { code: 0, zpData: { result: [GEEK_FRIEND_ENRICHED] } }; + } + if (script.includes('geek/historyMsg')) { + return { code: 0, zpData: { messages: msgsAllReceived } }; + } + return {}; + }); + const rows = await command.func(page, { uid: 'enc-geek-uid', page: 1, side: 'geek' }); + // from.uid=67890 matches friend.uid=67890 → boss sent it → '对方' + expect(rows[0].from).toBe('对方'); + // from.uid=99999 does not match → geek sent it → '我' + 
expect(rows[1].from).toBe('我'); + }); + + it('non-text message body does not crash and produces truncated JSON', async () => { + const nonTextMsg = { type: 99, received: true, time: 1704067200000, body: { action: 'resume_request', detail: 'X' } }; + const page = createPageMock(async (script) => { + if (script.includes('document.cookie')) return 'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, zpData: { friendList: [GEEK_FRIEND_LABEL] } }; + } + if (script.includes('getGeekFriendList.json')) { + return { code: 0, zpData: { result: [GEEK_FRIEND_ENRICHED] } }; + } + if (script.includes('geek/historyMsg')) { + return { code: 0, zpData: { messages: [nonTextMsg] } }; + } + return {}; + }); + const rows = await command.func(page, { uid: 'enc-geek-uid', page: 1, side: 'geek' }); + expect(rows).toHaveLength(1); + expect(rows[0].text).toContain('resume_request'); + }); + + it('--side auto falls back to geek when recruiter returns code 24', async () => { + const page = createPageMock(async (script) => { + if (script.includes('getBossFriendListV2')) { + return { code: 24, message: '请切换身份后再试' }; + } + if (script.includes('document.cookie')) return 'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, zpData: { friendList: [GEEK_FRIEND_LABEL] } }; + } + if (script.includes('getGeekFriendList.json')) { + return { code: 0, zpData: { result: [GEEK_FRIEND_ENRICHED] } }; + } + if (script.includes('geek/historyMsg')) { + return { code: 0, zpData: { messages: GEEK_MSGS } }; + } + return {}; + }); + const rows = await command.func(page, { uid: 'enc-geek-uid', page: 1, side: 'auto' }); + expect(rows).toHaveLength(2); + expect(rows[0].from).toBe('对方'); + }); + + it('--side geek throws when uid is not found in geek chat list', async () => { + const page = createPageMock(async (script) => { + if (script.includes('document.cookie')) return 'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, 
zpData: { friendList: [] } }; + } + return {}; + }); + await expect( + command.func(page, { uid: 'unknown-uid', page: 1, side: 'geek' }) + ).rejects.toBeInstanceOf(EmptyResultError); + }); + + it('--side boss maps expired cookies to AuthRequiredError', async () => { + const page = createPageMock(async (script) => { + if (script.includes('getBossFriendListV2')) { + return { code: 7, message: 'Cookie 已过期' }; + } + return {}; + }); + await expect( + command.func(page, { uid: 'enc-boss-uid', page: 1, side: 'boss' }) + ).rejects.toBeInstanceOf(AuthRequiredError); + }); + + it('--side boss treats missing history list as parser drift', async () => { + const page = createPageMock(async (script) => { + if (script.includes('getBossFriendListV2')) { + return { code: 0, zpData: { friendList: [BOSS_FRIEND] } }; + } + if (script.includes('boss/historyMsg')) { + return { code: 0, zpData: {} }; + } + return {}; + }); + await expect( + command.func(page, { uid: 'enc-boss-uid', page: 1, side: 'boss' }) + ).rejects.toBeInstanceOf(CommandExecutionError); + }); + + it('--side geek reports an empty history as EmptyResultError', async () => { + const page = createPageMock(async (script) => { + if (script.includes('document.cookie')) return 'test-enc-sys-id'; + if (script.includes('geekFilterByLabel')) { + return { code: 0, zpData: { friendList: [GEEK_FRIEND_LABEL] } }; + } + if (script.includes('getGeekFriendList.json')) { + return { code: 0, zpData: { result: [GEEK_FRIEND_ENRICHED] } }; + } + if (script.includes('geek/historyMsg')) { + return { code: 0, zpData: { messages: [] } }; + } + return {}; + }); + await expect( + command.func(page, { uid: 'enc-geek-uid', page: 1, side: 'geek' }) + ).rejects.toBeInstanceOf(EmptyResultError); + }); +}); diff --git a/clis/boss/utils.js b/clis/boss/utils.js index 92404ac9c..a49553e75 100644 --- a/clis/boss/utils.js +++ b/clis/boss/utils.js @@ -1,3 +1,5 @@ +import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from 
'@jackwener/opencli/errors'; + // ── Constants ─────────────────────────────────────────────────────────────── const BOSS_DOMAIN = 'www.zhipin.com'; const CHAT_URL = `https://${BOSS_DOMAIN}/web/chat/index`; @@ -10,7 +12,24 @@ const DEFAULT_TIMEOUT = 15_000; */ export function requirePage(page) { if (!page) - throw new Error('Browser page required'); + throw new CommandExecutionError('Browser page required'); +} +export function readPositiveInteger(raw, name, fallback, max) { + const value = raw === undefined || raw === null || raw === '' ? fallback : Number(raw); + if (!Number.isInteger(value) || value < 1) { + throw new ArgumentError(`boss ${name} must be a positive integer`); + } + if (max !== undefined && value > max) { + throw new ArgumentError(`boss ${name} must be <= ${max}`); + } + return value; +} +export function readRequiredString(raw, name) { + const value = String(raw ?? '').trim(); + if (!value) { + throw new ArgumentError(`boss ${name} cannot be empty`); + } + return value; } /** * Navigate to BOSS chat page and wait for it to settle. @@ -33,7 +52,7 @@ export async function navigateTo(page, url, waitSeconds = 1) { */ export function checkAuth(data) { if (COOKIE_EXPIRED_CODES.has(data.code)) { - throw new Error(COOKIE_EXPIRED_MSG); + throw new AuthRequiredError(BOSS_DOMAIN, COOKIE_EXPIRED_MSG); } } /** @@ -41,11 +60,14 @@ export function checkAuth(data) { * Checks for cookie expiry first, then throws with the provided message. */ export function assertOk(data, errorPrefix) { + if (!data || typeof data !== 'object') { + throw new CommandExecutionError(`${errorPrefix ? `${errorPrefix}: ` : ''}Boss API returned malformed response`); + } if (data.code === 0) return; checkAuth(data); const prefix = errorPrefix ? 
`${errorPrefix}: ` : ''; - throw new Error(`${prefix}${data.message || 'Unknown error'} (code=${data.code})`); + throw new CommandExecutionError(`${prefix}${data.message || 'Unknown error'} (code=${data.code})`); } /** * Make a credentialed XHR request via page.evaluate(). @@ -80,7 +102,19 @@ export async function bossFetch(page, url, opts = {}) { }); } `; - const data = await page.evaluate(script); + let data; + try { + data = await page.evaluate(script); + } catch (error) { + if (error instanceof AuthRequiredError || error instanceof CommandExecutionError) { + throw error; + } + const message = error instanceof Error ? error.message : String(error); + throw new CommandExecutionError(`Boss API request failed: ${message}`); + } + if (!data || typeof data !== 'object') { + throw new CommandExecutionError('Boss API returned malformed response'); + } // Auto-check auth unless caller opts out if (!opts.allowNonZero && data.code !== 0) { assertOk(data); @@ -95,8 +129,13 @@ export async function fetchFriendList(page, opts = {}) { const pageNum = opts.pageNum ?? 1; const jobId = opts.jobId ?? '0'; const url = `https://${BOSS_DOMAIN}/wapi/zprelation/friend/getBossFriendListV2.json?page=${pageNum}&status=0&jobId=${jobId}`; - const data = await bossFetch(page, url); - return data.zpData?.friendList || []; + const data = await bossFetch(page, url, { allowNonZero: opts.allowNonZero }); + if (opts.allowNonZero && data.code !== 0) return data; + const list = data.zpData?.friendList; + if (!Array.isArray(list)) { + throw new CommandExecutionError('Boss friend list response did not include zpData.friendList'); + } + return list; } /** * Fetch the recommended candidates (greetRecSortList). 
@@ -104,7 +143,11 @@ export async function fetchFriendList(page, opts = {}) { export async function fetchRecommendList(page) { const url = `https://${BOSS_DOMAIN}/wapi/zprelation/friend/greetRecSortList`; const data = await bossFetch(page, url); - return data.zpData?.friendList || []; + const list = data.zpData?.friendList; + if (!Array.isArray(list)) { + throw new CommandExecutionError('Boss recommend response did not include zpData.friendList'); + } + return list; } /** * Find a friend by encryptUid, searching through friend list and optionally greet list. @@ -115,10 +158,14 @@ export async function findFriendByUid(page, encryptUid, opts = {}) { const checkGreetList = opts.checkGreetList ?? false; // Search friend list pages for (let p = 1; p <= maxPages; p++) { - const friends = await fetchFriendList(page, { pageNum: p }); + const result = await fetchFriendList(page, { pageNum: p, allowNonZero: opts.allowNonZero }); + if (opts.allowNonZero && !Array.isArray(result)) { + return { friend: null, code: result.code }; + } + const friends = Array.isArray(result) ? result : []; const found = friends.find((f) => f.encryptUid === encryptUid); if (found) - return found; + return opts.allowNonZero ? { friend: found, code: 0 } : found; if (friends.length === 0) break; } @@ -127,9 +174,9 @@ export async function findFriendByUid(page, encryptUid, opts = {}) { const greetList = await fetchRecommendList(page); const found = greetList.find((f) => f.encryptUid === encryptUid); if (found) - return found; + return opts.allowNonZero ? { friend: found, code: 0 } : found; } - return null; + return opts.allowNonZero ? 
{ friend: null, code: 0 } : null; } // ── UI automation helpers ─────────────────────────────────────────────────── /** @@ -221,3 +268,185 @@ export function verbose(msg) { console.error(`[opencli:boss] ${msg}`); } } +// ── Geek-side helpers ──────────────────────────────────────────────────────── +export const IDENTITY_MISMATCH_CODE = 24; +const GEEK_CHAT_URL = `https://${BOSS_DOMAIN}/web/geek/chat`; +/** + * Navigate to the job-seeker chat page. + * Establishes the cookie + JS-global context needed for geek-side API calls. + */ +export async function navigateToGeekChat(page, waitSeconds = 2) { + await page.goto(GEEK_CHAT_URL); + await page.wait({ time: waitSeconds }); +} +/** + * Read the encryptSystemId value required by the geek-side list API. + * Strategy (in order): + * 1. Vue app state / Pinia stores / $route.query (Option 1 — runtime source) + * 2. performance.getEntriesByType('resource') — parse from geekFilterByLabel URL + * that the page itself already issued (Option 2 — most deterministic) + * 3. cookie, inline