From 4bd68893ea35d3aca463a2b33db3b90400bb28cc Mon Sep 17 00:00:00 2001 From: sqllocks <29076762+sqllocks@users.noreply.github.com> Date: Mon, 29 Jun 2026 18:55:57 -0400 Subject: [PATCH 1/5] Windows: run the computer-use helper off-darwin (integration + transport) Relax the darwin-only guards in computer-use-helper.ts (ensure/available/relaunchForGrant) and add win32 install()/launch() that spawn the native blitz-cu-helper.exe with the socket path (no LaunchServices, no TCC). Add bundledHelperExe()/installedHelperExe() and a winProc handle killed on shutdown. Transport fix: node:net IPC is a NAMED PIPE on Windows, not AF_UNIX (WINDOWS-INTEGRATION.md section 4 was wrong; verified: a filesystem path gives listen EACCES). sockPath is now \\.\pipe\blitzcu-<pid> on win32; the helper picks pipe-vs-AF_UNIX from the path shape. JSON framing stays byte-identical. index.ts: hoist droppedBrowser() once and force it undefined off-darwin so every Windows drop is a window-connect (HWND to connectionConnectWindow), never the Apple-Events tab path. connection-window-link.ts: send windowId alongside pid on ax_read/ax_act/activate so Windows resolves the exact dropped window via UIA FromHandle (pid to MainWindowHandle is wrong for UWP/WinUI apps). macOS reads pid and ignores windowId.
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/main/computer-use-helper.ts | 69 ++++++++++++++++++++++++++++-- src/main/connection-window-link.ts | 10 +++-- src/main/index.ts | 9 +++- 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/src/main/computer-use-helper.ts b/src/main/computer-use-helper.ts index db0ee85e..1889c7ad 100644 --- a/src/main/computer-use-helper.ts +++ b/src/main/computer-use-helper.ts @@ -12,7 +12,7 @@ import { app } from 'electron' import net from 'node:net' -import { execFile } from 'node:child_process' +import { execFile, spawn } from 'node:child_process' import { existsSync, mkdirSync, readFileSync, rmSync } from 'node:fs' import { join } from 'node:path' import { fileURLToPath } from 'node:url' @@ -71,6 +71,35 @@ function installedHelperApp(): string { return join(app.getPath('appData'), 'BlitzOS', 'BlitzOS Automation.app') } +// Windows analog of bundledHelperApp(): the single self-contained .NET exe shipped via electron-builder +// extraResources (packaged -> resourcesPath; dev -> native/computer-use-helper/build, mirroring the Mac +// .app location). Same robust candidate walk; overridable with BLITZ_COMPUTER_USE_EXE. +function bundledHelperExe(): string { + const rel = ['native', 'computer-use-helper', 'build', 'blitz-cu-helper.exe'] + const here = (() => { + try { + return typeof __dirname !== 'undefined' ? __dirname : fileURLToPath(new URL('.', import.meta.url)) + } catch { + return '' + } + })() + const candidates = [ + process.env.BLITZ_COMPUTER_USE_EXE, + app.isPackaged ? join(process.resourcesPath, 'blitz-cu-helper.exe') : null, + join(app.getAppPath(), ...rel), + here ? join(here, '..', '..', ...rel) : null, // out/main -> repo root in dev + !app.isPackaged ? join(process.cwd(), ...rel) : null + ].filter((p): p is string => !!p) + for (const c of candidates) if (existsSync(c)) return c + return candidates[candidates.length - 1] ?? 
join(app.getAppPath(), ...rel) +} + +// Windows has no TCC, so there is no stable-install copy dance (the Mac copy exists only so a GRANTED +// bundle keeps its identity across updates). Launch the packaged exe in place. +function installedHelperExe(): string { + return bundledHelperExe() +} + // Older helper bundle names we may have installed before the rename to "BlitzOS Automation.app" // (BlitzOS.app collided with the main app; BlitzComputerUse.app was the original). Removed after a // successful install so a renamed upgrade leaves no orphan bundle behind. @@ -95,7 +124,13 @@ const exec = (cmd: string, args: string[]): Promise<{ ok: boolean; stdout: strin class HelperManager { private server: net.Server | null = null private sock: net.Socket | null = null - private sockPath = join(tmpdir(), `blitzcu-${process.pid}.sock`) + // node:net IPC is a Unix domain socket on macOS but a NAMED PIPE on Windows (paths must live under + // \\.\pipe\; a plain filesystem path gives listen EACCES). The helper reads this exact string as argv[0] + // and selects AF_UNIX vs named-pipe from its shape, so the newline-JSON contract is byte-identical. + private sockPath = + process.platform === 'win32' + ? `\\\\.\\pipe\\blitzcu-${process.pid}` + : join(tmpdir(), `blitzcu-${process.pid}.sock`) private buf = '' private pending = new Map) => void>() private scanProgress = new Map void>() @@ -106,10 +141,13 @@ class HelperManager { private connectWaiters: Array<() => void> = [] private supervise = false private ensuring: Promise<{ ok: boolean; error?: string }> | null = null // single-flight ensure() + private winProc: ReturnType | null = null // win32 child handle (mac uses LaunchServices, no handle) /** Copy the signed bundle to the stable install location if missing or version-changed. cp -R * (not fs.cp) preserves the code signature + symlinks the signature depends on. 
*/ private async install(): Promise { + // Windows ships the exe in resources and launches it in place; nothing to install at runtime. + if (process.platform === 'win32') return existsSync(bundledHelperExe()) const src = bundledHelperApp() if (!existsSync(src)) return false const dst = installedHelperApp() @@ -212,6 +250,20 @@ class HelperManager { /** LaunchServices launch (own TCC identity). `-n` forces a fresh instance (used by relaunch). */ private async launch(): Promise { + if (process.platform === 'win32') { + // No LaunchServices / TCC identity on Windows: just spawn the exe with the pipe path as argv[0]. + // Supervision still rides the socket exactly like macOS (onClose -> ensure() respawn), so no exit + // handler is needed here; the handle is kept only so shutdown() can hard-kill an orphan. + const exe = installedHelperExe() + if (!existsSync(exe)) return false + try { + this.winProc = spawn(exe, [this.sockPath], { stdio: 'ignore', windowsHide: true }) + this.winProc.on('error', () => {}) + return true + } catch { + return false + } + } const appPath = installedHelperApp() if (!existsSync(appPath)) return false const r = await exec('/usr/bin/open', ['-n', appPath, '--args', '--connect', this.sockPath]) @@ -289,7 +341,8 @@ class HelperManager { * concurrent callers (e.g. the prewarm + a step) share one in-flight ensure, so two installs never * race on the same dst (one rm -rf while the other cp -R, which produced a spurious "not found"). 
*/ ensure(): Promise<{ ok: boolean; error?: string }> { - if (process.platform !== 'darwin') return Promise.resolve({ ok: false, error: 'macOS only' }) + if (process.platform !== 'darwin' && process.platform !== 'win32') + return Promise.resolve({ ok: false, error: 'unsupported platform' }) if (this.hello) return Promise.resolve({ ok: true }) if (this.ensuring) return this.ensuring this.ensuring = (async () => { @@ -307,6 +360,7 @@ class HelperManager { } available(): boolean { + if (process.platform === 'win32') return existsSync(bundledHelperExe()) return process.platform === 'darwin' && existsSync(bundledHelperApp()) } @@ -386,6 +440,8 @@ class HelperManager { /** THE insight: quit + relaunch the HELPER so a just-granted permission takes effect, leaving * BlitzOS running. Returns once the fresh helper has reconnected. */ async relaunchForGrant(): Promise<{ ok: boolean }> { + // No TCC on Windows, so there is no just-granted permission to apply by relaunching; report liveness. + if (process.platform === 'win32') return { ok: this.connected() } if (process.platform !== 'darwin') return { ok: false } this.wantQuit = true if (this.sock) await this.rpc('quit', 3000) @@ -413,6 +469,13 @@ class HelperManager { } catch { /* gone */ } + try { + // Windows child: it exits on its own when the pipe closes (EOF) + the quit above, but hard-kill the + // handle so a wedged helper can never outlive BlitzOS. No-op on macOS (LaunchServices, no handle). 
+ this.winProc?.kill() + } catch { + /* gone */ + } try { rmSync(this.sockPath, { force: true }) } catch { diff --git a/src/main/connection-window-link.ts b/src/main/connection-window-link.ts index b33e79a1..37f9f9b4 100644 --- a/src/main/connection-window-link.ts +++ b/src/main/connection-window-link.ts @@ -81,7 +81,11 @@ export function makeWindowLink({ connectionOps, helper }: { connectionOps: Conne const a = args || {} if (verb === 'read') { if (a.screenshot) return helper.call('window_screenshot', { windowId: Number(windowId) }, 15000) - const r = await helper.call('ax_read', { pid, maxDepth: a.maxDepth ?? 12, limit: a.max ?? 600 }) + // Send BOTH windowId and pid. The macOS helper reads `pid` (app-level AX) and ignores windowId; + // the Windows helper PREFERS windowId so it resolves the EXACT dropped window via UIA FromHandle. + // pid -> MainWindowHandle is only approximate on Windows and is wrong for UWP/WinUI apps (whose + // visible window is owned by ApplicationFrameHost.exe, a different pid). WINDOWS-INTEGRATION.md §3. + const r = await helper.call('ax_read', { pid, windowId: Number(windowId), maxDepth: a.maxDepth ?? 12, limit: a.max ?? 600 }) return r.error ? r : { result: r.tree } } if (verb === 'act') { @@ -97,9 +101,9 @@ export function makeWindowLink({ connectionOps, helper }: { connectionOps: Conne if (a.x != null || a.px != null) return helper.call('cg_click', { windowId: Number(windowId), x: a.x, y: a.y, px: a.px, py: a.py, button: a.button }) // ref act (background-capable): AX press / setValue on a role+title match const find = (a.find as Record) || { role: a.role, title: a.title ?? a.selector } - return helper.call('ax_act', { pid, find, action: a.action === 'set' ? 'setValue' : 'press', value: a.text }) + return helper.call('ax_act', { pid, windowId: Number(windowId), find, action: a.action === 'set' ? 
'setValue' : 'press', value: a.text }) } - if (verb === 'reveal') return helper.call('activate', { pid }) + if (verb === 'reveal') return helper.call('activate', { pid, windowId: Number(windowId) }) return { error: `verb "${verb}" is not supported for a window connection` } } } diff --git a/src/main/index.ts b/src/main/index.ts index b7dff3c1..e3e253af 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -1804,6 +1804,11 @@ app.whenReady().then(() => { const bounds = { x: Number(m.x) || 0, y: Number(m.y) || 0, w: Number(m.w) || 0, h: Number(m.h) || 0 } const app = String(m.app || '') const bundleId = String(m.bundleId || '') + // Windows has no bundleIds and no Apple-Events tab bridge, so a browser drop must NOT take the tab path + // there: every Windows drop is a WINDOW connect driven by the helper's UIA (HWND). Browser-TAB attach on + // Windows belongs in the CDP layer BlitzOS already has (a follow-up), never here. Hoisted once so finish() + // and the async IIFE below share ONE value instead of recomputing it twice. + const browser = process.platform === 'darwin' ? droppedBrowser(bundleId, app) : undefined // Show the dropped app's icon in the dropbox INSTANTLY (optimistic), before the async tab-resolve + connect // (a Chrome bounds-match can take a beat). The `connected` event below firms up the real connId. mainWindow?.webContents.send('os:pick-event', { kind: 'dropped', windowId: m.windowId, app, icon: m.icon, title: String(m.title || '') }) @@ -1813,7 +1818,7 @@ app.whenReady().then(() => { // P0: a FAILED drop carries the permission it needs (Accessibility for a window, control- for a // browser tab) so the dropbox can show the inline grant screen (Give permission / Don't) instead of just a // red error. permissionFromError maps the raw failure; grantForConnection is the up-front fallback. - const browser = droppedBrowser(bundleId, app) + // (browser is hoisted above and is always undefined off-darwin, so Windows shows a window grant.) 
const permission = extra.permission !== undefined ? extra.permission @@ -1826,7 +1831,7 @@ app.whenReady().then(() => { // A browser drop (Chrome/Safari) connects a TAB (Apple Events) so the agent gets the real page. If the // browser's Automation isn't granted, show the inline grant card in the dropbox — NEVER silently fall back to // a window connect (the confusing bug the user hit). A plain app connects as a window via the helper (AX). - const browser = droppedBrowser(bundleId, app) + // browser is hoisted above the IIFE (undefined off-darwin), so Windows always window-connects below. let res: { error?: string; connId?: string } | undefined let action: 'tab' | 'window' | 'grant' = 'window' if (browser) { From 460733540b7adeb5d57302c3183b378cc99c1548 Mon Sep 17 00:00:00 2001 From: sqllocks <29076762+sqllocks@users.noreply.github.com> Date: Mon, 29 Jun 2026 18:56:13 -0400 Subject: [PATCH 2/5] Windows: package the computer-use helper (electron-builder target + build scripts) Add the C# helper project in-repo at native/computer-use-helper/win/ (Program.cs, Picker.cs, app.manifest, blitz-cu-helper.csproj), the Windows analog of main.swift. build-win.ps1 publishes it to a single self-contained exe at native/computer-use-helper/build/blitz-cu-helper.exe (the .app analog). dist-win.ps1 orchestrates helper build, electron-vite build, then electron-builder --win, mirroring dist-mac.sh; wired as npm run dist:win. electron-builder.yml: win nsis target (x64) + icon + extraResources shipping the exe to resourcesPath; perMachine install into Program Files (the trusted path the future uiAccess="true" + Authenticode signing needs to drive elevated windows). gitignore the helper build outputs and the .NET intermediates. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitignore | 6 + electron-builder.yml | 21 + native/computer-use-helper/build-win.ps1 | 19 + native/computer-use-helper/win/Picker.cs | 481 ++++++++++++++ native/computer-use-helper/win/Program.cs | 607 ++++++++++++++++++ native/computer-use-helper/win/app.manifest | 47 ++ .../win/blitz-cu-helper.csproj | 29 + package.json | 1 + scripts/dist-win.ps1 | 34 + 9 files changed, 1245 insertions(+) create mode 100644 native/computer-use-helper/build-win.ps1 create mode 100644 native/computer-use-helper/win/Picker.cs create mode 100644 native/computer-use-helper/win/Program.cs create mode 100644 native/computer-use-helper/win/app.manifest create mode 100644 native/computer-use-helper/win/blitz-cu-helper.csproj create mode 100644 scripts/dist-win.ps1 diff --git a/.gitignore b/.gitignore index c1c18f83..d9291bbc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,12 @@ node_modules out/ dist/ +# native helper build outputs (Mac CU helper .app + Win blitz-cu-helper.exe) are built on demand by +# dist-mac.sh / dist-win.ps1 and bundled via extraResources; never commit the large binaries or the +# .NET intermediates from the Windows helper project. +native/computer-use-helper/build/ +native/computer-use-helper/win/bin/ +native/computer-use-helper/win/obj/ # …but the VENDORED agent-socket SDK's dist must be IN the repo (CI builds from a clean checkout) !vendor/agent-socket-sdk/dist/ *.log diff --git a/electron-builder.yml b/electron-builder.yml index 7ad80924..20a23a5f 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -75,4 +75,25 @@ mac: extendInfo: # the scan reads user data (Branch B/A) and the app drives local automation NSAppleEventsUsageDescription: BlitzOS automates apps on your behalf. 
+win: + # The Windows Computer Use helper ships as ONE self-contained .NET single-file exe (the analog of the + # Mac "BlitzOS Automation.app"), built by native/computer-use-helper/build-win.ps1 to + # native/computer-use-helper/build/blitz-cu-helper.exe. computer-use-helper.ts resolves it at + # process.resourcesPath/blitz-cu-helper.exe (installedHelperExe) and spawns it with the named-pipe + # socket path; Windows has no TCC, so there is no install-to-AppData step. Build everything with + # `npm run dist:win` (scripts/dist-win.ps1), which builds the helper first then packages. + icon: src/renderer/src/assets/blitz-dock-icon.png # 1024x1024; electron-builder generates the .ico + target: + - target: nsis + arch: [x64] + extraResources: + - from: native/computer-use-helper/build/blitz-cu-helper.exe + to: blitz-cu-helper.exe +nsis: + # perMachine installs into Program Files, the trusted path required before app.manifest uiAccess="true" + # (+ Authenticode signing) can let the helper drive ELEVATED windows. oneClick:false shows the standard + # wizard so the install elevation prompt is explicit. + oneClick: false + perMachine: true + allowToChangeInstallationDirectory: true npmRebuild: false diff --git a/native/computer-use-helper/build-win.ps1 b/native/computer-use-helper/build-win.ps1 new file mode 100644 index 00000000..af4a2c50 --- /dev/null +++ b/native/computer-use-helper/build-win.ps1 @@ -0,0 +1,19 @@ +# Build the Windows Computer Use helper: ONE self-contained single-file exe, the analog of build.sh +# (which builds the signed BlitzOS Automation.app on macOS). Output: +# native/computer-use-helper/build/blitz-cu-helper.exe +# electron-builder.yml (win.extraResources) ships it to process.resourcesPath; computer-use-helper.ts +# installedHelperExe() resolves it there. Windows has NO TCC, so no signing is required to RUN the helper; +# Authenticode signing is only needed later to flip app.manifest uiAccess="true" (drive elevated windows). 
+$ErrorActionPreference = 'Stop' +$here = $PSScriptRoot # native/computer-use-helper +$proj = Join-Path $here 'win\blitz-cu-helper.csproj' +$out = Join-Path $here 'build' + +Write-Host "[cu-helper] dotnet publish -> $out" +dotnet publish $proj -c Release -r win-x64 --self-contained -p:PublishSingleFile=true -o $out +if ($LASTEXITCODE -ne 0) { throw "[cu-helper] dotnet publish failed ($LASTEXITCODE)" } + +$exe = Join-Path $out 'blitz-cu-helper.exe' +if (-not (Test-Path $exe)) { throw "[cu-helper] publish produced no exe at $exe" } +$mb = [math]::Round((Get-Item $exe).Length / 1MB, 1) +Write-Host "[cu-helper] built -> $exe ($mb MB)" diff --git a/native/computer-use-helper/win/Picker.cs b/native/computer-use-helper/win/Picker.cs new file mode 100644 index 00000000..1230a778 --- /dev/null +++ b/native/computer-use-helper/win/Picker.cs @@ -0,0 +1,481 @@ +// Drag-to-attach picker (replaces the Mac CGEventTap + NSPanel highlight). +// +// Mac parity: arm a global pointer watch, hit-test the front normal window under the cursor (skipping +// BlitzOS's island via selfRect, our own overlays, and excludePids), glow-highlight it, and emit: +// pick_hover {windowId,pid,app,title} on a new hovered window +// pick_over {inside} when the drag crosses the drop-zone boundary +// pick_drop {windowId,pid,app,title,icon} on mouse-up INSIDE the drop zone +// pick_cancel on mouse-up outside it +// All as {type:"event", kind:...} frames, matching native/computer-use-helper/main.swift. +// +// Mechanics that differ from macOS: +// * WH_MOUSE_LL is a global hook whose callback fires on the installing thread's message loop — so the +// picker owns a dedicated STA thread that installs the hook, creates the overlays, and pumps messages. +// * "Swallowing" the grab click = returning (IntPtr)1 instead of CallNextHookEx, so the OS never starts +// a window-drag or shifts focus on mouse-down. 
+// * Highlight is a WS_EX_LAYERED|WS_EX_TRANSPARENT (click-through) topmost window painted via +// UpdateLayeredWindow with PREMULTIPLIED alpha (the usual gotcha — GetHbitmap doesn't premultiply). +// +// Skeleton edges to polish: the glow is a simple multi-pass stroke; per-monitor DPI isn't handled +// (mix-and-match scaling will misplace the overlay); the keep-alive of the hook/wndproc delegates is +// load-bearing (GC them and the process crashes mid-gesture). + +using System.Drawing; +using System.Drawing.Drawing2D; +using System.Drawing.Imaging; +using System.Runtime.InteropServices; +using System.Text; +using System.Text.Json.Nodes; + +internal sealed class Picker +{ + private static Picker? _active; + private static readonly object _gate = new(); + + private readonly Action _emit; + private RECT _dropZone; + private RECT _selfRect; + private readonly HashSet _exclude = new(); + + private Thread? _thread; + private uint _threadId; + private IntPtr _hook; + private N.HookProc? _hookProc; // keep alive — GC here == crash + private LayeredWindow? _glow; + private LayeredWindow? 
_icon; + + // gesture state (all touched only on the picker thread) + private IntPtr _hoverRoot; + private IntPtr _grabbed; + private bool _dragging; + private bool _lastInside; + + private Picker(JsonObject m, Action emit) + { + _emit = emit; + _dropZone = RectFrom(m["dropZone"]); + _selfRect = RectFrom(m["selfRect"]); + if (m["excludePids"] is JsonArray pids) + foreach (var p in pids) if (p is not null) _exclude.Add((int)p); + } + + // ---- command surface (called from the main read loop) --------------------------------------- + public static JsonObject Start(JsonObject m, Action emit) + { + lock (_gate) + { + StopLocked(); + var p = new Picker(m, emit); + _active = p; + p._thread = new Thread(p.Run) { IsBackground = true, Name = "blitz-picker" }; + p._thread.SetApartmentState(ApartmentState.STA); + p._thread.Start(); + return new JsonObject { ["ok"] = true }; + } + } + + public static JsonObject Update(JsonObject m) + { + lock (_gate) { if (_active is not null) _active._dropZone = RectFrom(m["dropZone"]); } + return new JsonObject { ["ok"] = true }; + } + + public static JsonObject StopCmd() + { + lock (_gate) StopLocked(); + return new JsonObject { ["ok"] = true }; + } + + private static void StopLocked() + { + var p = _active; + _active = null; + if (p is null) return; + // Break GetMessage; the thread tears down hook + overlays after the loop exits. 
+ for (int i = 0; i < 50 && p._threadId == 0; i++) Thread.Sleep(2); // thread may not have armed yet + if (p._threadId != 0) N.PostThreadMessage(p._threadId, N.WM_QUIT, IntPtr.Zero, IntPtr.Zero); + } + + // ---- picker thread -------------------------------------------------------------------------- + private void Run() + { + _threadId = N.GetCurrentThreadId(); + _glow = LayeredWindow.Create(); + _icon = LayeredWindow.Create(); + _hookProc = HookCallback; + _hook = N.SetWindowsHookEx(N.WH_MOUSE_LL, _hookProc, N.GetModuleHandle(null), 0); + + while (N.GetMessage(out var msg, IntPtr.Zero, 0, 0) > 0) + { + N.TranslateMessage(ref msg); + N.DispatchMessage(ref msg); + } + + if (_hook != IntPtr.Zero) N.UnhookWindowsHookEx(_hook); + _glow?.Dispose(); + _icon?.Dispose(); + _hook = IntPtr.Zero; + _hookProc = null; + } + + private IntPtr HookCallback(int code, IntPtr wParam, IntPtr lParam) + { + if (code >= 0) + { + var data = Marshal.PtrToStructure(lParam); + var pt = data.pt; + switch ((int)wParam) + { + case N.WM_MOUSEMOVE: + if (_dragging) MoveDrag(pt); else UpdateHover(pt); + break; + case N.WM_LBUTTONDOWN: + if (!_dragging && _hoverRoot != IntPtr.Zero) { BeginDrag(pt); return (IntPtr)1; } // swallow grab + break; + case N.WM_LBUTTONUP: + if (_dragging) { EndDrag(pt); return (IntPtr)1; } + break; + } + } + return N.CallNextHookEx(_hook, code, wParam, lParam); + } + + // ---- hover / hit-test ----------------------------------------------------------------------- + private void UpdateHover(POINT p) + { + var root = FrontWindowAt(p); + if (root == _hoverRoot) return; + _hoverRoot = root; + if (root == IntPtr.Zero) { _glow?.Hide(); return; } + N.GetWindowRect(root, out var r); + float scale = N.GetDpiForWindow(root) / 96f; + if (scale <= 0) scale = 1f; + _glow?.PaintGlow(r, scale); + EmitWindow("pick_hover", root); + } + + // Front NORMAL top-level window under the cursor, skipping the island chassis, our overlays, and + // excluded pids. 
WindowFromPoint already ignores WS_EX_TRANSPARENT windows, so our click-through + // overlays don't shadow the hit-test. + private IntPtr FrontWindowAt(POINT p) + { + if (PointIn(_selfRect, p)) return IntPtr.Zero; + var h = N.WindowFromPoint(p); + if (h == IntPtr.Zero) return IntPtr.Zero; + var root = N.GetAncestor(h, N.GA_ROOT); + if (root == IntPtr.Zero || root == _glow?.Handle || root == _icon?.Handle) return IntPtr.Zero; + if (!N.IsWindowVisible(root)) return IntPtr.Zero; + N.GetWindowThreadProcessId(root, out uint pid); + if (_exclude.Contains((int)pid)) return IntPtr.Zero; + if (!N.GetWindowRect(root, out var r)) return IntPtr.Zero; + if (r.Right - r.Left < 40 || r.Bottom - r.Top < 40) return IntPtr.Zero; // skip slivers / tooltips + return root; + } + + // ---- drag ----------------------------------------------------------------------------------- + private void BeginDrag(POINT p) + { + _dragging = true; + _grabbed = _hoverRoot; + _lastInside = PointIn(_dropZone, p); + _glow?.Hide(); + using var ico = WindowIconBitmap(_grabbed, 48); + if (ico is not null) _icon?.PaintAt(ico, p.x - 24, p.y - 24); + } + + private void MoveDrag(POINT p) + { + _icon?.MoveTo(p.x - 24, p.y - 24); + bool inside = PointIn(_dropZone, p); + if (inside != _lastInside) + { + _lastInside = inside; + _emit(Event("pick_over", new JsonObject { ["inside"] = inside })); + } + } + + private void EndDrag(POINT p) + { + _dragging = false; + _icon?.Hide(); + if (PointIn(_dropZone, p) && _grabbed != IntPtr.Zero) + { + var extra = new JsonObject(); + var b64 = WindowIconB64(_grabbed); + if (b64 is not null) extra["icon"] = b64; + EmitWindow("pick_drop", _grabbed, extra); + } + else _emit(Event("pick_cancel")); + _grabbed = IntPtr.Zero; + _hoverRoot = IntPtr.Zero; + } + + // ---- event helpers -------------------------------------------------------------------------- + private void EmitWindow(string kind, IntPtr root, JsonObject? 
extra = null) + { + N.GetWindowThreadProcessId(root, out uint pid); + var o = Event(kind, extra); + o["windowId"] = root.ToInt64(); + o["pid"] = (int)pid; + o["app"] = ProcName((int)pid); + o["title"] = WindowTitle(root); + _emit(o); + } + + private static JsonObject Event(string kind, JsonObject? extra = null) + { + var o = new JsonObject { ["type"] = "event", ["kind"] = kind }; + if (extra is not null) foreach (var kv in extra) o[kv.Key] = kv.Value?.DeepClone(); + return o; + } + + // ---- small utilities ------------------------------------------------------------------------ + private static bool PointIn(RECT r, POINT p) => p.x >= r.Left && p.x < r.Right && p.y >= r.Top && p.y < r.Bottom; + + private static RECT RectFrom(JsonNode? n) + { + if (n is not JsonObject o) return default; + int x = (int?)o["x"] ?? 0, y = (int?)o["y"] ?? 0, w = (int?)o["w"] ?? 0, h = (int?)o["h"] ?? 0; + return new RECT { Left = x, Top = y, Right = x + w, Bottom = y + h }; + } + + private static string ProcName(int pid) + { + try { return System.Diagnostics.Process.GetProcessById(pid).ProcessName; } catch { return ""; } + } + + private static string WindowTitle(IntPtr h) + { + int len = N.GetWindowTextLength(h); + if (len == 0) return ""; + var sb = new StringBuilder(len + 1); + N.GetWindowText(h, sb, sb.Capacity); + return sb.ToString(); + } + + private static IntPtr WindowIcon(IntPtr h) + { + N.SendMessageTimeout(h, N.WM_GETICON, (IntPtr)1 /*ICON_BIG*/, IntPtr.Zero, 0, 200, out var r); + if (r == IntPtr.Zero) r = N.GetClassLongPtr(h, N.GCLP_HICON); + if (r == IntPtr.Zero) N.SendMessageTimeout(h, N.WM_GETICON, IntPtr.Zero /*ICON_SMALL*/, IntPtr.Zero, 0, 200, out r); + return r; + } + + private static Bitmap? 
WindowIconBitmap(IntPtr h, int size) + { + var hic = WindowIcon(h); + if (hic == IntPtr.Zero) return null; + try + { + using var ico = Icon.FromHandle(hic); + var bmp = new Bitmap(size, size, PixelFormat.Format32bppArgb); + using var g = Graphics.FromImage(bmp); + g.InterpolationMode = InterpolationMode.HighQualityBicubic; + g.DrawIcon(ico, new Rectangle(0, 0, size, size)); + return bmp; + } + catch { return null; } + } + + private static string? WindowIconB64(IntPtr h) + { + using var bmp = WindowIconBitmap(h, 64); + if (bmp is null) return null; + using var ms = new MemoryStream(); + bmp.Save(ms, ImageFormat.Png); + return Convert.ToBase64String(ms.ToArray()); + } + + // ===== Layered click-through overlay window ====================================================== + private sealed class LayeredWindow : IDisposable + { + public IntPtr Handle { get; private set; } + private static N.WndProc? _wndProc; + private static ushort _atom; + + public static LayeredWindow Create() + { + EnsureClass(); + const uint ex = N.WS_EX_LAYERED | N.WS_EX_TRANSPARENT | N.WS_EX_TOPMOST | N.WS_EX_TOOLWINDOW | N.WS_EX_NOACTIVATE; + var h = N.CreateWindowEx(ex, new IntPtr(_atom), null, unchecked((int)N.WS_POPUP), + 0, 0, 0, 0, IntPtr.Zero, IntPtr.Zero, N.GetModuleHandle(null), IntPtr.Zero); + return new LayeredWindow { Handle = h }; + } + + private static void EnsureClass() + { + if (_atom != 0) return; + _wndProc = (h, m, w, l) => N.DefWindowProc(h, m, w, l); + var wc = new N.WNDCLASSEX + { + cbSize = Marshal.SizeOf(), + lpfnWndProc = Marshal.GetFunctionPointerForDelegate(_wndProc), + hInstance = N.GetModuleHandle(null), + lpszClassName = "BlitzPickerOverlay", + }; + _atom = N.RegisterClassEx(ref wc); + } + + // Glow = a few inset rectangle strokes with falling alpha around the target bounds. Pad + stroke + // widths scale with the target monitor's DPI so the outline reads the same on 100% and 200% displays. 
+ public void PaintGlow(RECT target, float scale = 1f) + { + int pad = (int)MathF.Round(6 * scale); + int w = (target.Right - target.Left) + pad * 2; + int h = (target.Bottom - target.Top) + pad * 2; + if (w <= 0 || h <= 0) return; + using var bmp = new Bitmap(w, h, PixelFormat.Format32bppArgb); + using (var g = Graphics.FromImage(bmp)) + { + g.SmoothingMode = SmoothingMode.AntiAlias; + g.Clear(Color.Transparent); + var accent = Color.FromArgb(95, 205, 255); + for (int i = 5; i >= 1; i--) + { + int a = 28 + (5 - i) * 24; // outer faint -> inner bright + using var pen = new Pen(Color.FromArgb(a, accent), i * 1.6f * scale); + float o = i * scale; // inset + g.DrawRectangle(pen, o, o, w - 1 - o * 2, h - 1 - o * 2); + } + } + PaintAt(bmp, target.Left - pad, target.Top - pad); + } + + public void PaintAt(Bitmap bmp, int x, int y) + { + N.ShowWindow(Handle, N.SW_SHOWNA); + IntPtr screen = N.GetDC(IntPtr.Zero); + IntPtr mem = N.CreateCompatibleDC(screen); + IntPtr dib = MakePremultipliedDib(mem, bmp, out IntPtr old); + try + { + var size = new SIZE { cx = bmp.Width, cy = bmp.Height }; + var dst = new POINT { x = x, y = y }; + var src = new POINT { x = 0, y = 0 }; + var blend = new N.BLENDFUNCTION { BlendOp = 0, BlendFlags = 0, SourceConstantAlpha = 255, AlphaFormat = 1 }; + N.UpdateLayeredWindow(Handle, screen, ref dst, ref size, mem, ref src, 0, ref blend, N.ULW_ALPHA); + } + finally + { + N.SelectObject(mem, old); + N.DeleteObject(dib); + N.DeleteDC(mem); + N.ReleaseDC(IntPtr.Zero, screen); + } + } + + public void MoveTo(int x, int y) => + N.SetWindowPos(Handle, N.HWND_TOPMOST, x, y, 0, 0, N.SWP_NOSIZE | N.SWP_NOACTIVATE); + + public void Hide() => N.ShowWindow(Handle, N.SW_HIDE); + + // UpdateLayeredWindow demands a premultiplied-alpha 32bpp DIB. Managed Format32bppArgb is + // straight alpha, so premultiply per pixel into a top-down DIBSection. 
+        // Creates a top-down 32bpp DIB section holding `bmp` with premultiplied alpha and selects it into
+        // `memDc`. Returns the DIB handle and, through `old`, the object previously selected into `memDc`
+        // (the caller restores it and deletes the DIB). Returns IntPtr.Zero (with `old` = IntPtr.Zero) when
+        // the DIB section cannot be created, instead of writing through a null pixel pointer.
+        private static unsafe IntPtr MakePremultipliedDib(IntPtr memDc, Bitmap bmp, out IntPtr old)
+        {
+            int w = bmp.Width, hgt = bmp.Height;
+            var bi = new N.BITMAPINFO
+            {
+                // biSize 40 = sizeof(BITMAPINFOHEADER); negative height = top-down rows; biCompression 0 = BI_RGB.
+                biSize = 40, biWidth = w, biHeight = -hgt, biPlanes = 1, biBitCount = 32, biCompression = 0,
+            };
+            IntPtr dib = N.CreateDIBSection(memDc, ref bi, 0, out IntPtr bits, IntPtr.Zero, 0);
+            if (dib == IntPtr.Zero || bits == IntPtr.Zero)
+            {
+                if (dib != IntPtr.Zero) N.DeleteObject(dib);
+                old = IntPtr.Zero;
+                return IntPtr.Zero;
+            }
+            old = N.SelectObject(memDc, dib);
+
+            var rect = new Rectangle(0, 0, w, hgt);
+            var ld = bmp.LockBits(rect, ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
+            try
+            {
+                for (int y = 0; y < hgt; y++)
+                {
+                    byte* s = (byte*)ld.Scan0 + (long)y * ld.Stride; // BGRA, straight alpha
+                    byte* d = (byte*)bits + (long)y * w * 4;          // BGRA, premultiplied
+                    for (int x = 0; x < w; x++)
+                    {
+                        byte a = s[3];
+                        d[0] = (byte)(s[0] * a / 255);
+                        d[1] = (byte)(s[1] * a / 255);
+                        d[2] = (byte)(s[2] * a / 255);
+                        d[3] = a;
+                        s += 4; d += 4;
+                    }
+                }
+            }
+            finally
+            {
+                bmp.UnlockBits(ld);
+            }
+            return dib;
+        }
+
+        // Destroys the overlay window (idempotent: the handle is zeroed after the first call).
+        public void Dispose()
+        {
+            if (Handle != IntPtr.Zero) { N.DestroyWindow(Handle); Handle = IntPtr.Zero; }
+        }
+    }
+}
+
+// ===== P/Invoke surface for the picker ==============================================================
+internal static class N
+{
+    public const int WH_MOUSE_LL = 14;
+    public const int WM_MOUSEMOVE = 0x0200, WM_LBUTTONDOWN = 0x0201, WM_LBUTTONUP = 0x0202, WM_QUIT = 0x0012;
+    public const int WM_GETICON = 0x007F;
+    public const int GA_ROOT = 2, GCLP_HICON = -14;
+    public const uint WS_POPUP = 0x80000000;
+    public const uint WS_EX_LAYERED = 0x00080000, WS_EX_TRANSPARENT = 0x00000020, WS_EX_TOPMOST = 0x00000008,
+        WS_EX_TOOLWINDOW = 0x00000080, WS_EX_NOACTIVATE = 0x08000000;
+    public const int SW_HIDE = 0, SW_SHOWNA = 8;
+    public const uint ULW_ALPHA = 2;
+    public const uint SWP_NOSIZE = 0x0001, SWP_NOACTIVATE = 0x0010;
+    public static readonly IntPtr HWND_TOPMOST = new(-1);
+
+    public delegate IntPtr HookProc(int code, IntPtr wParam, IntPtr lParam);
+    public delegate IntPtr WndProc(IntPtr hWnd, uint msg, IntPtr wParam, IntPtr lParam);
+
+    [StructLayout(LayoutKind.Sequential)] public struct MSLLHOOKSTRUCT { public POINT pt; public uint mouseData, flags, time; public IntPtr dwExtraInfo; }
+    [StructLayout(LayoutKind.Sequential)] public struct BLENDFUNCTION { public byte BlendOp, BlendFlags, SourceConstantAlpha, AlphaFormat; }
+    // Header-only BITMAPINFO (no colour table), which is all a 32bpp BI_RGB DIB needs.
+    [StructLayout(LayoutKind.Sequential)]
+    public struct BITMAPINFO { public int biSize, biWidth, biHeight; public short biPlanes, biBitCount; public int biCompression, biSizeImage, biXPelsPerMeter, biYPelsPerMeter, biClrUsed, biClrImportant; }
+    [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
+    public struct WNDCLASSEX
+    {
+        public int cbSize, style; public IntPtr lpfnWndProc; public int cbClsExtra, cbWndExtra;
+        public IntPtr hInstance, hIcon, hCursor, hbrBackground;
+        [MarshalAs(UnmanagedType.LPWStr)] public string? lpszMenuName;
+        [MarshalAs(UnmanagedType.LPWStr)] public string? lpszClassName;
+        public IntPtr hIconSm;
+    }
+
+    [DllImport("user32.dll")] public static extern IntPtr SetWindowsHookEx(int id, HookProc fn, IntPtr hMod, uint thread);
+    [DllImport("user32.dll")] public static extern bool UnhookWindowsHookEx(IntPtr hHook);
+    [DllImport("user32.dll")] public static extern IntPtr CallNextHookEx(IntPtr hHook, int code, IntPtr w, IntPtr l);
+    [DllImport("user32.dll")] public static extern int GetMessage(out MSG msg, IntPtr hWnd, uint min, uint max);
+    [DllImport("user32.dll")] public static extern bool TranslateMessage(ref MSG msg);
+    [DllImport("user32.dll")] public static extern IntPtr DispatchMessage(ref MSG msg);
+    [DllImport("user32.dll")] public static extern bool PostThreadMessage(uint thread, int msg, IntPtr w, IntPtr l);
+    [DllImport("user32.dll")] public static extern IntPtr WindowFromPoint(POINT p);
+    [DllImport("user32.dll")] public static extern IntPtr GetAncestor(IntPtr h, int flags);
+    [DllImport("user32.dll")] public static extern bool IsWindowVisible(IntPtr h);
+    [DllImport("user32.dll")] public static extern uint GetWindowThreadProcessId(IntPtr h, out uint pid);
+    [DllImport("user32.dll")] public static extern bool GetWindowRect(IntPtr h, out RECT r);
+    [DllImport("user32.dll")] public static extern uint GetDpiForWindow(IntPtr h);
+    [DllImport("user32.dll")] public static extern int GetWindowTextLength(IntPtr h);
+    [DllImport("user32.dll", CharSet = CharSet.Unicode)] public static extern int GetWindowText(IntPtr h, StringBuilder s, int max);
+    [DllImport("user32.dll", CharSet = CharSet.Unicode)] public static extern ushort RegisterClassEx(ref WNDCLASSEX c);
+    [DllImport("user32.dll", CharSet = CharSet.Unicode)]
+    public static extern IntPtr CreateWindowEx(uint ex, IntPtr cls, string? name, int style, int x, int y, int w, int h, IntPtr parent, IntPtr menu, IntPtr inst, IntPtr param);
+    [DllImport("user32.dll")] public static extern IntPtr DefWindowProc(IntPtr h, uint msg, IntPtr w, IntPtr l);
+    [DllImport("user32.dll")] public static extern bool DestroyWindow(IntPtr h);
+    [DllImport("user32.dll")] public static extern bool ShowWindow(IntPtr h, int cmd);
+    [DllImport("user32.dll")] public static extern bool SetWindowPos(IntPtr h, IntPtr after, int x, int y, int cx, int cy, uint flags);
+    [DllImport("user32.dll")] public static extern bool UpdateLayeredWindow(IntPtr h, IntPtr dst, ref POINT pDst, ref SIZE size, IntPtr src, ref POINT pSrc, uint key, ref BLENDFUNCTION blend, uint flags);
+    [DllImport("user32.dll")] public static extern IntPtr GetDC(IntPtr h);
+    [DllImport("user32.dll")] public static extern int ReleaseDC(IntPtr h, IntPtr dc);
+    [DllImport("user32.dll", EntryPoint = "GetClassLongPtrW")] public static extern IntPtr GetClassLongPtr(IntPtr h, int index);
+    [DllImport("user32.dll", CharSet = CharSet.Unicode)] public static extern IntPtr SendMessageTimeout(IntPtr h, int msg, IntPtr w, IntPtr l, uint flags, uint ms, out IntPtr result);
+    [DllImport("kernel32.dll")] public static extern uint GetCurrentThreadId();
+    [DllImport("kernel32.dll", CharSet = CharSet.Unicode)] public static extern IntPtr GetModuleHandle(string? name);
+    [DllImport("gdi32.dll")] public static extern IntPtr CreateCompatibleDC(IntPtr dc);
+    [DllImport("gdi32.dll")] public static extern bool DeleteDC(IntPtr dc);
+    [DllImport("gdi32.dll")] public static extern IntPtr SelectObject(IntPtr dc, IntPtr obj);
+    [DllImport("gdi32.dll")] public static extern bool DeleteObject(IntPtr obj);
+    [DllImport("gdi32.dll")] public static extern IntPtr CreateDIBSection(IntPtr dc, ref BITMAPINFO bmi, uint usage, out IntPtr bits, IntPtr section, uint offset);
+}
+
+// Shared simple structs (kept here so Picker.cs compiles standalone alongside Program.cs).
+[StructLayout(LayoutKind.Sequential)] internal struct RECT { public int Left, Top, Right, Bottom; }
+[StructLayout(LayoutKind.Sequential)] internal struct POINT { public int x, y; }
+[StructLayout(LayoutKind.Sequential)] internal struct SIZE { public int cx, cy; }
+[StructLayout(LayoutKind.Sequential)] internal struct MSG { public IntPtr hwnd; public uint message; public IntPtr wParam, lParam; public uint time; public POINT pt; }
diff --git a/native/computer-use-helper/win/Program.cs b/native/computer-use-helper/win/Program.cs
new file mode 100644
index 00000000..4ae1263d
--- /dev/null
+++ b/native/computer-use-helper/win/Program.cs
@@ -0,0 +1,607 @@
+// BlitzOS Windows computer-use helper (skeleton).
+//
+// Mirrors native/computer-use-helper/main.swift, but on the Windows primitives:
+//   AXUIElement tree/act -> UI Automation (FlaUI)
+//   CGEvent click/type/key -> SendInput (P/Invoke)
+//   CGWindowList -> EnumWindows + GetWindowRect
+//   ScreenCaptureKit -> PrintWindow/CopyFromScreen (skeleton) -> Windows.Graphics.Capture (ship)
+//
+// The newline-JSON framing is IDENTICAL to the Mac helper; only the carrier differs:
+//   BlitzOS LISTENS (a named pipe on win32, an AF_UNIX socket otherwise); this process CONNECTS out to it on launch.
+//   In:  {"id":<n>,"cmd":"<name>", ...args}\n
+//   Out: {"id":<n>, ...payload}\n   (payload is {"ok":true,...} | {"error":"..."} | {tcc|windows|tree|...})
+//
+// Commands NOT ported (intentional): chrome_* / automation_status -> route browser ops through the
+// CDP layer BlitzOS already has. tcc_* -> Windows has no TCC (returned as granted). osa/scan -> PowerShell.
+// pick_* -> Picker (Picker.cs). ax_observe -> Observe (below).
+
+using System.Diagnostics;
+using System.Drawing;
+using System.Drawing.Imaging;
+using System.IO.Pipes;
+using System.Net.Sockets;
+using System.Runtime.InteropServices;
+using System.Text;
+using System.Text.Json.Nodes;
+using FlaUI.Core.AutomationElements;
+using FlaUI.Core.Definitions;
+using FlaUI.Core.Identifiers;
+using FlaUI.UIA3;
+
+internal static class Program
+{
+    // Entry point: connect to BlitzOS, announce ourselves with a "hello" frame, then serve commands one
+    // line at a time until the peer closes the stream or sends "quit". Returns 1 when no socket path was
+    // supplied, 0 on a clean shutdown.
+    private static async Task<int> Main(string[] argv)
+    {
+        // Socket path: argv[0] or BLITZ_HELPER_SOCK (whatever computer-use-helper.ts passes on launch).
+        string? sockPath = argv.Length > 0 ? argv[0] : Environment.GetEnvironmentVariable("BLITZ_HELPER_SOCK");
+        if (string.IsNullOrEmpty(sockPath)) { Console.Error.WriteLine("no socket path"); return 1; }
+
+        // BlitzOS LISTENS and we CONNECT. The carrier depends on what BlitzOS's server is: Node's
+        // node:net uses a Windows NAMED PIPE for an IPC path on win32 (never AF_UNIX), so a path under
+        // \\.\pipe\ means connect a pipe client; a plain filesystem path means AF_UNIX (the Mac transport,
+        // and the C#-side smoke test). The newline-JSON framing layered on top is identical either way.
+        await using Stream transport = ConnectTransport(sockPath);
+        using var reader = new StreamReader(transport, Encoding.UTF8);
+        await using var writer = new StreamWriter(transport, new UTF8Encoding(false)) { AutoFlush = true, NewLine = "\n" };
+
+        // Every frame is newline-delimited JSON tagged with "type": reply | event | hello. The TS reader
+        // (computer-use-helper.ts) routes on msg.type and DROPS anything untyped, so the tag is mandatory.
+        // The picker thread also writes here, so serialize every send under a lock.
+        var sendLock = new object();
+        void Send(JsonObject o) { lock (sendLock) { writer.WriteLine(o.ToJsonString()); } }
+
+        // One UIA instance, one read loop. Commands run SEQUENTIALLY here, which sidesteps UIA's
+        // thread-affinity grief. If you parallelize, marshal every FlaUI call onto a single STA thread.
+        using var automation = new UIA3Automation();
+
+        // ensure() on the TS side blocks until it sees this hello; without it, nothing else runs.
+        Send(new JsonObject
+        {
+            ["type"] = "hello",
+            ["pid"] = Environment.ProcessId,
+            ["tcc"] = new JsonObject { ["accessibility"] = true, ["screen"] = true, ["automation"] = true },
+        });
+
+        string? line;
+        while ((line = await reader.ReadLineAsync()) != null)
+        {
+            if (line.Length == 0) continue;
+            int id = 0;
+            string cmd = "";
+            JsonObject reply;
+            try
+            {
+                var msg = JsonNode.Parse(line)!.AsObject();
+                id = (int?)msg["id"] ?? 0;
+                cmd = (string?)msg["cmd"] ?? "";
+                reply = Dispatch(cmd, msg, automation, Send);
+            }
+            catch (Exception e)
+            {
+                // Malformed JSON or a handler that threw: answer with an error frame rather than dying.
+                reply = new JsonObject { ["error"] = e.Message };
+            }
+            reply["type"] = "reply";
+            reply["id"] = id;
+            Send(reply);
+            if (cmd == "quit") { Observe.StopAll(); Picker.StopCmd(); break; }
+        }
+        return 0;
+    }
+
+    // Pick the transport from the path shape. A \\.\pipe\ (or \\?\pipe\) path is a Windows named pipe
+    // (what node:net listens on for an IPC path on win32); anything else is an AF_UNIX filesystem socket.
+    // Both are returned as a plain Stream so the read/dispatch loop above is transport-agnostic.
+    private static Stream ConnectTransport(string path)
+    {
+        const string pipePrefixDot = @"\\.\pipe\";
+        const string pipePrefixQ = @"\\?\pipe\";
+        if (path.StartsWith(pipePrefixDot, StringComparison.OrdinalIgnoreCase) ||
+            path.StartsWith(pipePrefixQ, StringComparison.OrdinalIgnoreCase))
+        {
+            // NamedPipeClientStream takes a server ("." = local) + the pipe name WITHOUT the \\.\pipe\
+            // prefix. BlitzOS's Node server created it via net.Server.listen("\\\\.\\pipe\\blitzcu-<suffix>").
+            // Both prefixes have the same length, so one Substring serves either form.
+            string name = path.Substring(pipePrefixDot.Length);
+            var pipe = new NamedPipeClientStream(".", name, PipeDirection.InOut, PipeOptions.Asynchronous);
+            pipe.Connect(8000); // milliseconds; throws TimeoutException if the server is not up in time
+            return pipe;
+        }
+        var sock = new Socket(AddressFamily.Unix, SocketType.Stream, ProtocolType.Unspecified);
+        sock.Connect(new UnixDomainSocketEndPoint(path));
+        return new NetworkStream(sock, ownsSocket: true);
+    }
+
+    // Routes one command to its handler and returns the reply payload (Main adds "type" and "id").
+    // `send` is handed to the handlers that emit asynchronous event frames (ax_observe, pick_start).
+    private static JsonObject Dispatch(string cmd, JsonObject m, UIA3Automation ua, Action<JsonObject> send) => cmd switch
+    {
+        "ping" => new() { ["pong"] = true },
+        "quit" => new() { ["ok"] = true },
+
+        // No TCC on Windows. UIA needs no grant; input/capture are largely ungated. Report granted so
+        // the TS onboarding flow is satisfied. (The real gate is UIPI/integrity level — see header.)
+        "tcc_status" or
+        "request_accessibility" or
+        "request_screen" => new() { ["tcc"] = new JsonObject { ["accessibility"] = true, ["screen"] = true, ["automation"] = true } },
+
+        "list_windows" => new() { ["ok"] = true, ["windows"] = Win.ListWindows() },
+        "activate" => Win.Activate(m),
+
+        "cg_click" => Input.Click(m),
+        "cg_type" => Input.Type((string?)m["text"] ?? ""),
+        "cg_key" => Input.Key((string?)m["key"] ?? ""),
+
+        "ax_tree" or "ax_read" => Uia.Tree(m, ua),
+        "ax_act" => Uia.Act(m, ua),
+
+        "screenshot" => Cap.FullScreen(),
+        "window_screenshot" => Cap.Window(m),
+
+        // Deliberately unported — see header.
+        "chrome_pid" or "chrome_list_tabs" or "chrome_js" or "automation_status"
+            => new() { ["error"] = "route browser ops through CDP, not the helper" },
+        "osa" or "scan" => new() { ["error"] = "not on win32 — use PowerShell" },
+        "ax_observe" => Observe.Start(m, ua, send),
+        "pick_start" => Picker.Start(m, send),
+        "pick_update" => Picker.Update(m),
+        "pick_stop" => Picker.StopCmd(),
+
+        _ => new() { ["error"] = $"unknown cmd: {cmd}" },
+    };
+
+    // ---- UIA: tree read + act (replaces AXUIElement) ---------------------------------------------
+    private static class Uia
+    {
+        // Upper bound on the number of elements Find() inspects for one ax_act.
+        private const int FindBudget = 6000;
+
+        // ax_tree / ax_read: serialises the element tree under the resolved root. "maxDepth" (default 12)
+        // bounds the depth and "limit" (default 2000) bounds the number of child nodes emitted.
+        public static JsonObject Tree(JsonObject m, UIA3Automation ua)
+        {
+            var root = Root(m, ua);
+            if (root is null) return new() { ["error"] = "no root element" };
+            int maxDepth = (int?)m["maxDepth"] ?? 12;
+            int limit = (int?)m["limit"] ?? 2000;
+            int count = 0;
+            return new() { ["ok"] = true, ["tree"] = Walk(root, 0, maxDepth, ref count, limit) };
+        }
+
+        // Recursively converts `el` to {role, name, id, bounds, children?}. `count` is shared across the
+        // whole walk so `limit` is a global node budget, not a per-level one.
+        private static JsonObject Walk(AutomationElement el, int depth, int maxDepth, ref int count, int limit)
+        {
+            var node = new JsonObject();
+            try
+            {
+                // FlaUI's convenience getters (el.AutomationId, el.Name, el.BoundingRectangle) THROW
+                // PropertyNotSupportedException when an element does not expose that property (e.g. top-level
+                // Win32 windows have no AutomationId). Reading via Properties.X.ValueOrDefault degrades to a
+                // default instead of throwing, so one unsupported property never aborts the whole subtree.
+                node["role"] = el.Properties.ControlType.ValueOrDefault.ToString();
+                node["name"] = el.Properties.Name.ValueOrDefault ?? "";
+                node["id"] = el.Properties.AutomationId.ValueOrDefault ?? "";
+                var r = el.Properties.BoundingRectangle.ValueOrDefault;
+                node["bounds"] = new JsonObject { ["x"] = r.X, ["y"] = r.Y, ["w"] = r.Width, ["h"] = r.Height };
+            }
+            catch (Exception e) { node["error"] = e.Message; return node; }
+
+            if (depth >= maxDepth || count >= limit) return node;
+            try
+            {
+                var kids = new JsonArray();
+                foreach (var c in el.FindAllChildren())
+                {
+                    if (count++ >= limit) break;
+                    kids.Add(Walk(c, depth + 1, maxDepth, ref count, limit));
+                }
+                if (kids.Count > 0) node["children"] = kids;
+            }
+            catch { /* stale element — emit partial */ }
+            return node;
+        }
+
+        // ax_act: finds the first descendant matching find.role / find.title and applies "action"
+        // (press|invoke, setvalue|set_value, toggle, focus). Replies with an error when the element is not
+        // found, the action is unknown, the element does not support the needed pattern, or the call throws.
+        public static JsonObject Act(JsonObject m, UIA3Automation ua)
+        {
+            var root = Root(m, ua);
+            if (root is null) return new() { ["error"] = "no root element" };
+            var find = m["find"]?.AsObject();
+            int budget = FindBudget;
+            var target = Find(root, (string?)find?["role"], (string?)find?["title"], 0, ref budget);
+            if (target is null) return new() { ["error"] = "element not found" };
+
+            string action = ((string?)m["action"] ?? "").ToLowerInvariant();
+            string? value = (string?)m["value"];
+            try
+            {
+                switch (action)
+                {
+                    case "press" or "invoke":
+                        if (target.Patterns.Invoke.PatternOrDefault is not { } invoke) return Unsupported(action);
+                        invoke.Invoke();
+                        break;
+                    case "setvalue" or "set_value":
+                        if (target.Patterns.Value.PatternOrDefault is not { } valuePattern) return Unsupported(action);
+                        valuePattern.SetValue(value ?? "");
+                        break;
+                    case "toggle":
+                        if (target.Patterns.Toggle.PatternOrDefault is not { } toggle) return Unsupported(action);
+                        toggle.Toggle();
+                        break;
+                    case "focus":
+                        target.Focus();
+                        break;
+                    default:
+                        return new() { ["error"] = $"unknown action: {action}" };
+                }
+            }
+            catch (Exception e) { return new() { ["error"] = $"action {action} failed: {e.Message}" }; }
+
+            // Read the name through Properties.Name.ValueOrDefault: the target.Name getter throws for
+            // elements without a Name, which would turn a successful action into an error reply.
+            string name = "";
+            try { name = target.Properties.Name.ValueOrDefault ?? ""; } catch { /* stale element — keep "" */ }
+            return new() { ["ok"] = true, ["effect"] = new JsonObject { ["action"] = action, ["target"] = name } };
+        }
+
+        // Error reply for an element that lacks the UIA pattern an action needs (previously a silent "ok").
+        private static JsonObject Unsupported(string action) =>
+            new() { ["error"] = $"element does not support action: {action}" };
+
+        // Depth-first match (role == ControlType, title substring, both case-insensitive). The root itself
+        // (depth 0) never matches. `budget` is decremented per visited element and shared across the whole
+        // search, so it caps the total work. Swap for ConditionFactory if you want native-side filtering;
+        // this stays readable and dependency-light.
+        private static AutomationElement? Find(AutomationElement el, string? role, string? title, int depth, ref int budget)
+        {
+            if (budget-- <= 0) return null;
+            try
+            {
+                if (depth > 0)
+                {
+                    bool roleOk = role is null ||
+                        string.Equals(el.Properties.ControlType.ValueOrDefault.ToString(), role, StringComparison.OrdinalIgnoreCase);
+                    bool titleOk = title is null ||
+                        (el.Properties.Name.ValueOrDefault ?? "").Contains(title, StringComparison.OrdinalIgnoreCase);
+                    if (roleOk && titleOk) return el;
+                }
+            }
+            catch { /* stale element — treat as no match */ }
+            try
+            {
+                foreach (var c in el.FindAllChildren())
+                {
+                    var hit = Find(c, role, title, depth + 1, ref budget);
+                    if (hit is not null) return hit;
+                    if (budget <= 0) break;
+                }
+            }
+            catch { /* stale element — stop descending here */ }
+            return null;
+        }
+
+        private static AutomationElement? Root(JsonObject m, UIA3Automation ua)
+        {
+            // Prefer an explicit HWND; fall back to a pid's main window; else the desktop root.
+            if (m["windowId"] is JsonNode w && (long)w != 0)
+                return ua.FromHandle(new IntPtr((long)w));
+            if (m["pid"] is JsonNode p)
+            {
+                try { var h = Process.GetProcessById((int)p).MainWindowHandle; if (h != IntPtr.Zero) return ua.FromHandle(h); }
+                catch { }
+            }
+            return ua.GetDesktop();
+        }
+    }
+
+    // ---- UIA change observation (replaces AXObserver) -------------------------------------------
+    private static class Observe
+    {
+        private static readonly object _lock = new();
+        private static readonly Dictionary<int, List<object>> _regs = new(); // pid -> handler refs (keep-alive)
+        private static readonly Dictionary<int, long> _lastEmit = new();      // pid -> TickCount64 of last event
+        private static object? _focusReg;
+        private static Action<JsonObject>? _send;
+
+        // ax_observe: starts emitting {"type":"event","kind":"ax_changed","pid":…} frames for one process.
+        // Observing a pid that is already observed is a no-op that still replies ok.
+        public static JsonObject Start(JsonObject m, UIA3Automation ua, Action<JsonObject> send)
+        {
+            int pid = ResolvePid(m);
+            if (pid <= 0) return new() { ["error"] = "pid (or resolvable windowId) required" };
+
+            lock (_lock)
+            {
+                _send = send;
+                if (_regs.ContainsKey(pid)) return new() { ["ok"] = true }; // already observing — mirror the Mac dedup
+
+                var root = RootFor(ua, m, pid);
+                if (root is null) return new() { ["error"] = "no window for pid" };
+
+                var handlers = new List<object>();
+                try
+                {
+                    // Focus changes are global in UIA — register once, filter by observed pid in the callback.
+                    _focusReg ??= ua.RegisterFocusChangedEvent(OnFocus);
+
+                    // Subtree property (Value/Name) + structure changes = the Mac set (value/title/main-window changed).
+                    handlers.Add(root.RegisterStructureChangedEvent(TreeScope.Subtree, (_, _, _) => Emit(pid)));
+                    handlers.Add(root.RegisterPropertyChangedEvent(TreeScope.Subtree, (_, _, _) => Emit(pid),
+                        ua.PropertyLibrary.Value.Value, ua.PropertyLibrary.Element.Name));
+                }
+                catch (Exception ex) { return new() { ["error"] = $"register failed: {ex.Message}" }; }
+
+                _regs[pid] = handlers;
+            }
+            return new() { ["ok"] = true };
+        }
+
+        // Global focus callback: wakes the consumer only when the newly focused element belongs to an observed pid.
+        private static void OnFocus(AutomationElement el)
+        {
+            try { int pid = el.Properties.ProcessId.ValueOrDefault; if (Observed(pid)) Emit(pid); } catch { }
+        }
+
+        private static bool Observed(int pid) { lock (_lock) { return _regs.ContainsKey(pid); } }
+
+        // UIA Subtree events fire in bursts; the consumer just wakes the agent, so cap to one wake / pid / 250ms.
+        private static void Emit(int pid)
+        {
+            long now = Environment.TickCount64;
+            lock (_lock)
+            {
+                if (_lastEmit.TryGetValue(pid, out long last) && now - last < 250) return;
+                _lastEmit[pid] = now;
+            }
+            _send?.Invoke(new JsonObject { ["type"] = "event", ["kind"] = "ax_changed", ["pid"] = pid });
+        }
+
+        // Mac never unregisters either — observers live until quit, so this just drops the keep-alive refs and
+        // lets process teardown release the COM handlers. (Per-pid unregister isn't needed for the MVP.)
+        public static void StopAll()
+        {
+            lock (_lock) { _regs.Clear(); _focusReg = null; _lastEmit.Clear(); }
+        }
+
+        // An explicit pid wins; otherwise the pid owning `windowId`; -1 when neither is present.
+        private static int ResolvePid(JsonObject m)
+        {
+            if (m["pid"] is JsonNode p) return (int)p;
+            if (m["windowId"] is JsonNode w && (long)w != 0)
+            {
+                Native.GetWindowThreadProcessId(new IntPtr((long)w), out uint pid);
+                return (int)pid;
+            }
+            return -1;
+        }
+
+        // Element to observe: the explicit window when the request names one (same preference as Uia.Root,
+        // and the only reliable handle when the process reports no usable main window), else the pid's
+        // main window.
+        private static AutomationElement? RootFor(UIA3Automation ua, JsonObject m, int pid)
+        {
+            if (m["windowId"] is JsonNode w && (long)w != 0)
+            {
+                try { return ua.FromHandle(new IntPtr((long)w)); }
+                catch { /* invalid/destroyed HWND — fall back to the pid's main window */ }
+            }
+            return RootForPid(ua, pid);
+        }
+
+        private static AutomationElement? RootForPid(UIA3Automation ua, int pid)
+        {
+            try { var h = Process.GetProcessById(pid).MainWindowHandle; if (h != IntPtr.Zero) return ua.FromHandle(h); }
+            catch { }
+            return null;
+        }
+    }
+
+    // ---- Synthetic input: SendInput (replaces CGEvent) -------------------------------------------
+    private static class Input
+    {
+        // cg_click: moves the cursor to the resolved point and sends a left (default) or right
+        // button down/up pair.
+        public static JsonObject Click(JsonObject m)
+        {
+            var (x, y) = ResolvePoint(m);
+            Native.SetCursorPos(x, y);
+            bool right = (string?)m["button"] == "right";
+            Send(Mouse(right ? Native.MOUSEEVENTF_RIGHTDOWN : Native.MOUSEEVENTF_LEFTDOWN));
+            Send(Mouse(right ? Native.MOUSEEVENTF_RIGHTUP : Native.MOUSEEVENTF_LEFTUP));
+            // Mac emits effect.clicked (main.swift cg_click); match the key for byte-parity.
+            return new() { ["ok"] = true, ["effect"] = new JsonObject { ["clicked"] = new JsonObject { ["x"] = x, ["y"] = y } } };
+        }
+
+        // cg_type: sends each UTF-16 code unit of `text` as a KEYEVENTF_UNICODE down/up pair.
+        public static JsonObject Type(string text)
+        {
+            foreach (var ch in text)
+            {
+                Send(KeyUnicode(ch, false));
+                Send(KeyUnicode(ch, true));
+            }
+            return new() { ["ok"] = true, ["effect"] = new JsonObject { ["typed"] = text } };
+        }
+
+        // "ctrl+shift+c" / "cmd+c" / "enter". cmd|meta|super -> Ctrl (the cross-platform-shortcut
+        // convention); win -> the actual Windows key. Adjust if your agent emits literal Windows specs.
+        // Modifiers go down in the order given, the key is tapped, then modifiers are released in reverse.
+        public static JsonObject Key(string spec)
+        {
+            var parts = spec.Split('+', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
+            if (parts.Length == 0) return new() { ["error"] = "empty key" };
+            var mods = new List<ushort>();
+            ushort? key = null;
+            foreach (var raw in parts)
+            {
+                var t = raw.ToLowerInvariant();
+                if (Mod(t) is ushort mv) mods.Add(mv);
+                else if (Vk(t) is ushort kv) key = kv;
+                else return new() { ["error"] = $"unknown key token: {raw}" };
+            }
+            if (key is null) return new() { ["error"] = "no non-modifier key" };
+            foreach (var mv in mods) Send(KeyVk(mv, false));
+            Send(KeyVk(key.Value, false));
+            Send(KeyVk(key.Value, true));
+            for (int i = mods.Count - 1; i >= 0; i--) Send(KeyVk(mods[i], true));
+            return new() { ["ok"] = true, ["effect"] = new JsonObject { ["key"] = spec } };
+        }
+
+        private static (int x, int y) ResolvePoint(JsonObject m)
+        {
+            // Absolute screen coords (x,y), or window-relative fractions (windowId + px,py 0..1).
+            if (m["windowId"] is JsonNode w && (long)w != 0 && m["px"] is not null && m["py"] is not null)
+            {
+                if (Native.GetWindowRect(new IntPtr((long)w), out var r))
+                {
+                    double px = (double)m["px"]!, py = (double)m["py"]!;
+                    return (r.Left + (int)((r.Right - r.Left) * px), r.Top + (int)((r.Bottom - r.Top) * py));
+                }
+            }
+            return ((int?)m["x"] ?? 0, (int?)m["y"] ?? 0);
+        }
+
+        // Modifier token -> virtual-key code, or null when `t` is not a modifier.
+        private static ushort? Mod(string t) => t switch
+        {
+            "ctrl" or "control" => 0x11,
+            "shift" => 0x10,
+            "alt" or "option" => 0x12,
+            "cmd" or "meta" or "super" => 0x11, // -> Ctrl
+            "win" => 0x5B,
+            _ => null,
+        };
+
+        // Key token -> virtual-key code, or null when unknown. Only ASCII letters/digits map 1:1 onto VK
+        // codes ('A'..'Z', '0'..'9'); any other single character is rejected rather than sent as a bogus VK.
+        private static ushort? Vk(string t)
+        {
+            if (t.Length == 1 && char.IsAsciiLetterOrDigit(t[0])) return char.ToUpperInvariant(t[0]);
+            return t switch
+            {
+                "enter" or "return" => 0x0D,
+                "tab" => 0x09,
+                "esc" or "escape" => 0x1B,
+                "space" => 0x20,
+                "backspace" => 0x08,
+                "delete" => 0x2E,
+                "up" => 0x26, "down" => 0x28, "left" => 0x25, "right" => 0x27,
+                "home" => 0x24, "end" => 0x23, "pageup" => 0x21, "pagedown" => 0x22,
+                _ => null,
+            };
+        }
+
+        private static Native.INPUT Mouse(uint flags) => new()
+        {
+            type = Native.INPUT_MOUSE,
+            U = new Native.InputUnion { mi = new Native.MOUSEINPUT { dwFlags = flags } }
+        };
+
+        private static Native.INPUT KeyUnicode(char ch, bool up) => new()
+        {
+            type = Native.INPUT_KEYBOARD,
+            U = new Native.InputUnion
+            {
+                ki = new Native.KEYBDINPUT
+                {
+                    wVk = 0,
+                    wScan = ch,
+                    dwFlags = Native.KEYEVENTF_UNICODE | (up ? Native.KEYEVENTF_KEYUP : 0)
+                }
+            }
+        };
+
+        private static Native.INPUT KeyVk(ushort vk, bool up) => new()
+        {
+            type = Native.INPUT_KEYBOARD,
+            U = new Native.InputUnion { ki = new Native.KEYBDINPUT { wVk = vk, dwFlags = up ? Native.KEYEVENTF_KEYUP : 0 } }
+        };
+
+        // Injects a single INPUT record. The cb argument must be the marshalled size of INPUT.
+        private static void Send(Native.INPUT input)
+        {
+            var arr = new[] { input };
+            Native.SendInput(1, arr, Marshal.SizeOf<Native.INPUT>());
+        }
+    }
+
+    // ---- Windows: enumerate + activate (replaces CGWindowList) -----------------------------------
+    private static class Win
+    {
+        // list_windows: every visible top-level window that has a title and a non-empty rectangle.
+        public static JsonArray ListWindows()
+        {
+            var list = new JsonArray();
+            Native.EnumWindows((h, _) =>
+            {
+                if (!Native.IsWindowVisible(h)) return true;
+                int len = Native.GetWindowTextLength(h);
+                if (len == 0) return true;
+                var sb = new StringBuilder(len + 1);
+                Native.GetWindowText(h, sb, sb.Capacity);
+                if (!Native.GetWindowRect(h, out var r)) return true;
+                if (r.Right - r.Left <= 0 || r.Bottom - r.Top <= 0) return true;
+
+                Native.GetWindowThreadProcessId(h, out uint pid);
+                string app = "";
+                try { app = Process.GetProcessById((int)pid).ProcessName; } catch { }
+
+                list.Add(new JsonObject
+                {
+                    ["windowId"] = h.ToInt64(),
+                    ["pid"] = (int)pid,
+                    ["app"] = app,
+                    ["title"] = sb.ToString(),
+                    ["bounds"] = new JsonObject { ["x"] = r.Left, ["y"] = r.Top, ["w"] = r.Right - r.Left, ["h"] = r.Bottom - r.Top },
+                });
+                return true; // keep enumerating
+            }, IntPtr.Zero);
+            return list;
+        }
+
+        // activate: brings the window named by windowId (preferred) or the pid's main window to the foreground.
+        public static JsonObject Activate(JsonObject m)
+        {
+            IntPtr h = IntPtr.Zero;
+            if (m["windowId"] is JsonNode w && (long)w != 0) h = new IntPtr((long)w);
+            else if (m["pid"] is JsonNode p) { try { h = Process.GetProcessById((int)p).MainWindowHandle; } catch { } }
+            if (h == IntPtr.Zero) return new() { ["error"] = "no window" };
+            bool ok = Native.ForceForeground(h);
+            return new() { ["ok"] = ok };
+        }
+    }
+
+    // ---- Capture (skeleton). Ship: Windows.Graphics.Capture for occluded/GPU content -------------
+    private static class Cap
+    {
+        // screenshot: the whole virtual screen (all monitors) as a base64 PNG.
+        public static JsonObject FullScreen()
+        {
+            var b = Native.VirtualScreenBounds();
+            using var bmp = new Bitmap(b.Width, b.Height, PixelFormat.Format32bppArgb);
+            using (var g = Graphics.FromImage(bmp)) g.CopyFromScreen(b.X, b.Y, 0, 0, b.Size);
+            return new() { ["ok"] = true, ["png"] = ToB64(bmp) };
+        }
+
+        // window_screenshot: one window rendered via PrintWindow as a base64 PNG. Replies with an error
+        // when the window has no area or PrintWindow fails, instead of returning a blank image.
+        public static JsonObject Window(JsonObject m)
+        {
+            if (m["windowId"] is not JsonNode w || (long)w == 0) return new() { ["error"] = "windowId required" };
+            IntPtr h = new((long)w);
+            if (!Native.GetWindowRect(h, out var r)) return new() { ["error"] = "GetWindowRect failed" };
+            int width = r.Right - r.Left, height = r.Bottom - r.Top;
+            // new Bitmap(0, …) throws an opaque ArgumentException; fail with a readable message instead.
+            if (width <= 0 || height <= 0) return new() { ["error"] = "window has empty bounds" };
+            using var bmp = new Bitmap(width, height, PixelFormat.Format32bppArgb);
+            using (var g = Graphics.FromImage(bmp))
+            {
+                IntPtr hdc = g.GetHdc();
+                bool drawn;
+                try
+                {
+                    // PW_RENDERFULLCONTENT (2) grabs many GPU/DWM surfaces PrintWindow used to miss. Still
+                    // returns black for SetWindowDisplayAffinity(WDA_EXCLUDEFROMCAPTURE) windows — by design.
+                    drawn = Native.PrintWindow(h, hdc, 2);
+                }
+                finally
+                {
+                    g.ReleaseHdc(hdc);
+                }
+                if (!drawn) return new() { ["error"] = "PrintWindow failed" };
+            }
+            return new() { ["ok"] = true, ["png"] = ToB64(bmp) };
+        }
+
+        private static string ToB64(Bitmap bmp)
+        {
+            using var ms = new MemoryStream();
+            bmp.Save(ms, ImageFormat.Png);
+            return Convert.ToBase64String(ms.ToArray());
+        }
+    }
+
+    // ---- P/Invoke ---------------------------------------------------------------------------------
+    private static class Native
+    {
+        public const uint INPUT_MOUSE = 0, INPUT_KEYBOARD = 1;
+        public const uint MOUSEEVENTF_LEFTDOWN = 0x0002, MOUSEEVENTF_LEFTUP = 0x0004;
+        public const uint MOUSEEVENTF_RIGHTDOWN = 0x0008, MOUSEEVENTF_RIGHTUP = 0x0010;
+        public const uint KEYEVENTF_KEYUP = 0x0002, KEYEVENTF_UNICODE = 0x0004;
+
+        [StructLayout(LayoutKind.Sequential)] public struct RECT { public int Left, Top, Right, Bottom; }
+        [StructLayout(LayoutKind.Sequential)] public struct MOUSEINPUT { public int dx, dy; public uint mouseData, dwFlags, time; public IntPtr dwExtraInfo; }
+        [StructLayout(LayoutKind.Sequential)] public struct KEYBDINPUT { public ushort wVk, wScan; public uint dwFlags, time; public IntPtr dwExtraInfo; }
+        [StructLayout(LayoutKind.Explicit)] public struct InputUnion { [FieldOffset(0)] public MOUSEINPUT mi; [FieldOffset(0)] public KEYBDINPUT ki; }
+        [StructLayout(LayoutKind.Sequential)] public struct INPUT { public uint type; public InputUnion U; }
+
+        public delegate bool EnumWindowsProc(IntPtr hWnd, IntPtr lParam);
+
+        [DllImport("user32.dll")] public static extern uint SendInput(uint n, INPUT[] inputs, int cb);
+        [DllImport("user32.dll")] public static extern bool SetCursorPos(int x, int y);
+        [DllImport("user32.dll")] public static extern bool EnumWindows(EnumWindowsProc cb, IntPtr lParam);
+        [DllImport("user32.dll")] public static extern bool IsWindowVisible(IntPtr h);
+        [DllImport("user32.dll")] public static extern int GetWindowTextLength(IntPtr h);
+        [DllImport("user32.dll", CharSet = CharSet.Unicode)] public static extern int GetWindowText(IntPtr h, StringBuilder s, int max);
+        [DllImport("user32.dll")] public static extern bool GetWindowRect(IntPtr h, out RECT r);
+        [DllImport("user32.dll")] public static extern uint GetWindowThreadProcessId(IntPtr h, out uint pid);
+        [DllImport("user32.dll")] public static extern bool SetForegroundWindow(IntPtr h);
+        [DllImport("user32.dll")] public static extern IntPtr GetForegroundWindow();
+        [DllImport("user32.dll")] public static extern bool BringWindowToTop(IntPtr h);
+        [DllImport("user32.dll")] public static extern bool ShowWindow(IntPtr h, int cmd);
+        [DllImport("user32.dll")] public static extern bool IsIconic(IntPtr h);
+        [DllImport("user32.dll")] public static extern bool AttachThreadInput(uint idAttach, uint idAttachTo, bool attach);
+        [DllImport("kernel32.dll")] public static extern uint GetCurrentThreadId();
+
+        public const int SW_RESTORE = 9;
+
+        // SetForegroundWindow alone obeys Windows' focus-stealing rules and silently no-ops when the
+        // caller isn't already the foreground process. Briefly attaching our input queue to the current
+        // foreground thread's queue lifts that restriction for the duration of the call — the standard
+        // recipe. (Some locked-down setups still need SystemParametersInfo(SPI_SETFOREGROUNDLOCKTIMEOUT,0).)
+        // Returns the result of SetForegroundWindow. A minimised window is restored first.
+        public static bool ForceForeground(IntPtr h)
+        {
+            if (IsIconic(h)) ShowWindow(h, SW_RESTORE);
+            uint fgThread = GetWindowThreadProcessId(GetForegroundWindow(), out _);
+            uint thisThread = GetCurrentThreadId();
+            bool attached = fgThread != thisThread && AttachThreadInput(fgThread, thisThread, true);
+            BringWindowToTop(h);
+            bool ok = SetForegroundWindow(h);
+            if (attached) AttachThreadInput(fgThread, thisThread, false);
+            return ok;
+        }
+        [DllImport("user32.dll")] public static extern bool PrintWindow(IntPtr h, IntPtr hdc, uint flags);
+        [DllImport("user32.dll")] public static extern int GetSystemMetrics(int i);
+
+        // Bounding rectangle of all monitors: SM_XVIRTUALSCREEN (76), SM_YVIRTUALSCREEN (77),
+        // SM_CXVIRTUALSCREEN (78), SM_CYVIRTUALSCREEN (79).
+        public static Rectangle VirtualScreenBounds() =>
+            new(GetSystemMetrics(76), GetSystemMetrics(77), GetSystemMetrics(78), GetSystemMetrics(79)); // SM_*VIRTUALSCREEN
+    }
+}
diff --git a/native/computer-use-helper/win/app.manifest b/native/computer-use-helper/win/app.manifest new file mode 100644 index 00000000..969551a3 --- /dev/null +++ b/native/computer-use-helper/win/app.manifest @@ -0,0 +1,47 @@ + + + + + + + + + PerMonitorV2, PerMonitor + true/PM + true + + + + + + + + + + + + + + + + + + + + + diff --git a/native/computer-use-helper/win/blitz-cu-helper.csproj b/native/computer-use-helper/win/blitz-cu-helper.csproj new file mode 100644 index 00000000..36842a7a --- /dev/null +++ b/native/computer-use-helper/win/blitz-cu-helper.csproj @@ -0,0 +1,29 @@ + + + + Exe + net8.0-windows + enable + enable + blitz-cu-helper + + true + + true + + app.manifest + + + + + + + + + + diff --git a/package.json b/package.json index c2166c73..38502830 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "onboard:fda": "node scripts/onboarding-scan.mjs --prompt src/main/blitzos-onboarding.md --out -", "onboard:scan": "node scripts/onboarding-scan.mjs --out -", "dist": "bash scripts/dist-mac.sh", + "dist:win": "powershell -ExecutionPolicy Bypass -File scripts/dist-win.ps1", "build:app": "bash scripts/build-local-app.sh", "test": "node scripts/run-tests.mjs"
}, diff --git a/scripts/dist-win.ps1 b/scripts/dist-win.ps1 new file mode 100644 index 00000000..e91a4aa6 --- /dev/null +++ b/scripts/dist-win.ps1 @@ -0,0 +1,34 @@ +# Local Windows prod build, the analog of scripts/dist-mac.sh. Output: release\BlitzOS Setup .exe +# +# Order matters: build the native CU helper FIRST so electron-builder's win.extraResources finds the exe, +# then electron-vite build, then electron-builder --win. +# +# Signing: UNSIGNED by default (electron-builder builds an unsigned installer, which runs fine because the +# helper needs no TCC). For a SIGNED build set the standard electron-builder env vars before running: +# $env:CSC_LINK = 'path\to\cert.pfx'; $env:CSC_KEY_PASSWORD = '...' +# Signing is a prerequisite for flipping app.manifest uiAccess="true" + a perMachine (Program Files) +# install, which is what lets the helper drive ELEVATED windows. Until then it stays asInvoker. +$ErrorActionPreference = 'Stop' +$root = Split-Path -Parent $PSScriptRoot # repo root +Set-Location $root + +Write-Host "[dist-win] building native CU helper" +& (Join-Path $root 'native\computer-use-helper\build-win.ps1') +if ($LASTEXITCODE -ne 0) { throw "[dist-win] helper build failed ($LASTEXITCODE)" } + +Write-Host "[dist-win] electron-vite build" +npm run build +if ($LASTEXITCODE -ne 0) { throw "[dist-win] electron-vite build failed ($LASTEXITCODE)" } + +if (-not $env:CSC_LINK) { + # No cert provided: build cleanly unsigned instead of letting electron-builder hunt the cert store. 
+ $env:CSC_IDENTITY_AUTO_DISCOVERY = 'false' + Write-Host "[dist-win] UNSIGNED build (set CSC_LINK + CSC_KEY_PASSWORD for a signed installer)" +} + +Write-Host "[dist-win] electron-builder --win" +npx electron-builder --win --x64 --publish never +if ($LASTEXITCODE -ne 0) { throw "[dist-win] electron-builder failed ($LASTEXITCODE)" } + +Get-ChildItem (Join-Path $root 'release') -ErrorAction SilentlyContinue | + Where-Object { -not $_.PSIsContainer } | Select-Object Name, @{n='MB';e={[math]::Round($_.Length/1MB,1)}} From d15c79fea77483a1975d2df074978375d24ef783 Mon Sep 17 00:00:00 2001 From: sqllocks <29076762+sqllocks@users.noreply.github.com> Date: Mon, 29 Jun 2026 19:37:29 -0400 Subject: [PATCH 3/5] Windows pack: declare extraResources per-platform Top-level extraResources (arm64 tmux, the two macOS helper .app bundles, notch-geometry, activity-logging) were inherited by every target, so a Windows pack would skip the missing macOS build outputs with warnings and wrongly ship the arm64 tmux. Move all macOS runtime resources under mac.extraResources; keep only the win exe + activity-logging under win.extraResources. No top-level extraResources remain, so the win target inherits none of mac's. Co-Authored-By: Claude Opus 4.8 (1M context) --- electron-builder.yml | 56 +++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 34 deletions(-) diff --git a/electron-builder.yml b/electron-builder.yml index 20a23a5f..fe4c0304 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -21,40 +21,9 @@ files: # UNPACKED below; onboarding.ts swaps app.asar -> app.asar.unpacked when packaged. - scripts/onboarding-scan.mjs - src/main/blitzos-onboarding.md -extraResources: - # Portable tmux (arm64, static libevent + system curses, Developer-ID signed): the agent-terminal - # runtime ships WITH the app — the user never installs dependencies (docs/prod-ci.md). 
- - from: vendor/bin/tmux - to: bin/tmux - # The Computer Use helper (separate Developer-ID-signed .app that holds Accessibility + Screen - # Recording TCC, so BlitzOS never quits/reopens for them — plans/blitzos-computer-use-helper.md). - # Built + signed by scripts/dist-mac.sh before packaging; the lifecycle manager (computer-use- - # helper.ts) installs it to ~/Library/Application Support/BlitzOS and launches it via LaunchServices. - - from: "native/computer-use-helper/build/BlitzOS Automation.app" - to: "BlitzOS Automation.app" - # The native dynamic-island HUD (faceless ⌥Space notch helper — plans/blitzos-dynamic-island.md). - # Built + signed by scripts/dist-mac.sh before packaging (same as the CU helper above); index.ts - # resolves it at process.resourcesPath/BlitzIsland.app and launchIslandHelper (island-bridge.mjs) - # `open`s it. Without this copy the packaged candidate never exists and the HUD never starts in prod. - - from: native/island-helper/build/BlitzIsland.app - to: BlitzIsland.app - # The runtime Dock-icon source for dev/notch-mode branding (index.ts app.dock.setIcon) — an INSET copy (the - # standard macOS icon grid, ~86% / 7% margin) so app.dock.setIcon renders it at the same Dock tile size as - # the other mac apps. Shipped UNPACKED so the packaged build reads it at process.resourcesPath/blitz-dock-icon.png. - - from: src/renderer/src/assets/blitz-dock-icon.png - to: blitz-dock-icon.png - # The notch-geometry CLI: reads the EXACT physical notch from NSScreen so the always-interactive notch hit-window - # sits over the real cutout (src/main/notch-overlay.ts readNotchGeometry resolves it at process.resourcesPath/ - # notch-geometry). No notch => hasNotch:false and BlitzOS draws no band. Plain binary, no TCC/entitlement needed. - - from: native/notch-geometry/notch-geometry - to: notch-geometry - # Product activity-logging config ({url, WRITE-ONLY ingest key}). 
CI (release.yml) writes build-config/activity- - # logging.json from repo secrets ONLY when ACTIVITY_INGEST_KEY is set; in local/fork builds the file is absent and - # this resource is skipped → analytics OFF (activity-logging.mjs reads it at process.resourcesPath/activity- - # logging.json, after the local ~/.blitzos override). The bundled key is extractable from the public app, so it is - # an ingest-only credential you can rotate, never an account/app token. Never committed (.gitignore: build-config/). - - from: build-config/activity-logging.json - to: activity-logging.json +# extraResources are declared PER-PLATFORM (under mac: / win: below), NOT at top level, so a Windows pack +# never inherits the macOS build outputs (the .app bundles + notch-geometry, which only dist-mac.sh +# produces) or the arm64 tmux. Cross-platform resources (activity-logging) are listed under each platform. asarUnpack: - scripts/onboarding-scan.mjs - src/main/blitzos-onboarding.md @@ -75,6 +44,22 @@ mac: extendInfo: # the scan reads user data (Branch B/A) and the app drives local automation NSAppleEventsUsageDescription: BlitzOS automates apps on your behalf. + # macOS runtime resources (built by scripts/dist-mac.sh): portable arm64 tmux, the two signed helper + # .app bundles, the notch-geometry CLI, the dock-icon source, and the optional activity-logging config + # (skipped when absent in local/fork builds). Declared HERE so a Windows pack never tries to copy them. 
+ extraResources: + - from: vendor/bin/tmux + to: bin/tmux + - from: "native/computer-use-helper/build/BlitzOS Automation.app" + to: "BlitzOS Automation.app" + - from: native/island-helper/build/BlitzIsland.app + to: BlitzIsland.app + - from: native/notch-geometry/notch-geometry + to: notch-geometry + - from: src/renderer/src/assets/blitz-dock-icon.png + to: blitz-dock-icon.png + - from: build-config/activity-logging.json + to: activity-logging.json win: # The Windows Computer Use helper ships as ONE self-contained .NET single-file exe (the analog of the # Mac "BlitzOS Automation.app"), built by native/computer-use-helper/build-win.ps1 to @@ -89,6 +74,9 @@ win: extraResources: - from: native/computer-use-helper/build/blitz-cu-helper.exe to: blitz-cu-helper.exe + # cross-platform analytics config; skipped when absent (local/fork builds) + - from: build-config/activity-logging.json + to: activity-logging.json nsis: # perMachine installs into Program Files, the trusted path required before app.manifest uiAccess="true" # (+ Authenticode signing) can let the helper drive ELEVATED windows. oneClick:false shows the standard From 0055b3a5dd2981cfcec1d06719346b27a2d5b459 Mon Sep 17 00:00:00 2001 From: sqllocks <29076762+sqllocks@users.noreply.github.com> Date: Mon, 29 Jun 2026 19:46:43 -0400 Subject: [PATCH 4/5] dist-win.ps1: note the synced-folder EPERM workaround electron-builder renames win-unpacked.tmp -> win-unpacked during Electron extraction; on a OneDrive/Dropbox-synced checkout the sync client locks the fresh files and the rename fails with EPERM. Document pointing directories.output outside the synced tree (verified: the full nsis pack succeeds that way). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/dist-win.ps1 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/dist-win.ps1 b/scripts/dist-win.ps1 index e91a4aa6..501a3453 100644 --- a/scripts/dist-win.ps1 +++ b/scripts/dist-win.ps1 @@ -8,6 +8,12 @@ # $env:CSC_LINK = 'path\to\cert.pfx'; $env:CSC_KEY_PASSWORD = '...' # Signing is a prerequisite for flipping app.manifest uiAccess="true" + a perMachine (Program Files) # install, which is what lets the helper drive ELEVATED windows. Until then it stays asInvoker. +# +# SYNCED-FOLDER GOTCHA: electron-builder extracts Electron to <output>/win-unpacked.tmp then RENAMES it to +# win-unpacked. On a OneDrive / Dropbox / Google-Drive-synced checkout the sync client locks the freshly +# written files and the rename fails with "EPERM: operation not permitted, rename ... win-unpacked". Fix: +# point directories.output (electron-builder.yml) at a path OUTSIDE the synced tree (e.g. under %LOCALAPPDATA%), +# or pause sync for the repo before building. CI / non-synced checkouts are unaffected and use `release`. $ErrorActionPreference = 'Stop' $root = Split-Path -Parent $PSScriptRoot # repo root Set-Location $root From 07075ed261662c37e4ef59ef181cf722e18387c7 Mon Sep 17 00:00:00 2001 From: sqllocks <29076762+sqllocks@users.noreply.github.com> Date: Mon, 29 Jun 2026 22:21:24 -0400 Subject: [PATCH 5/5] Windows: pin the island OPEN so the UI is reachable without a notch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The whole UI is the macOS notch overlay: it opens via notch hover or the ⌥Space chord, both unavailable on Windows (no notch; Alt+Space is the Win32 window menu). Main's notchGated is false off-darwin, so the os:notch-geometry push that sets notchOn + carries `synthetic` never fires, and NotchHost (gated on notchOn) never mounts. Result: a blank window with no entry point. 
Fix: on first mount on a non-macOS platform, pin the island OPEN exactly like the VM/synthetic path (notchOn=true, hasNotch=false, pinned panel, interactive). The chat + "+" attach panel (computer-use / window picker) are then always visible in the normal window. macOS is untouched (gated on platform). Verified via CDP on a real Windows run: NotchHost mounts, the Blitz chat + attach button render. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/renderer/src/App.tsx | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/renderer/src/App.tsx b/src/renderer/src/App.tsx index 9da3f41b..413bccd9 100644 --- a/src/renderer/src/App.tsx +++ b/src/renderer/src/App.tsx @@ -297,6 +297,23 @@ export default function App(): JSX.Element { }), [] ) + // Non-macOS has NO notch overlay: main's notchGated is false, so the os:notch-geometry push that carries + // `synthetic` never fires and the island has no hover/Alt+Space entry point (Alt+Space is the Win32 window + // menu). Pin it OPEN on first mount so the chat + attach UI is always visible in the normal window. Same + // effect as the VM/synthetic geometry path above; gated on platform so macOS is untouched. + useEffect(() => { + if (syntheticRef.current) return + const isMac = /Mac/i.test((navigator.platform || '') + ' ' + (navigator.userAgent || '')) + if (!isMac) { + syntheticRef.current = true + setNotchOn(true) // NotchHost portal is gated on notchOn (normally set by the geometry push, which is macOS-only) + setHasNotch(false) + setNotchPinnedBoth(true) + applyNotchState('panel') + setNotchInteractive(true) + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []) // Collapse the island (panel → closed). Shared by Esc and the main-driven os:notch-close. const closeIsland = (): void => { if (syntheticRef.current) return // VM/synthetic: Esc / os:notch-close never hide it — ⌥Space (toggleIsland) is the SOLE show/hide toggle there