From 4f6d4cf6e56caab55d9ab26582db0701a537a906 Mon Sep 17 00:00:00 2001 From: moreih29 Date: Tue, 2 Jun 2026 10:09:59 +0900 Subject: [PATCH 01/11] fix(fs): raise read cap to 50MB and fix image preview clipping/errors Three related file/image viewing fixes: - Raise MaxReadableFileSize 5MB -> 50MB (and the renderer mirror MAX_READABLE_FILE_SIZE) so larger files and images open. Also raise the agent's inbound NDJSON scanner cap 4MiB -> 64MiB; the previous 4MiB cap was already below the 5MB limit, silently breaking writeFile for files 4-5MB. Reads are unaffected by the scanner (they travel outbound). - ImagePreview: replace flex center alignment (items-center/justify-center) with safe alignment (items-center-safe/justify-center-safe). When a zoomed image overflows the scroll container, centering pushed the top/left edges into negative scroll space that scrollLeft/scrollTop can never reach, leaving part of the image unviewable. Safe alignment falls back to start on overflow so the whole image is scrollable. - ImagePreview: on image load failure, stat the file and show a size-specific message ("too large, max 50 MB") instead of a generic "could not load image", matching the text editor's TOO_LARGE message. Co-Authored-By: Claude Opus 4.8 --- internal/fs/service.go | 6 ++- internal/stdioserver/host.go | 9 ++-- .../editor/preview/image-preview.tsx | 51 ++++++++++++++++--- src/shared/fs/defaults.ts | 2 +- src/shared/i18n/locales/en/common.json | 3 +- src/shared/i18n/locales/ko/common.json | 3 +- 6 files changed, 59 insertions(+), 15 deletions(-) diff --git a/internal/fs/service.go b/internal/fs/service.go index 1811dc00..51b638c2 100644 --- a/internal/fs/service.go +++ b/internal/fs/service.go @@ -20,8 +20,10 @@ import ( // MaxReadableFileSize caps how many bytes one fs.readFile / fs.writeFile call // may move. The threshold matches the renderer's editor capacity so we never -// produce a file that we couldn't reload. 
-const MaxReadableFileSize = 5 * 1024 * 1024 +// produce a file that we couldn't reload. Raising this requires raising the +// inbound scanner cap in stdioserver.Host.Run accordingly — writeFile content +// travels inbound as one NDJSON line and is rejected by that cap first. +const MaxReadableFileSize = 50 * 1024 * 1024 // EventSink is the callback fs uses to push agent events back to Electron. type EventSink func(event string, payload any) error diff --git a/internal/stdioserver/host.go b/internal/stdioserver/host.go index 4ad55ad3..6488a77e 100644 --- a/internal/stdioserver/host.go +++ b/internal/stdioserver/host.go @@ -120,9 +120,12 @@ func (h *Host) EmitEvent(event string, payload any) error { // the parent's shutdown signal is honored. func (h *Host) Run() { scanner := bufio.NewScanner(h.in) - // 4 MiB cap matches the largest request shape we expect (writeFile - // content up to MaxReadableFileSize plus envelope overhead). - scanner.Buffer(make([]byte, 0, 64*1024), 4*1024*1024) + // Cap matches the largest request shape we expect: a writeFile whose + // content is up to MaxReadableFileSize (50 MiB), JSON-escaped, plus + // envelope overhead. 64 MiB leaves headroom for escaping without + // allocating eagerly — Scanner grows its buffer lazily from 64 KiB up to + // this ceiling. 
+ scanner.Buffer(make([]byte, 0, 64*1024), 64*1024*1024) for scanner.Scan() { // Copy the slice — scanner reuses its internal buffer between diff --git a/src/renderer/components/editor/preview/image-preview.tsx b/src/renderer/components/editor/preview/image-preview.tsx index 94b7c171..bed39750 100644 --- a/src/renderer/components/editor/preview/image-preview.tsx +++ b/src/renderer/components/editor/preview/image-preview.tsx @@ -38,6 +38,8 @@ import { useEffect, useLayoutEffect, useRef, useState } from "react"; import { useTranslation } from "react-i18next"; +import { MAX_READABLE_FILE_SIZE } from "../../../../shared/fs/defaults"; +import { ipcCallResult } from "../../../ipc/client"; import { buildWorkspaceUrl } from "../../../services/editor/preview/workspace-url"; import { useWorkspacesStore } from "../../../state/stores/workspaces"; import { relPath } from "../../../utils/path"; @@ -89,12 +91,24 @@ export function ImagePreview({ workspaceId, filePath, onNaturalSize }: ImagePrev } const url = buildWorkspaceUrl(workspaceId, rel); - return ; + return ( + + ); } interface ImageCanvasProps { url: string; alt: string; + /** Workspace owning the file — used to stat the file on a load error. */ + workspaceId: string; + /** Workspace-relative path — used to stat the file on a load error. */ + relPath: string; onNaturalSize?: (size: { w: number; h: number }) => void; } @@ -129,14 +143,16 @@ interface ZoomAnchor { * - File deleted/moved on disk after the tab opened (404 from protocol). * - Format the OS/Chromium can't decode (rare for the supported set). 
*/ -function ImageCanvas({ url, alt, onNaturalSize }: ImageCanvasProps) { +function ImageCanvas({ url, alt, workspaceId, relPath, onNaturalSize }: ImageCanvasProps) { const { t } = useTranslation(); const containerRef = useRef(null); const imgRef = useRef(null); const [naturalSize, setNaturalSize] = useState<{ w: number; h: number } | null>(null); const [fitScale, setFitScale] = useState(1); const [userScale, setUserScale] = useState(1); - const [errored, setErrored] = useState(false); + // null = loaded OK. "generic" = decode/missing failure. "too_large" = the + // file exceeds the read cap (resolved via a stat round-trip in onError). + const [errorKind, setErrorKind] = useState<"generic" | "too_large" | null>(null); // Anchor for the in-flight zoom: applied in useLayoutEffect after the new // dimensions commit, then cleared. Held in a ref so the wheel callback @@ -226,8 +242,18 @@ function ImageCanvas({ url, alt, onNaturalSize }: ImageCanvasProps) { return () => el.removeEventListener("wheel", onWheel); }, []); - if (errored) { - return ; + if (errorKind) { + return ( + + ); } // Until the image loads we don't know its natural size yet, so render @@ -244,7 +270,7 @@ function ImageCanvas({ url, alt, onNaturalSize }: ImageCanvasProps) { ref={containerRef} className="app-scrollbar flex-1 min-h-0 overflow-auto bg-[var(--surface-backdrop-bg)]" > -
+
setErrored(true)} + onError={() => { + // A load failure on an over-cap file is a size rejection — the + // workspace protocol 404s reads above MAX_READABLE_FILE_SIZE — not + // a missing or undecodable image. stat to disambiguate so the user + // sees the real reason instead of a generic "could not load". + setErrorKind("generic"); + void ipcCallResult("fs", "stat", { workspaceId, relPath }).then((result) => { + if (result.ok && result.value.size > MAX_READABLE_FILE_SIZE) { + setErrorKind("too_large"); + } + }); + }} // maxWidth/maxHeight: 'none' override Tailwind Preflight's global // `img { max-width: 100%; height: auto; }`. Without this, our // explicit width is silently capped at the inner wrapper's width diff --git a/src/shared/fs/defaults.ts b/src/shared/fs/defaults.ts index 8e2aab54..fea3d616 100644 --- a/src/shared/fs/defaults.ts +++ b/src/shared/fs/defaults.ts @@ -1,4 +1,4 @@ -export const MAX_READABLE_FILE_SIZE = 5 * 1024 * 1024; +export const MAX_READABLE_FILE_SIZE = 50 * 1024 * 1024; export const BINARY_DETECTION_BYTES = 512; /** Maximum file size considered for text search — mirrors the read limit so search never reads more than the editor would. */ diff --git a/src/shared/i18n/locales/en/common.json b/src/shared/i18n/locales/en/common.json index 6c062be3..fda2e882 100644 --- a/src/shared/i18n/locales/en/common.json +++ b/src/shared/i18n/locales/en/common.json @@ -311,7 +311,8 @@ "imagePreview": { "workspace_not_found": "Workspace not found.", "outside_workspace": "Image is outside the workspace and cannot be previewed.", - "load_failed": "Could not load image." + "load_failed": "Could not load image.", + "too_large": "Image is too large to preview (max {{maxMb}} MB)." 
}, "markdown": { "copy_code": "Copy code", diff --git a/src/shared/i18n/locales/ko/common.json b/src/shared/i18n/locales/ko/common.json index 91e4e6b7..a4e7fb81 100644 --- a/src/shared/i18n/locales/ko/common.json +++ b/src/shared/i18n/locales/ko/common.json @@ -311,7 +311,8 @@ "imagePreview": { "workspace_not_found": "워크스페이스를 찾을 수 없습니다.", "outside_workspace": "이미지가 워크스페이스 외부에 있어 미리볼 수 없습니다.", - "load_failed": "이미지를 불러올 수 없습니다." + "load_failed": "이미지를 불러올 수 없습니다.", + "too_large": "이미지가 너무 커서 미리볼 수 없습니다 (최대 {{maxMb}}MB)." }, "markdown": { "copy_code": "코드 복사", From b35168be1fb3dee64bc4fd7332a34e5be869ca36 Mon Sep 17 00:00:00 2001 From: moreih29 Date: Tue, 2 Jun 2026 10:50:29 +0900 Subject: [PATCH 02/11] fix(terminal): update tab title for inline TUIs like Claude Code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude Code sets its window title via OSC 2 while staying in the NORMAL screen buffer (it never enters the alternate screen). The onTitleChange handler's alt-screen guard (`buffer.active.type !== "alternate" → return`) dropped those titles, and since claude emits no alt-screen enter, the foregroundProcess fallback never fired either — so the tab name never updated. lazygit/lazydocker/yazi were unaffected (they use the alt screen). This was transport-independent; SSH was a red herring. Replace the binary alt-screen guard with classifyOscTitle(): - alternate + non-shell-like -> apply (unchanged) - alternate + shell-like -> ignore (alt-enter RPC labels these) - normal + shell-like -> clear (prompt; also resets the tab when an inline TUI exits) - normal + non-shell-like -> confirm via foregroundProcess, applying only when a real (non-login-shell) program holds the foreground, so starship-style preexec command echoes can't hijack the tab. 
Decision logic extracted to pure functions (isLoginShell, classifyOscTitle, foregroundConfirmsTitle) with unit tests; end-to-end behavior over a live PTY still needs manual smoke (claude/lazygit/yazi/plain shell). Co-Authored-By: Claude Opus 4.8 --- src/renderer/services/terminal/controller.ts | 122 +++++++++++--- .../services/terminal-services.test.ts | 155 +++++++++++++++++- 2 files changed, 246 insertions(+), 31 deletions(-) diff --git a/src/renderer/services/terminal/controller.ts b/src/renderer/services/terminal/controller.ts index 4dd9ad36..b0562a29 100644 --- a/src/renderer/services/terminal/controller.ts +++ b/src/renderer/services/terminal/controller.ts @@ -118,6 +118,65 @@ export function isShellPromptLikeTitle(title: string): boolean { return false; } +/** + * Login-shell program names. When one of these is the PTY's foreground process, + * an OSC title arriving in the normal screen buffer is shell-prompt or preexec + * command-echo noise rather than a real inline TUI's title. + * + * `ps -o comm=` may prefix a login shell with "-" (e.g. "-zsh"); we strip it. + */ +const LOGIN_SHELL_NAMES: ReadonlySet = new Set([ + "sh", + "bash", + "zsh", + "fish", + "dash", + "ksh", + "tcsh", + "csh", +]); + +/** True when `comm` (a `ps -o comm=` basename) names a login shell. */ +export function isLoginShell(comm: string): boolean { + return LOGIN_SHELL_NAMES.has(comm.trim().replace(/^-/, "")); +} + +export type OscTitleAction = "ignore" | "apply" | "clear" | "confirm"; + +/** + * First-pass classification of an OSC window-title change, by title shape and + * screen buffer. See the onTitleChange wiring for how each action is handled. + * + * - alternate screen (lazygit/yazi/vim/less): a non-shell-like title applies + * directly; a shell-like variant is ignored (the alt-enter foregroundProcess + * path labels those tabs instead). 
+ * - normal screen: a shell-like title (prompt/path) clears back to the default, + * which also resets the tab when an inline TUI like Claude Code exits. A + * non-shell-like normal-screen title is deferred to a foregroundProcess + * check ("confirm") because preexec hooks can echo commands here. + */ +export function classifyOscTitle( + title: string, + bufferType: "normal" | "alternate", +): OscTitleAction { + if (bufferType === "alternate") { + return isShellPromptLikeTitle(title) ? "ignore" : "apply"; + } + if (isShellPromptLikeTitle(title)) return "clear"; + return "confirm"; +} + +/** + * Whether a deferred ("confirm") normal-screen title should apply, given the + * PTY's current foreground process name. Applies only when a real, non-shell + * program holds the foreground — so command-echo noise (fired while the shell + * is still foreground) is rejected. + */ +export function foregroundConfirmsTitle(foregroundName: string): boolean { + const name = foregroundName.trim(); + return name !== "" && !isLoginShell(name); +} + /** * Backslash-escape a filesystem path for injection into a shell / TUI prompt, * matching the drag-and-drop behavior of iTerm2 and Terminal.app. @@ -381,32 +440,49 @@ class XtermTerminalController implements TerminalController { this.installFileDrop(term); // OSC 0/1/2(window title) — xterm.js 내부 파서가 시퀀스를 수신해 onTitleChange로 - // 노출한다. claude / lazygit / lazydocker 같은 TUI가 자기 이름/상태를 보내면 - // 탭의 processTitle을 그것으로 갱신해 표시 title이 바뀐다. - // 사용자가 직접 rename으로 customTitle을 설정해두면 표시는 그대로 유지된다. + // 노출한다. lazygit/yazi/vim 같은 alternate-screen TUI, 그리고 Claude Code처럼 + // alternate에 들어가지 않고 일반 화면에서 OSC로 제목을 쏘는 프로그램의 이름/상태를 + // 탭의 processTitle로 반영한다. customTitle이 설정돼 있으면 표시 title은 유지된다. // - // 빈 문자열은 store가 processTitle clear로 해석 → 자동 복귀. - // - // 두 단계 필터: - // 1) Alternate screen buffer 가드 — 가장 강한 신호. - // TUI(claude/lazygit/lazydocker/vim/less/htop)는 시작 시 alternate - // screen으로 진입(\\x1b[?1049h)한 뒤 자기 이름을 OSC 2로 발사한다. 
- // 반면 ls/grep/cat 같은 단발 명령은 normal screen에서 출력되며, 일부 - // zsh 플러그인(starship 등)이 preexec hook에서 OSC 2로 "ls -G" 같은 - // 현재 명령을 발사해 탭 이름을 흔드는 케이스가 있다. buffer.active.type - // 이 "alternate"일 때만 받아 두 신호를 깔끔히 분리한다. - // 2) Shell prompt 패턴 필터 — 백업. - // alternate screen인데도 `user@host:cwd` 형태를 보내는 변종 prompt - // 대응. `/`, `~`, `@`+`:` 동시 포함이면 거부. - // - // 거부된 OSC는 silently drop — store 갱신 안 함. - // 빈 문자열도 alternate 가드에 막혀 그대로 통과시 store가 clear로 해석하지만, - // alternate 종료 시점에 빈 OSC는 normal screen에서 발사되므로 자연스럽게 무시된다. + // 노이즈(셸 프롬프트의 `user@host:cwd`, starship 등 preexec hook의 명령 에코)와 + // 실제 프로그램 제목을 구분해야 한다. classifyOscTitle이 (title, buffer type)으로 + // 1차 분류한다: + // - "ignore" : 버림 + // - "apply" : processTitle = title + // - "clear" : processTitle = null → defaultTitle/customTitle 복귀. + // normal screen의 셸 프롬프트가 여기 해당하며, inline TUI(claude)가 + // 종료돼 프롬프트가 돌아올 때 탭 이름을 자연스럽게 되돌리는 역할도 한다. + // - "confirm" : normal screen의 비셸틱 제목(claude 등). foreground process가 실제 + // 프로그램인지 RPC로 확인한 뒤에만 적용 → preexec 명령 에코가 탭을 + // 가로채지 못한다. this.titleDisposable = term.onTitleChange((title) => { if (this.disposed) return; - if (term.buffer.active.type !== "alternate") return; - if (isShellPromptLikeTitle(title)) return; - useTabsStore.getState().setProcessTitle(this.options.workspaceId, this.options.tabId, title); + const action = classifyOscTitle(title, term.buffer.active.type); + if (action === "ignore") return; + if (action === "clear") { + useTabsStore.getState().setProcessTitle(this.options.workspaceId, this.options.tabId, null); + return; + } + if (action === "apply") { + useTabsStore + .getState() + .setProcessTitle(this.options.workspaceId, this.options.tabId, title); + return; + } + // action === "confirm": normal-screen 비셸틱 제목. foreground 확인 후 적용. 
+ void (async () => { + const result = await ipcCallResult("pty", "foregroundProcess", { + workspaceId: this.options.workspaceId, + tabId: this.options.tabId, + }); + if (this.disposed || !result.ok) return; + if (!foregroundConfirmsTitle(result.value.name)) return; + useTabsStore + .getState() + .setProcessTitle(this.options.workspaceId, this.options.tabId, title); + })().catch(() => { + // RPC 실패는 silent — 제목 미적용(기존 title 유지). + }); }); // alt → ENTER 전이 시 PTY의 foreground process 이름을 IPC로 가져와 processTitle로 diff --git a/tests/unit/renderer/services/terminal-services.test.ts b/tests/unit/renderer/services/terminal-services.test.ts index 743ef107..7f31afe6 100644 --- a/tests/unit/renderer/services/terminal-services.test.ts +++ b/tests/unit/renderer/services/terminal-services.test.ts @@ -104,9 +104,13 @@ mock.module("../../../../src/renderer/ipc/client", () => ({ const { closeTerminal, createTerminalController, openTerminal } = await import( "../../../../src/renderer/services/terminal" ); -const { TERMINAL_REOPENED_SEPARATOR, isShellPromptLikeTitle } = await import( - "../../../../src/renderer/services/terminal/controller" -); +const { + TERMINAL_REOPENED_SEPARATOR, + isShellPromptLikeTitle, + isLoginShell, + classifyOscTitle, + foregroundConfirmsTitle, +} = await import("../../../../src/renderer/services/terminal/controller"); const { createPtyClient } = await import("../../../../src/renderer/services/terminal/pty-client"); const { closeGroup } = await import("../../../../src/renderer/state/operations"); const { useLayoutStore } = await import("../../../../src/renderer/state/stores/layout"); @@ -874,6 +878,71 @@ describe("isShellPromptLikeTitle", () => { }); }); +// --------------------------------------------------------------------------- +// isLoginShell — foreground comm이 로그인 셸인지 판정 (명령 에코 노이즈 거름) +// --------------------------------------------------------------------------- +describe("isLoginShell", () => { + it("알려진 셸 이름 → true", () => { + for (const sh of 
["sh", "bash", "zsh", "fish", "dash", "ksh", "tcsh", "csh"]) { + expect(isLoginShell(sh)).toBe(true); + } + }); + + it("로그인 셸 prefix '-' 제거 후 판정 (ps -o comm=)", () => { + expect(isLoginShell("-zsh")).toBe(true); + expect(isLoginShell("-bash")).toBe(true); + }); + + it("실제 프로그램 → false", () => { + expect(isLoginShell("node")).toBe(false); + expect(isLoginShell("claude")).toBe(false); + expect(isLoginShell("lazygit")).toBe(false); + expect(isLoginShell("")).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// classifyOscTitle — (title, buffer type) 1차 분류 +// --------------------------------------------------------------------------- +describe("classifyOscTitle", () => { + it("alternate + 비셸틱 → apply (lazygit/yazi/vim)", () => { + expect(classifyOscTitle("lazygit", "alternate")).toBe("apply"); + expect(classifyOscTitle("", "alternate")).toBe("apply"); + }); + + it("alternate + 셸틱 → ignore (변종 prompt; alt-enter RPC가 라벨링)", () => { + expect(classifyOscTitle("Yazi: ~/workspaces/img_intern", "alternate")).toBe("ignore"); + expect(classifyOscTitle("kih@host:~/x", "alternate")).toBe("ignore"); + }); + + it("normal + 셸틱 → clear (프롬프트 복귀 / inline TUI 종료 정리)", () => { + expect(classifyOscTitle("kih@monolith:~/workspaces/img_intern", "normal")).toBe("clear"); + expect(classifyOscTitle("/abs/path", "normal")).toBe("clear"); + }); + + it("normal + 비셸틱 → confirm (claude 등 — foreground 확인 필요)", () => { + expect(classifyOscTitle("✳ Claude Code", "normal")).toBe("confirm"); + expect(classifyOscTitle("ls -G", "normal")).toBe("confirm"); + }); +}); + +// --------------------------------------------------------------------------- +// foregroundConfirmsTitle — confirm 경로에서 foreground 이름으로 적용 여부 결정 +// --------------------------------------------------------------------------- +describe("foregroundConfirmsTitle", () => { + it("실제 프로그램이 foreground → 적용", () => { + expect(foregroundConfirmsTitle("node")).toBe(true); + 
expect(foregroundConfirmsTitle("claude")).toBe(true); + }); + + it("셸이 foreground 또는 빈 값 → 거부 (명령 에코 노이즈)", () => { + expect(foregroundConfirmsTitle("zsh")).toBe(false); + expect(foregroundConfirmsTitle("-bash")).toBe(false); + expect(foregroundConfirmsTitle("")).toBe(false); + expect(foregroundConfirmsTitle(" ")).toBe(false); + }); +}); + // --------------------------------------------------------------------------- // OSC 0/1/2 title sync — claude / lazygit / lazydocker가 자신의 이름/상태를 // 보내면 탭의 processTitle이 갱신되어야 한다. @@ -941,8 +1010,10 @@ describe("services/terminal controller — OSC title sync", () => { controller.dispose(); }); - it("normal screen에서 발사된 title은 무시 — ls/grep 같은 단발 명령 가드", async () => { - // 새 harness — buffer 기본 alternate를 normal로 전환해 단발 명령 시나리오 재현. + it("normal screen 명령 에코는 foreground가 셸이면 무시 — starship preexec 가드", async () => { + // zsh preexec hook이 "ls -G" 같은 현재 명령을 OSC 2로 발사하는 케이스. 이때 + // foreground는 아직 셸(명령 exec 전)이라 foregroundProcess 확인에서 거부된다. + mockForegroundProcessName = "zsh"; const tab = useTabsStore.getState().createTab(WS, { type: "terminal", props: { cwd: "/" } }); const harness = makeTerminalControllerDeps(); harness.setBufferType("normal"); @@ -960,8 +1031,72 @@ describe("services/terminal controller — OSC title sync", () => { const titleHandler = harness.getTitleHandler(); if (!titleHandler) throw new Error("title handler missing"); - // zsh preexec hook이 현재 명령을 OSC 2로 발사하는 케이스 — buffer는 여전히 normal. titleHandler("ls -G"); + // confirm 경로의 async foregroundProcess 응답 대기 + await Promise.resolve(); + await Promise.resolve(); + + const updated = useTabsStore.getState().byWorkspace[WS][tab.id]; + expect(updated.processTitle).toBeUndefined(); + expect(updated.title).toBe("Terminal"); + + controller.dispose(); + mockForegroundProcessName = "lazygit"; // restore default + }); + + it("normal screen inline TUI(claude)는 foreground가 실제 프로그램이면 적용", async () => { + // Claude Code는 alternate screen에 들어가지 않고 normal screen에서 OSC 2로 + // 제목을 발사한다. 
foreground가 셸이 아닌 실제 프로그램이면 적용되어야 한다. + mockForegroundProcessName = "node"; + const tab = useTabsStore.getState().createTab(WS, { type: "terminal", props: { cwd: "/" } }); + const harness = makeTerminalControllerDeps(); + harness.setBufferType("normal"); + const controller = createTerminalController( + { + workspaceId: WS, + tabId: tab.id, + cwd: "/workspace", + container: {} as HTMLElement, + autoSpawn: false, + }, + harness.deps, + ); + await flushTerminalInit(); + const titleHandler = harness.getTitleHandler(); + if (!titleHandler) throw new Error("title handler missing"); + + titleHandler("✳ Claude Code"); + await Promise.resolve(); + await Promise.resolve(); + + expect(useTabsStore.getState().byWorkspace[WS][tab.id].title).toBe("✳ Claude Code"); + + controller.dispose(); + mockForegroundProcessName = "lazygit"; // restore default + }); + + it("normal screen 셸 프롬프트는 processTitle을 clear — inline TUI 종료 시 복귀", async () => { + const tab = useTabsStore.getState().createTab(WS, { type: "terminal", props: { cwd: "/" } }); + // 직전에 claude가 적용해둔 processTitle을 시뮬레이션 + useTabsStore.getState().setProcessTitle(WS, tab.id, "✳ Claude Code"); + const harness = makeTerminalControllerDeps(); + harness.setBufferType("normal"); + const controller = createTerminalController( + { + workspaceId: WS, + tabId: tab.id, + cwd: "/workspace", + container: {} as HTMLElement, + autoSpawn: false, + }, + harness.deps, + ); + await flushTerminalInit(); + const titleHandler = harness.getTitleHandler(); + if (!titleHandler) throw new Error("title handler missing"); + + // claude 종료 → 셸 프롬프트가 OSC 2로 user@host:cwd 발사 (shellLike) → clear + titleHandler("kih@monolith:~/workspaces/img_intern"); const updated = useTabsStore.getState().byWorkspace[WS][tab.id]; expect(updated.processTitle).toBeUndefined(); @@ -990,11 +1125,15 @@ describe("services/terminal controller — OSC title sync", () => { if (!titleHandler) throw new Error("title handler missing"); if (!altExitHandler) throw new Error("alt-exit 
handler missing"); - // shell preexec — 무시되어야 함 + // shell preexec — foreground가 셸이라 무시되어야 함 + mockForegroundProcessName = "zsh"; titleHandler("ls -G"); + await Promise.resolve(); + await Promise.resolve(); expect(useTabsStore.getState().byWorkspace[WS][tab.id].processTitle).toBeUndefined(); - // 사용자가 lazygit 실행 → alternate 진입 → "lazygit" 발사 + // 사용자가 lazygit 실행 → alternate 진입 → "lazygit" 발사 (alternate는 RPC 불필요) + mockForegroundProcessName = "lazygit"; // restore default harness.setBufferType("alternate"); titleHandler("lazygit"); expect(useTabsStore.getState().byWorkspace[WS][tab.id].title).toBe("lazygit"); From 2c90752092429f3a7bd0338e7031aa6123d9c204 Mon Sep 17 00:00:00 2001 From: moreih29 Date: Tue, 2 Jun 2026 11:12:43 +0900 Subject: [PATCH 03/11] feat(workspace): show SSH agent bootstrap progress while connecting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Connecting to an SSH workspace uploads the agent binary / Node runtime / LSP archives to the remote, which can take seconds. The UI showed only a static "connecting", so users couldn't tell it was working and sometimes force-quit mid-upload (leaving a busy binary behind). The bootstrap already emitted onProgress events, but the agent-bootstrap path never forwarded them (only the LSP path did). Wire them through, mirroring the existing lsp.bootstrap.progress pattern: - New workspaceId-scoped IPC event workspace/connectionProgress { workspaceId, name, phase, bytesDone?, bytesTotal? }, reusing the bootstrap phase enum. Because both connect flows (add-new and app-startup reconnect) funnel through startSshProvider, one event covers both. - WorkspaceManager.startSshProvider passes onProgress → broadcastConnectionProgress; main/index forwards deps.onProgress into ensureRemoteAgent. - Renderer store tracks connectionProgressByWorkspaceId, cleared when the connection reaches a terminal status. 
- Workspace panel placeholder renders phase label + artifact size + a bar (determinate only when 0 < bytesDone < bytesTotal; otherwise an indeterminate pulse). --- src/main/features/workspace/manager.ts | 51 ++++++++--- src/main/index.ts | 3 +- src/renderer/components/workbench/sidebar.tsx | 3 + src/renderer/components/workspace/panel.tsx | 87 +++++++++++++++++- src/renderer/state/stores/workspaces.ts | 45 ++++++++- src/shared/i18n/locales/en/common.json | 12 ++- src/shared/i18n/locales/ko/common.json | 12 ++- src/shared/ipc/contract.ts | 2 + src/shared/types/workspace.ts | 17 ++++ .../renderer/state/stores/workspaces.test.ts | 91 ++++++++++++++++++- 10 files changed, 299 insertions(+), 24 deletions(-) diff --git a/src/main/features/workspace/manager.ts b/src/main/features/workspace/manager.ts index bbb82114..06344c53 100644 --- a/src/main/features/workspace/manager.ts +++ b/src/main/features/workspace/manager.ts @@ -72,6 +72,7 @@ type SshWorkspaceLocation = Extract; export type WorkspaceSshChannelFactory = (options: CreateSshChannelOptions) => SshChannel; export type WorkspaceSshBootstrap = ( options: EnsureRemoteAgentOptions, + dependencies?: Pick, ) => Promise; export type WorkspaceSshLspBootstrap = ( options: EnsureRemoteLspServerOptions, @@ -1069,21 +1070,24 @@ export class WorkspaceManager { this.adoptedSshMasters.delete(meta.id); let bootstrap: EnsureRemoteAgentResult; try { - bootstrap = await this.sshBootstrap({ - host: meta.location.host, - user: meta.location.user, - port: meta.location.port, - identityFile: meta.location.identityFile, - authMode: meta.location.authMode, - remotePath: meta.location.remotePath, - cachedRemoteArch: meta.location.remoteArch, - controlPath: adoptedMaster?.controlPath, - // Pass workspaceId so the bootstrap also uploads the per-workspace - // shim rc files (`.zshrc`/`.zshenv`/`bashrc`) into the remote's - // `~/.nexus-code/shim//`, making them available to the - // remote PTY's zsh `ZDOTDIR` / bash `--rcfile` activation. 
- workspaceId: meta.id, - }); + bootstrap = await this.sshBootstrap( + { + host: meta.location.host, + user: meta.location.user, + port: meta.location.port, + identityFile: meta.location.identityFile, + authMode: meta.location.authMode, + remotePath: meta.location.remotePath, + cachedRemoteArch: meta.location.remoteArch, + controlPath: adoptedMaster?.controlPath, + // Pass workspaceId so the bootstrap also uploads the per-workspace + // shim rc files (`.zshrc`/`.zshenv`/`bashrc`) into the remote's + // `~/.nexus-code/shim//`, making them available to the + // remote PTY's zsh `ZDOTDIR` / bash `--rcfile` activation. + workspaceId: meta.id, + }, + { onProgress: (event) => this.broadcastConnectionProgress(meta.id, event) }, + ); } catch (error) { // Bootstrap failed before any channel existed. Release the adopted // master (we own it now) and surface the error state instead of @@ -1217,6 +1221,23 @@ export class WorkspaceManager { this.broadcastFn("workspace", "connectionChanged", { workspaceId, status }); } + /** + * 에이전트 부트스트랩 진행 이벤트를 렌더러로 전달한다. + * workspaceId 범위로 scoped되므로 "새 워크스페이스 추가"와 "앱 시작 시 재연결" 양쪽 흐름 모두 커버한다. + */ + private broadcastConnectionProgress( + workspaceId: string, + event: LspBootstrapProgressEvent, + ): void { + this.broadcastFn("workspace", "connectionProgress", { + workspaceId, + name: event.name, + phase: event.phase, + bytesDone: event.bytesDone, + bytesTotal: event.bytesTotal, + }); + } + /** * Handles terminal SSH channel lifecycle events and restores the inert SSH provider. 
*/ diff --git a/src/main/index.ts b/src/main/index.ts index 12ed1c19..e18de8ac 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -150,9 +150,10 @@ const workspaceManager = new WorkspaceManager( createSshChannel(options, { promptHandler: (prompt) => sshAuthPromptHub.request(prompt), }), - (options) => + (options, deps) => ensureRemoteAgent(options, { promptHandler: (prompt) => sshAuthPromptHub.request(prompt), + onProgress: deps?.onProgress, }), ); diff --git a/src/renderer/components/workbench/sidebar.tsx b/src/renderer/components/workbench/sidebar.tsx index 4c9801e3..411a6b79 100644 --- a/src/renderer/components/workbench/sidebar.tsx +++ b/src/renderer/components/workbench/sidebar.tsx @@ -629,6 +629,7 @@ export function Sidebar({ function ConnectionStatusDot({ status }: { status: WorkspaceConnectionStatus }) { const { t } = useTranslation(); const label = t("sidebar.ssh_status", { status }); + const isTransient = status === "connecting" || status === "reconnecting"; return ( ); diff --git a/src/renderer/components/workspace/panel.tsx b/src/renderer/components/workspace/panel.tsx index 7be44d19..74f91c74 100644 --- a/src/renderer/components/workspace/panel.tsx +++ b/src/renderer/components/workspace/panel.tsx @@ -3,13 +3,17 @@ import { useTranslation } from "react-i18next"; import { openTerminal } from "@/services/terminal"; import { cn } from "@/utils/cn"; import { createLogger } from "../../../shared/log/renderer"; -import type { WorkspaceMeta } from "../../../shared/types/workspace"; +import type { + WorkspaceConnectionProgressEvent, + WorkspaceMeta, +} from "../../../shared/types/workspace"; import { ipcCallResult } from "../../ipc/client"; import { useLayoutStore } from "../../state/stores/layout"; import { type Tab, useTabsStore } from "../../state/stores/tabs"; import { useTerminalDeathStore } from "../../state/stores/terminal-deaths"; import { selectIsWorkspaceOnline, + selectWorkspaceConnectionProgress, selectWorkspaceConnectionStatus, 
useWorkspacesStore, } from "../../state/stores/workspaces"; @@ -54,6 +58,9 @@ export function WorkspacePanel({ workspace, isActive }: WorkspacePanelProps) { const connectionStatus = useWorkspacesStore((s) => selectWorkspaceConnectionStatus(s, workspace.id), ); + const connectionProgress = useWorkspacesStore((s) => + selectWorkspaceConnectionProgress(s, workspace.id), + ); const deadTerminalCount = deadTerminalTabs(tabs).length; const showTerminalStatusBanner = shouldShowWorkspaceTerminalStatusBanner({ aggregate, @@ -95,6 +102,7 @@ export function WorkspacePanel({ workspace, isActive }: WorkspacePanelProps) { if (showOfflinePlaceholder) { const isError = connectionStatus === "error"; + const isConnecting = connectionStatus === "connecting" || connectionStatus === "reconnecting"; const sshLabel = workspace.location.kind === "ssh" ? `${workspace.location.user ? `${workspace.location.user}@` : ""}${workspace.location.host}` @@ -108,7 +116,7 @@ export function WorkspacePanel({ workspace, isActive }: WorkspacePanelProps) { aria-hidden={!isActive || undefined} inert={!isActive || undefined} > -
+
+ {isConnecting && connectionProgress && ( + + )}
); @@ -165,3 +176,75 @@ export function WorkspacePanel({ workspace, isActive }: WorkspacePanelProps) {
); } + +// --------------------------------------------------------------------------- +// 부트스트랩 진행 표시줄 +// --------------------------------------------------------------------------- + +/** + * diff-tab.tsx의 formatBytes와 동일한 구현. 별도 공유 유틸이 없으므로 인라인 복사. + */ +function formatBytes(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + const kib = bytes / 1024; + if (kib < 1024) return `${kib.toFixed(kib >= 10 ? 0 : 1)} KB`; + const mib = kib / 1024; + return `${mib.toFixed(mib >= 10 ? 0 : 1)} MB`; +} + +/** + * SSH 에이전트 부트스트랩 진행 표시줄. + * + * 정직한 진행률 표시 규칙: + * - bytesTotal>0 이고 0 0 ? formatBytes(bytesTotal) : undefined; + + // Determinate 여부: bytesDone이 존재하고 0 0 && + bytesDone !== undefined && + bytesDone > 0 && + bytesDone < bytesTotal; + const percent = isDeterminate ? Math.round((bytesDone / bytesTotal) * 100) : undefined; + + return ( +
+
+ {phaseLabel} + {sizeLabel && {sizeLabel}} +
+
+ {isDeterminate ? ( +
+ ) : ( + // indeterminate: pulse 애니메이션으로 진행 중임을 표시 +
+ )} +
+
+ ); +} diff --git a/src/renderer/state/stores/workspaces.ts b/src/renderer/state/stores/workspaces.ts index f5c82c29..0f32c826 100644 --- a/src/renderer/state/stores/workspaces.ts +++ b/src/renderer/state/stores/workspaces.ts @@ -1,6 +1,7 @@ import { create } from "zustand"; import type { WorkspaceConnectionEventStatus, + WorkspaceConnectionProgressEvent, WorkspaceMeta, } from "../../../shared/types/workspace"; import { canUseIpcBridge, ipcCallResult, ipcListen, mustSucceed } from "../../ipc/client"; @@ -20,6 +21,8 @@ export type WorkspaceConnectionStatus = export interface WorkspacesState { workspaces: WorkspaceMeta[]; connectionStatusByWorkspaceId: Record; + /** 워크스페이스 ID별 마지막 부트스트랩 진행 이벤트. 연결 완료/오류 시 undefined로 클리어된다. */ + connectionProgressByWorkspaceId: Record; setAll: (workspaces: WorkspaceMeta[]) => void; upsert: (meta: WorkspaceMeta) => void; remove: (id: string) => void; @@ -149,6 +152,17 @@ function workspaceIsOnline( return status === "connected"; } +/** + * 워크스페이스의 마지막 부트스트랩 진행 이벤트를 반환한다. + * 연결이 terminal 상태에 도달하면 undefined를 반환한다. + */ +export function selectWorkspaceConnectionProgress( + state: WorkspacesState, + workspaceId: string, +): WorkspaceConnectionProgressEvent | undefined { + return state.connectionProgressByWorkspaceId[workspaceId]; +} + /** * Selects whether a workspace should expose online-only per-tab status UI. */ @@ -242,10 +256,29 @@ export function createWorkspacesStore(deps: WorkspacesStoreDeps = defaultWorkspa }); deps.listen("workspace", "connectionChanged", ({ workspaceId, status }) => { + set((state) => { + const displayStatus = statusFromConnectionEvent(status); + // 연결이 terminal 상태(connected/error/idle/disconnected)에 도달하면 + // 진행 표시줄이 남아있지 않도록 progress 항목을 클리어한다. + const isTerminal = displayStatus !== "connecting" && displayStatus !== "reconnecting"; + const nextProgress = isTerminal + ? 
{ ...state.connectionProgressByWorkspaceId, [workspaceId]: undefined } + : state.connectionProgressByWorkspaceId; + return { + connectionStatusByWorkspaceId: { + ...state.connectionStatusByWorkspaceId, + [workspaceId]: displayStatus, + }, + connectionProgressByWorkspaceId: nextProgress, + }; + }); + }); + + deps.listen("workspace", "connectionProgress", (event) => { set((state) => ({ - connectionStatusByWorkspaceId: { - ...state.connectionStatusByWorkspaceId, - [workspaceId]: statusFromConnectionEvent(status), + connectionProgressByWorkspaceId: { + ...state.connectionProgressByWorkspaceId, + [event.workspaceId]: event, }, })); }); @@ -264,12 +297,18 @@ export function createWorkspacesStore(deps: WorkspacesStoreDeps = defaultWorkspa ([workspaceId]) => workspaceId !== id, ), ), + connectionProgressByWorkspaceId: Object.fromEntries( + Object.entries(state.connectionProgressByWorkspaceId).filter( + ([workspaceId]) => workspaceId !== id, + ), + ), })); }); return { workspaces: [], connectionStatusByWorkspaceId: {}, + connectionProgressByWorkspaceId: {}, setAll(workspaces) { set((state) => ({ diff --git a/src/shared/i18n/locales/en/common.json b/src/shared/i18n/locales/en/common.json index fda2e882..d25cbe40 100644 --- a/src/shared/i18n/locales/en/common.json +++ b/src/shared/i18n/locales/en/common.json @@ -330,7 +330,17 @@ "panel": { "connection_failed": "Connection failed", "ssh_workspace": "SSH workspace — {{label}}", - "could_not_connect": "Could not connect to {{label}}. Check your SSH settings and try again." + "could_not_connect": "Could not connect to {{label}}. 
Check your SSH settings and try again.", + "bootstrap_phase": { + "checking": "Checking remote…", + "uploading": "Uploading {{name}}…", + "verifying": "Verifying…", + "extracting": "Extracting {{name}}…", + "linking": "Linking…", + "pruning": "Cleaning up…", + "skipped": "Ready", + "ready": "Ready" + } }, "conflictDialog": { "title": "Save conflict — {{filename}} changed on disk", diff --git a/src/shared/i18n/locales/ko/common.json b/src/shared/i18n/locales/ko/common.json index a4e7fb81..5b1487c1 100644 --- a/src/shared/i18n/locales/ko/common.json +++ b/src/shared/i18n/locales/ko/common.json @@ -330,7 +330,17 @@ "panel": { "connection_failed": "연결 실패", "ssh_workspace": "SSH 워크스페이스 — {{label}}", - "could_not_connect": "{{label}}에 연결할 수 없습니다. SSH 설정을 확인하고 다시 시도하세요." + "could_not_connect": "{{label}}에 연결할 수 없습니다. SSH 설정을 확인하고 다시 시도하세요.", + "bootstrap_phase": { + "checking": "원격 확인 중…", + "uploading": "{{name}} 업로드 중…", + "verifying": "검증 중…", + "extracting": "{{name}} 압축 해제 중…", + "linking": "링크 중…", + "pruning": "정리 중…", + "skipped": "준비 완료", + "ready": "준비 완료" + } }, "conflictDialog": { "title": "저장 충돌 — {{filename}}이(가) 디스크에서 변경되었습니다", diff --git a/src/shared/ipc/contract.ts b/src/shared/ipc/contract.ts index 21449229..65f13fa0 100644 --- a/src/shared/ipc/contract.ts +++ b/src/shared/ipc/contract.ts @@ -100,6 +100,7 @@ import { import { TabMetaSchema } from "../types/tab"; import { WorkspaceConnectionChangedEventSchema, + WorkspaceConnectionProgressEventSchema, WorkspaceLocationSchema, WorkspaceMetaSchema, } from "../types/workspace"; @@ -533,6 +534,7 @@ export const ipcContract = { removed: listen(WorkspaceIdSchema), attention: listen(WorkspaceIdSchema), connectionChanged: listen(WorkspaceConnectionChangedEventSchema), + connectionProgress: listen(WorkspaceConnectionProgressEventSchema), reordered: listen(WorkspaceReorderedEventSchema), }, }, diff --git a/src/shared/types/workspace.ts b/src/shared/types/workspace.ts index 58d2f80f..094b429a 100644 --- 
a/src/shared/types/workspace.ts +++ b/src/shared/types/workspace.ts @@ -1,4 +1,5 @@ import { z } from "zod"; +import { LspBootstrapProgressPhaseSchema } from "../lsp/diagnostics"; import { ColorToneSchema } from "./color-tone"; import { TabMetaSchema } from "./tab"; import { WorkspaceIdSchema } from "./workspace-id"; @@ -55,6 +56,22 @@ export const WorkspaceConnectionChangedEventSchema = z.object({ export type WorkspaceConnectionEventStatus = z.infer; +/** + * 워크스페이스 SSH 에이전트 부트스트랩 진행 이벤트. + * LSP 부트스트랩의 LspBootstrapProgressPhaseSchema와 동일한 phase enum을 재사용한다. + */ +export const WorkspaceConnectionProgressEventSchema = z.object({ + workspaceId: z.string(), + name: z.string(), + phase: LspBootstrapProgressPhaseSchema, + bytesDone: z.number().int().nonnegative().optional(), + bytesTotal: z.number().int().nonnegative().optional(), +}); + +export type WorkspaceConnectionProgressEvent = z.infer< + typeof WorkspaceConnectionProgressEventSchema +>; + /** * Returns the path-like root used by legacy local-only callers. 
*/ diff --git a/tests/unit/renderer/state/stores/workspaces.test.ts b/tests/unit/renderer/state/stores/workspaces.test.ts index 962c3e5c..063b438a 100644 --- a/tests/unit/renderer/state/stores/workspaces.test.ts +++ b/tests/unit/renderer/state/stores/workspaces.test.ts @@ -63,6 +63,7 @@ function resetStore(): void { useWorkspacesStore.setState({ workspaces: [], connectionStatusByWorkspaceId: {}, + connectionProgressByWorkspaceId: {}, }); } @@ -106,7 +107,11 @@ const storeWithFetch = createWorkspacesStore({ }); function resetFetchStore(): void { - storeWithFetch.setState({ workspaces: [], connectionStatusByWorkspaceId: {} }); + storeWithFetch.setState({ + workspaces: [], + connectionStatusByWorkspaceId: {}, + connectionProgressByWorkspaceId: {}, + }); fetchListResults.length = 0; fetchListCallCount = 0; } @@ -332,3 +337,87 @@ describe("workspaces store — fetchList fallback on inconsistency", () => { expect(ids).toContain(ID_B); }); }); + +// --------------------------------------------------------------------------- +// workspace.connectionProgress 이벤트 구독 및 상태 관리 +// --------------------------------------------------------------------------- + +describe("workspaces store — connection progress", () => { + beforeEach(resetStore); + + test("stores progress event by workspaceId on connectionProgress", () => { + emitWorkspaceEvent("connectionProgress", { + workspaceId: WORKSPACE_ID, + name: "nexus-agent", + phase: "uploading", + bytesTotal: 8_388_608, + }); + + const progress = useWorkspacesStore.getState().connectionProgressByWorkspaceId[WORKSPACE_ID]; + expect(progress?.phase).toBe("uploading"); + expect(progress?.name).toBe("nexus-agent"); + expect(progress?.bytesTotal).toBe(8_388_608); + }); + + test("overwrites previous progress event with latest", () => { + emitWorkspaceEvent("connectionProgress", { + workspaceId: WORKSPACE_ID, + name: "nexus-agent", + phase: "uploading", + }); + emitWorkspaceEvent("connectionProgress", { + workspaceId: WORKSPACE_ID, + name: 
"nexus-agent", + phase: "verifying", + }); + + const progress = useWorkspacesStore.getState().connectionProgressByWorkspaceId[WORKSPACE_ID]; + expect(progress?.phase).toBe("verifying"); + }); + + test("clears progress entry when connectionChanged arrives with a terminal status", () => { + emitWorkspaceEvent("connectionProgress", { + workspaceId: WORKSPACE_ID, + name: "nexus-agent", + phase: "uploading", + }); + // 연결 완료 → terminal 상태 → progress가 undefined로 클리어되어야 한다. + emitWorkspaceEvent("connectionChanged", { + workspaceId: WORKSPACE_ID, + status: "connected", + }); + + const progress = useWorkspacesStore.getState().connectionProgressByWorkspaceId[WORKSPACE_ID]; + expect(progress).toBeUndefined(); + }); + + test("does not clear progress when connectionChanged arrives with connecting status", () => { + emitWorkspaceEvent("connectionProgress", { + workspaceId: WORKSPACE_ID, + name: "nexus-agent", + phase: "checking", + }); + emitWorkspaceEvent("connectionChanged", { + workspaceId: WORKSPACE_ID, + status: "connecting", + }); + + const progress = useWorkspacesStore.getState().connectionProgressByWorkspaceId[WORKSPACE_ID]; + expect(progress?.phase).toBe("checking"); + }); + + test("clears progress on error terminal status", () => { + emitWorkspaceEvent("connectionProgress", { + workspaceId: WORKSPACE_ID, + name: "nexus-agent", + phase: "uploading", + }); + emitWorkspaceEvent("connectionChanged", { + workspaceId: WORKSPACE_ID, + status: "error", + }); + + const progress = useWorkspacesStore.getState().connectionProgressByWorkspaceId[WORKSPACE_ID]; + expect(progress).toBeUndefined(); + }); +}); From ef5c26c7cc7fe36672b1f3e441bcd3f08207eeb5 Mon Sep 17 00:00:00 2001 From: moreih29 Date: Tue, 2 Jun 2026 12:07:57 +0900 Subject: [PATCH 04/11] fix(ssh): keepalive + atomic agent install to prevent orphan/ETXTBSY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two hardening layers for the case where the client dies abnormally 
(force-kill, sleep, network drop) while connected to an SSH workspace. L1 — connection keepalive: add ServerAliveInterval=15 / ServerAliveCountMax=3 to the agent channel, the persistent ControlMaster, and bootstrap transport commands. Previously there was no keepalive, so a dead client left the remote agent (holding its binary) alive until the kernel's default TCP timeout (hours), which then blocked the next launch's re-upload. Now a dead peer is detected in ~45s, ssh exits, and the remote agent gets stdin EOF and shuts down (killing its PTY children). L3 — atomic agent install: upload each artifact to a unique temp path in the same directory, then `mv -f` it into place. A rename over a file that a lingering old agent is still executing succeeds (the running process keeps the old inode), so a stale remote agent can never block reinstall with ETXTBSY ("Text file busy") — the exact failure seen after a force-quit. Same-dir rename is atomic, so there's no missing-file window either. No `pkill`, so a co-tenant session's agent on the same host is never killed. Co-Authored-By: Claude Opus 4.8 --- src/main/infra/agent/ssh/master.ts | 19 ++++++++++++- .../agent/ssh/ssh-bootstrap/transport.ts | 27 ++++++++++++++++--- tests/unit/main/agent/ssh-auth-pty.test.ts | 4 +++ tests/unit/main/agent/ssh-bootstrap.test.ts | 13 +++++++++ tests/unit/main/agent/ssh-channel.test.ts | 16 ++++++++++- 5 files changed, 74 insertions(+), 5 deletions(-) diff --git a/src/main/infra/agent/ssh/master.ts b/src/main/infra/agent/ssh/master.ts index a97fa461..b6434872 100644 --- a/src/main/infra/agent/ssh/master.ts +++ b/src/main/infra/agent/ssh/master.ts @@ -38,6 +38,22 @@ export interface SshControlMaster { const CONTROL_EXIT_UNLINK_FALLBACK_MS = 5_000; +/** + * Keepalive options applied to every long-lived ssh invocation (the agent + * channel and the persistent ControlMaster). 
Without these, a client that dies + * abnormally (force-kill, sleep, network drop) leaves the remote agent — and + * the binary it holds — alive until the kernel's default TCP timeout (hours), + * which then blocks the next launch's re-upload. ServerAliveInterval probes the + * peer at the SSH layer; after ServerAliveCountMax unanswered probes ssh exits, + * the remote session tears down, and the agent gets stdin EOF. ~15s × 3 ≈ 45s. + */ +const SSH_KEEPALIVE_ARGS: readonly string[] = [ + "-o", + "ServerAliveInterval=15", + "-o", + "ServerAliveCountMax=3", +]; + /** * Spawns the SSH client for direct stdin/stdout NDJSON exchange. Interactive * ControlMaster authentication and socket reuse live in `ssh-master`'s @@ -59,7 +75,7 @@ export function spawnSshMaster( * Creates the OpenSSH argument list without invoking a shell locally. */ export function buildSshArgs(options: SshMasterOptions): string[] { - const args = ["-o", "BatchMode=yes"]; + const args = ["-o", "BatchMode=yes", ...SSH_KEEPALIVE_ARGS]; if (options.controlPath) { args.push("-S", options.controlPath, "-o", "ControlMaster=no"); } @@ -150,6 +166,7 @@ export function buildSshControlMasterArgs( "ControlMaster=yes", "-o", "ControlPersist=60", + ...SSH_KEEPALIVE_ARGS, "-f", "-N", ]; diff --git a/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts b/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts index bf460c82..858df8e1 100644 --- a/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts +++ b/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts @@ -4,7 +4,7 @@ * uploading and verifying files, and quoting arbitrary strings into safe * shell or sftp tokens. 
*/ -import { createHash } from "node:crypto"; +import { createHash, randomBytes } from "node:crypto"; import fs from "node:fs/promises"; import path from "node:path"; import { type SpawnOptionsWithoutStdio, spawn as defaultSpawn } from "node:child_process"; @@ -73,7 +73,16 @@ export function buildSftpArgs(options: EnsureRemoteAgentOptions): string[] { /** Builds the ssh transport args shared by every remote command we run. */ export function buildSshTransportArgs(options: EnsureRemoteAgentOptions): string[] { - const args = ["-o", "BatchMode=yes"]; + // Keepalive so a bootstrap command over a dead connection fails fast (~45s) + // rather than hanging on the kernel's default TCP timeout. + const args = [ + "-o", + "BatchMode=yes", + "-o", + "ServerAliveInterval=15", + "-o", + "ServerAliveCountMax=3", + ]; if (options.controlPath) args.push("-S", options.controlPath, "-o", "ControlMaster=no"); if (options.port !== undefined) args.push("-p", String(options.port)); if (options.identityFile) args.push("-i", options.identityFile); @@ -110,15 +119,27 @@ export async function uploadAndVerifyFile(args: { const progressName = args.progressName ?? path.basename(args.remotePath); for (let attempt = 0; attempt < 2; attempt += 1) { + // Upload to a unique temp path in the same directory, then atomically + // rename it into place. `mv -f` over a file that a lingering OLD agent is + // still executing succeeds — the running process keeps the old inode while + // the name is repointed to the new one — so a stale remote agent never + // blocks reinstall with ETXTBSY ("Text file busy"). Same-dir rename keeps + // it on one filesystem, so the swap is atomic with no missing-file window. 
+ const tmpRemotePath = `${args.remotePath}.tmp.${randomBytes(6).toString("hex")}`; args.onProgress?.({ name: progressName, phase: "uploading", bytesDone: 0, bytesTotal: payload.byteLength, }); - await uploadFile(args.options, args.runner, args.localPath, args.remotePath, payload, { + await uploadFile(args.options, args.runner, args.localPath, tmpRemotePath, payload, { executable: args.executable, }); + await runSsh( + args.options, + args.runner, + `mv -f ${quoteShellArg(tmpRemotePath)} ${quoteShellArg(args.remotePath)}`, + ); args.onProgress?.({ name: progressName, phase: "uploading", diff --git a/tests/unit/main/agent/ssh-auth-pty.test.ts b/tests/unit/main/agent/ssh-auth-pty.test.ts index 72839c41..23309278 100644 --- a/tests/unit/main/agent/ssh-auth-pty.test.ts +++ b/tests/unit/main/agent/ssh-auth-pty.test.ts @@ -107,6 +107,10 @@ describe("authenticateSshControlMaster", () => { "ControlMaster=yes", "-o", "ControlPersist=60", + "-o", + "ServerAliveInterval=15", + "-o", + "ServerAliveCountMax=3", "-f", "-N", "-p", diff --git a/tests/unit/main/agent/ssh-bootstrap.test.ts b/tests/unit/main/agent/ssh-bootstrap.test.ts index 158fca75..e1d1891b 100644 --- a/tests/unit/main/agent/ssh-bootstrap.test.ts +++ b/tests/unit/main/agent/ssh-bootstrap.test.ts @@ -186,6 +186,7 @@ describe("ssh-bootstrap", () => { }), }; } + if (command === "ssh" && String(args.at(-1) ?? "").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -241,6 +242,7 @@ describe("ssh-bootstrap", () => { return { stdout: "" }; } if (command === "ssh" && remoteCommand.startsWith("index=0")) return { stdout: "" }; + if (command === "ssh" && String(args.at(-1) ?? 
"").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -292,6 +294,7 @@ describe("ssh-bootstrap", () => { } if (command === "ssh" && remoteCommand.startsWith("rm -rf")) return { stdout: "" }; if (command === "ssh" && remoteCommand.startsWith("index=0")) return { stdout: "" }; + if (command === "ssh" && String(args.at(-1) ?? "").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -364,6 +367,7 @@ describe("ssh-bootstrap", () => { } if (command === "ssh" && remoteCommand.startsWith("rm -rf")) return { stdout: "" }; if (command === "ssh" && remoteCommand.startsWith("index=0")) return { stdout: "" }; + if (command === "ssh" && String(args.at(-1) ?? "").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -416,6 +420,7 @@ describe("ssh-bootstrap", () => { }), }; } + if (command === "ssh" && String(args.at(-1) ?? "").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -499,6 +504,7 @@ describe("ssh-bootstrap", () => { return { stdout: "" }; } if (command === "ssh" && remoteCommand.startsWith("index=0")) return { stdout: "" }; + if (command === "ssh" && String(args.at(-1) ?? "").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -540,6 +546,7 @@ describe("ssh-bootstrap", () => { return { stdout: "" }; } if (command === "ssh" && remoteCommand.startsWith("index=0")) return { stdout: "" }; + if (command === "ssh" && String(args.at(-1) ?? 
"").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -579,6 +586,7 @@ describe("ssh-bootstrap", () => { return { stdout: "" }; } if (command === "ssh" && remoteCommand.startsWith("index=0")) return { stdout: "" }; + if (command === "ssh" && String(args.at(-1) ?? "").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -612,6 +620,7 @@ describe("ssh-bootstrap", () => { }), }; } + if (command === "ssh" && String(args.at(-1) ?? "").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -650,6 +659,7 @@ describe("ssh-bootstrap", () => { }), }; } + if (command === "ssh" && String(args.at(-1) ?? "").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -699,6 +709,7 @@ describe("ssh-bootstrap", () => { } return { stdout: "" }; } + if (command === "ssh" && String(args.at(-1) ?? "").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -770,6 +781,7 @@ describe("ssh-bootstrap", () => { shimCatCalls += 1; return { stdout: "" }; } + if (command === "ssh" && String(args.at(-1) ?? "").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; @@ -808,6 +820,7 @@ describe("ssh-bootstrap", () => { }), }; } + if (command === "ssh" && String(args.at(-1) ?? 
"").startsWith("mv -f")) return { stdout: "" }; throw new Error(`unexpected command: ${command} ${args.join(" ")}`); }) as SshBootstrapRunner; diff --git a/tests/unit/main/agent/ssh-channel.test.ts b/tests/unit/main/agent/ssh-channel.test.ts index 358a2ada..2bbca90c 100644 --- a/tests/unit/main/agent/ssh-channel.test.ts +++ b/tests/unit/main/agent/ssh-channel.test.ts @@ -132,6 +132,10 @@ describe("createSshChannel", () => { args: [ "-o", "BatchMode=yes", + "-o", + "ServerAliveInterval=15", + "-o", + "ServerAliveCountMax=3", "-p", "2222", "-i", @@ -404,7 +408,17 @@ describe("createSshChannel", () => { expect(spawnCalls).toEqual([ { command: "ssh", - args: ["-o", "BatchMode=yes", "--", "deploy@dev.example.com", "printf ready"], + args: [ + "-o", + "BatchMode=yes", + "-o", + "ServerAliveInterval=15", + "-o", + "ServerAliveCountMax=3", + "--", + "deploy@dev.example.com", + "printf ready", + ], }, ]); }); From e528ccd01cf54724c0fbc51ed4b776d3501412c6 Mon Sep 17 00:00:00 2001 From: moreih29 Date: Tue, 2 Jun 2026 12:16:45 +0900 Subject: [PATCH 05/11] feat(agent): idle watchdog + client keepalive to reap orphaned agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defense-in-depth (L2) for the case where the client is gone but the SSH connection lingers without delivering stdin EOF — a hung client process or a connection the kernel hasn't yet torn down. Previously the remote agent would keep running (holding its binary) indefinitely, since it had no liveness check of its own. Agent: StartIdleWatchdog self-terminates (via drainAndExit, which kills PTY children) when no inbound request line arrives within 60s. Run() stamps lastInbound on every received line, so any real traffic resets it. drainAndExit now calls an injectable `exit` (default os.Exit) so the watchdog and drain paths are unit-testable without killing the runner. 
Client: pipe.ts pings the agent every 20s (fire-and-forget `ping`, a no-op handler registered on the agent) once heartbeat is enabled, so a healthy but idle session keeps resetting the watchdog (~3 pings per 60s window). The timer is unref'd and cleared on dispose/fail. Client and remote agent are always the same build (the app uploads its own agent), so the `ping` method is always present — no version-skew concern. Combined with the L1 keepalive (ssh ServerAlive) this closes both paths: L1 reaps the connection at the SSH layer (~45s); L2 reaps the agent even if the connection itself never reports dead. Co-Authored-By: Claude Opus 4.8 --- cmd/agent/main.go | 15 ++++ internal/stdioserver/host.go | 62 +++++++++++++++-- internal/stdioserver/host_idle_test.go | 94 ++++++++++++++++++++++++++ src/main/infra/agent/pipe.ts | 36 +++++++++- 4 files changed, 201 insertions(+), 6 deletions(-) create mode 100644 internal/stdioserver/host_idle_test.go diff --git a/cmd/agent/main.go b/cmd/agent/main.go index 0c30db26..7d356ad7 100644 --- a/cmd/agent/main.go +++ b/cmd/agent/main.go @@ -95,6 +95,14 @@ func main() { } d.Register("hook.getInfo", newHookGetInfoHandler(hookProvider)) + // ping — client keepalive. The TS client calls this periodically so the + // idle watchdog (StartIdleWatchdog below) can tell a live-but-idle session + // from a vanished client. The handler is a no-op; merely receiving the line + // resets the agent's lastInbound timestamp. + d.Register("ping", func(_ context.Context, _ json.RawMessage) (any, error) { + return struct{}{}, nil + }) + host := stdioserver.New(d, os.Stdin, os.Stdout, agentLogger) fsys.SetEventSink(func(event string, payload any) error { err := host.EmitEvent(event, payload) @@ -143,6 +151,13 @@ func main() { // 일치해야 한다. ctx 취소(드레인) 시 자동 정지한다. host.StartHeartbeat(10 * time.Second) + // Idle watchdog: self-terminate if the client sends nothing for 60s. 
The + // client pings every ~20s (KEEPALIVE_PING_INTERVAL_MS in pipe.ts), so a + // healthy idle session resets the timer ~3× per window; only a vanished + // client (half-open TCP, hung process, sleep) with no stdin EOF trips it, + // preventing an orphaned remote agent from holding its binary. + host.StartIdleWatchdog(60 * time.Second) + host.Run() } diff --git a/internal/stdioserver/host.go b/internal/stdioserver/host.go index 6488a77e..a4098ded 100644 --- a/internal/stdioserver/host.go +++ b/internal/stdioserver/host.go @@ -63,6 +63,16 @@ type Host struct { // 종료된다 — 멈춘 hook이 셧다운을 막을 수 없다. hooksMu sync.Mutex shutdownHooks []func() + + // lastInbound is the UnixNano timestamp of the most recently received + // request line, read by the idle watchdog (StartIdleWatchdog) to detect a + // vanished client. Written from Run's single reader goroutine, read from + // the watchdog goroutine — atomic keeps that race-free. + lastInbound atomic.Int64 + + // exit terminates the process. Defaults to os.Exit; tests inject a fake so + // drain/watchdog termination can be observed without killing the runner. + exit func(int) } // New constructs a Host bound to the given dispatcher and stdio streams. @@ -81,6 +91,7 @@ func New(d *dispatch.Dispatcher, in io.Reader, out io.Writer, logger *slog.Logge ctx: ctx, cancel: cancel, accepting: true, + exit: os.Exit, } } @@ -131,7 +142,12 @@ func (h *Host) Run() { // Copy the slice — scanner reuses its internal buffer between // calls and the line escapes into a goroutine below. line := append([]byte(nil), scanner.Bytes()...) - if len(line) == 0 || !h.isAccepting() { + if len(line) == 0 { + continue + } + // Any inbound line proves the client is alive — reset the idle watchdog. 
+ h.lastInbound.Store(time.Now().UnixNano()) + if !h.isAccepting() { continue } h.wg.Add(1) @@ -181,6 +197,41 @@ func (h *Host) StartHeartbeat(interval time.Duration) { }() } +// StartIdleWatchdog self-terminates the agent (via drainAndExit) when no +// inbound request line arrives within `limit`. This reaps the agent — and, +// through it, every PTY child — when the client has vanished but the SSH +// connection lingers without delivering stdin EOF: a half-open TCP link, a +// hung client process, or a suspended laptop. The client sends a periodic +// `ping` so a healthy but idle session keeps resetting lastInbound; only a +// genuinely absent client trips the limit. +// +// `limit` must be comfortably larger than the client's keepalive ping interval +// (KEEPALIVE_PING_INTERVAL_MS in pipe.ts) so normal jitter never false-fires. +// A non-positive limit disables the watchdog. Call before Run(); the goroutine +// stops when h.ctx is cancelled (drain). +func (h *Host) StartIdleWatchdog(limit time.Duration) { + if limit <= 0 { + return + } + h.lastInbound.Store(time.Now().UnixNano()) + go func() { + ticker := time.NewTicker(limit / 3) + defer ticker.Stop() + for { + select { + case <-ticker.C: + last := time.Unix(0, h.lastInbound.Load()) + if time.Since(last) >= limit { + h.drainAndExit(0) + return + } + case <-h.ctx.Done(): + return + } + } + }() +} + // RegisterShutdownHook 는 drainAndExit가 os.Exit 호출 직전에 등록 순서대로 // 실행할 cleanup 콜백을 추가한다. SIGTERM 시 defer가 우회되므로, hookserver // 소켓 파일 정리 등 명시적 정리가 필요한 자원은 이 훅을 사용한다. @@ -269,7 +320,7 @@ func (h *Host) drainAndExit(code int) { // even if Wait() itself blocks", which Wait can do when a // handler is stuck in a syscall. The same forceExit covers // the shutdown-hook execution window below. 
- forceExit := time.AfterFunc(forceExitAfter, func() { os.Exit(code) }) + forceExit := time.AfterFunc(forceExitAfter, func() { h.exit(code) }) done := make(chan struct{}) go func() { h.wg.Wait() @@ -279,14 +330,15 @@ func (h *Host) drainAndExit(code int) { case <-done: // Continue past the select to run shutdown hooks before exit. case <-time.After(forceExitAfter): - os.Exit(code) + h.exit(code) + return } // Hooks run synchronously under the same forceExit timer. - // If a hook hangs the AfterFunc above still trips os.Exit on time. + // If a hook hangs the AfterFunc above still trips h.exit on time. h.runShutdownHooks() forceExit.Stop() h.cancel() - os.Exit(code) + h.exit(code) }) } diff --git a/internal/stdioserver/host_idle_test.go b/internal/stdioserver/host_idle_test.go new file mode 100644 index 00000000..9916705d --- /dev/null +++ b/internal/stdioserver/host_idle_test.go @@ -0,0 +1,94 @@ +// Package stdioserver — idle watchdog tests. +// +// StartIdleWatchdog must self-terminate the agent when the client stops +// sending (vanished client, half-open link) but must never fire while inbound +// traffic keeps arriving. We inject a fake exit so termination is observable +// without killing the test runner. +package stdioserver + +import ( + "io" + "log/slog" + "strings" + "testing" + "time" + + "github.com/nexus-code/nexus-code/internal/dispatch" +) + +func newTestHost() *Host { + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + return New(dispatch.New(), strings.NewReader(""), io.Discard, logger) +} + +// Watchdog fires when no inbound line arrives within the limit. 
+func TestIdleWatchdogExitsWhenClientVanishes(t *testing.T) { + host := newTestHost() + exited := make(chan int, 1) + host.exit = func(code int) { + select { + case exited <- code: + default: + } + } + + host.StartIdleWatchdog(30 * time.Millisecond) + + select { + case code := <-exited: + if code != 0 { + t.Fatalf("exit code = %d, want 0", code) + } + case <-time.After(2 * time.Second): + t.Fatal("idle watchdog did not terminate within 2s of client silence") + } +} + +// Watchdog must NOT fire while inbound lines keep resetting lastInbound. +func TestIdleWatchdogStaysAliveWithTraffic(t *testing.T) { + host := newTestHost() + exited := make(chan int, 1) + host.exit = func(code int) { + select { + case exited <- code: + default: + } + } + + host.StartIdleWatchdog(60 * time.Millisecond) + + stop := time.After(240 * time.Millisecond) + tick := time.NewTicker(20 * time.Millisecond) + defer tick.Stop() + for { + select { + case <-tick.C: + host.lastInbound.Store(time.Now().UnixNano()) + case code := <-exited: + t.Fatalf("watchdog fired during active traffic (code=%d)", code) + case <-stop: + return // survived the window without firing + } + } +} + +// A non-positive limit disables the watchdog entirely. +func TestIdleWatchdogDisabledWhenLimitNonPositive(t *testing.T) { + host := newTestHost() + exited := make(chan int, 1) + host.exit = func(code int) { + select { + case exited <- code: + default: + } + } + + host.StartIdleWatchdog(0) + + select { + case <-exited: + t.Fatal("watchdog fired despite a non-positive (disabled) limit") + case <-time.After(100 * time.Millisecond): + // expected: no termination + } +} diff --git a/src/main/infra/agent/pipe.ts b/src/main/infra/agent/pipe.ts index 8695b297..e4ee8d4d 100644 --- a/src/main/infra/agent/pipe.ts +++ b/src/main/infra/agent/pipe.ts @@ -110,6 +110,11 @@ const STDOUT_BACKPRESSURE_LWM = 64 * 1024; // 64 KiB /** Event name emitted by the Go agent at a regular heartbeat interval. 
*/ const AGENT_HEARTBEAT_EVENT = "agent.heartbeat"; +// How often the client pings the agent so the agent's idle watchdog +// (StartIdleWatchdog in host.go, 60s limit) can tell a live-but-idle session +// from a vanished client. Must stay well under that limit — ~3 pings/window. +const KEEPALIVE_PING_INTERVAL_MS = 20_000; + const ReadyFrameSchema = z .object({ type: z.literal("ready"), @@ -235,6 +240,11 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe { let heartbeatWarned = false; let heartbeatWatchdogTimer: ReturnType | null = null; + // Client keepalive sender — pings the agent so its idle watchdog keeps a + // live-but-idle session alive. Started on ready (alongside the heartbeat + // watchdog), cleared on dispose/fail. + let keepaliveTimer: ReturnType | null = null; + const ready = new Promise((resolve, reject) => { resolveReady = () => { if (readySettled) return; @@ -259,6 +269,14 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe { heartbeatWarned = false; } + /** Clears the keepalive ping timer. */ + function clearKeepalive(): void { + if (keepaliveTimer !== null) { + clearInterval(keepaliveTimer); + keepaliveTimer = null; + } + } + /** Rejects all in-flight requests with the same terminal transport error. */ function rejectPendingRequests(error: Error): void { for (const requestId of Array.from(activeRequestIds)) { @@ -329,6 +347,19 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe { (timer as NodeJS.Timeout).unref(); } heartbeatWatchdogTimer = timer; + + // Start the keepalive sender on the same condition: the agent only + // runs its idle watchdog when heartbeat is enabled, so pinging is only + // needed then. Best-effort (fire) — a failed ping just means the + // channel is already tearing down. 
+ const ping = setInterval(() => { + if (disposed || terminalError) return; + pipe.fire("ping"); + }, KEEPALIVE_PING_INTERVAL_MS); + if (typeof (ping as NodeJS.Timeout).unref === "function") { + (ping as NodeJS.Timeout).unref(); + } + keepaliveTimer = ping; } resolveReady(); return; @@ -448,7 +479,7 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe { stderrLines.push(chunk); }); - return { + const pipe: NdjsonPipe = { ready, get methods(): readonly string[] | undefined { return capabilityMethods; @@ -543,6 +574,7 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe { if (disposed) return; disposed = true; clearHeartbeatWatchdog(); + clearKeepalive(); const error = createDisposedError(); rejectReady(error); rejectPendingRequests(error); @@ -552,6 +584,7 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe { if (terminalError) return; terminalError = error; clearHeartbeatWatchdog(); + clearKeepalive(); rejectReady(error); rejectPendingRequests(error); }, @@ -561,6 +594,7 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe { return { wasReady: readySettled, stderrTail: recentStderr.join(" | ") }; }, }; + return pipe; } // === error helpers (exported for orchestrator use) === From a7ff2aceafb23cf034f8db4d2694356b97344fbc Mon Sep 17 00:00:00 2001 From: moreih29 Date: Tue, 2 Jun 2026 12:59:52 +0900 Subject: [PATCH 06/11] fix(ssh): retry atomic agent install when rename fails on first attempt sftp exits 0 even when an individual put silently fails, so a transient upload error left the temp file missing; the subsequent 'mv -f tmp final' then threw 'no such file' and aborted the whole bootstrap (intermittent 'SSH transport failed' that cleared on a manual retry). The atomic-install change (ef5c26c7) moved the failure ahead of the sha256 verify, removing the pre-existing retry resilience. 
Wrap each upload->rename->verify pass in try/catch so any failure (missing temp, rename error, sha mismatch) retries the full upload instead of propagating, and best-effort rm the orphaned temp between attempts so failed runs never leave .tmp.* files beside the installed binary. The sha256 check remains the sole correctness gate. Co-Authored-By: Claude Opus 4.8 --- .../agent/ssh/ssh-bootstrap/transport.ts | 76 +++++++++++++------ tests/unit/main/agent/ssh-bootstrap.test.ts | 61 ++++++++++++++- 2 files changed, 111 insertions(+), 26 deletions(-) diff --git a/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts b/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts index 858df8e1..9035ee94 100644 --- a/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts +++ b/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts @@ -4,10 +4,11 @@ * uploading and verifying files, and quoting arbitrary strings into safe * shell or sftp tokens. */ + +import { spawn as defaultSpawn, type SpawnOptionsWithoutStdio } from "node:child_process"; import { createHash, randomBytes } from "node:crypto"; import fs from "node:fs/promises"; import path from "node:path"; -import { type SpawnOptionsWithoutStdio, spawn as defaultSpawn } from "node:child_process"; import { createSshError } from "../../pipe"; import type { EnsureRemoteAgentOptions, @@ -118,6 +119,7 @@ export async function uploadAndVerifyFile(args: { } const progressName = args.progressName ?? path.basename(args.remotePath); + let lastError: unknown; for (let attempt = 0; attempt < 2; attempt += 1) { // Upload to a unique temp path in the same directory, then atomically // rename it into place. `mv -f` over a file that a lingering OLD agent is @@ -126,31 +128,57 @@ export async function uploadAndVerifyFile(args: { // blocks reinstall with ETXTBSY ("Text file busy"). Same-dir rename keeps // it on one filesystem, so the swap is atomic with no missing-file window.
const tmpRemotePath = `${args.remotePath}.tmp.${randomBytes(6).toString("hex")}`; - args.onProgress?.({ - name: progressName, - phase: "uploading", - bytesDone: 0, - bytesTotal: payload.byteLength, - }); - await uploadFile(args.options, args.runner, args.localPath, tmpRemotePath, payload, { - executable: args.executable, - }); - await runSsh( - args.options, - args.runner, - `mv -f ${quoteShellArg(tmpRemotePath)} ${quoteShellArg(args.remotePath)}`, + try { + args.onProgress?.({ + name: progressName, + phase: "uploading", + bytesDone: 0, + bytesTotal: payload.byteLength, + }); + await uploadFile(args.options, args.runner, args.localPath, tmpRemotePath, payload, { + executable: args.executable, + }); + // `sftp` exits 0 even when an individual `put` fails (a failed transfer + // is reported on stderr but never sets a nonzero exit code), so a + // transient upload error is invisible to uploadFile() — the temp file + // may be missing or truncated. The `mv` below would then throw + // ("no such file"), aborting the whole bootstrap. We therefore treat the + // entire upload→rename→verify sequence as one fallible attempt: any + // failure (missing temp, rename error, or sha mismatch) retries the full + // upload instead of propagating, restoring the pre-atomic-install + // resilience where the sha check alone gated correctness. 
+ await runSsh( + args.options, + args.runner, + `mv -f ${quoteShellArg(tmpRemotePath)} ${quoteShellArg(args.remotePath)}`, + ); + args.onProgress?.({ + name: progressName, + phase: "uploading", + bytesDone: payload.byteLength, + bytesTotal: payload.byteLength, + }); + args.onProgress?.({ name: progressName, phase: "verifying" }); + const remoteSha = await remoteSha256(args.options, args.runner, args.remotePath); + if (remoteSha === args.sha256) return; + lastError = createSshError( + "server.protocol-error", + new Error("remote artifact sha256 mismatch"), + ); + } catch (error) { + lastError = error; + } + // Best-effort: drop a temp file orphaned by a failed attempt so retries + // (and future bootstraps) never accumulate `.tmp.` litter alongside + // the installed binary. + await runSsh(args.options, args.runner, `rm -f ${quoteShellArg(tmpRemotePath)}`).catch( + () => undefined, ); - args.onProgress?.({ - name: progressName, - phase: "uploading", - bytesDone: payload.byteLength, - bytesTotal: payload.byteLength, - }); - args.onProgress?.({ name: progressName, phase: "verifying" }); - const remoteSha = await remoteSha256(args.options, args.runner, args.remotePath); - if (remoteSha === args.sha256) return; } - throw createSshError("server.protocol-error", new Error("remote artifact sha256 mismatch")); + throw ( + lastError ?? + createSshError("server.protocol-error", new Error("remote artifact sha256 mismatch")) + ); } /** sftp put with cat-pipe fallback when sftp is unavailable on the remote. 
*/ diff --git a/tests/unit/main/agent/ssh-bootstrap.test.ts b/tests/unit/main/agent/ssh-bootstrap.test.ts index e1d1891b..0deb15a2 100644 --- a/tests/unit/main/agent/ssh-bootstrap.test.ts +++ b/tests/unit/main/agent/ssh-bootstrap.test.ts @@ -1,9 +1,9 @@ +import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"; import { createHash } from "node:crypto"; +import { EventEmitter } from "node:events"; import fs from "node:fs"; import os from "node:os"; import path from "node:path"; -import { EventEmitter } from "node:events"; -import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"; import { buildRemoteAgentCommand, ensureRemoteAgent, @@ -259,6 +259,63 @@ describe("ssh-bootstrap", () => { expect(calls.some((call) => call.includes("index=0; for item in $(ls -1dt"))).toBe(true); }); + it("retries the full upload when the atomic rename fails on the first attempt", async () => { + // Regression: `sftp` exits 0 even when a `put` silently fails, so the temp + // file can be missing when `mv -f tmp final` runs — the rename then throws + // "no such file" and aborts the bootstrap. The fix wraps each attempt so a + // failed rename retries the full upload instead of propagating. Here the + // first `mv` throws and the second succeeds; the bootstrap must recover. + const { distDir, sha256 } = writeDist(); + let mvCount = 0; + let rmCount = 0; + const calls: string[] = []; + const runner = mock(async (command: string, args: string[], input?: Buffer | string) => { + const remoteCommand = args.at(-1) ?? 
""; + calls.push(`${command} ${remoteCommand}`); + if (command === "ssh" && remoteCommand === "uname -ms") return { stdout: "Linux x86_64\n" }; + if (command === "ssh" && remoteCommand.startsWith("printf")) { + return { stdout: "/home/user\n" }; + } + if (command === "ssh" && remoteCommand.startsWith("cat ~/.nexus-code/manifest.json")) { + return { stdout: "" }; + } + if (command === "ssh" && remoteCommand.startsWith("mkdir -p")) return { stdout: "" }; + if (command === "sftp") return { stdout: "" }; + if (command === "ssh" && remoteCommand.startsWith("mv -f")) { + mvCount += 1; + // First attempt simulates a temp file that never landed (silent sftp + // failure): the remote rename reports "no such file" and rejects. + if (mvCount === 1) { + throw new Error("zsh:1: no such file or directory: agent.tmp.deadbeef"); + } + return { stdout: "" }; + } + if (command === "ssh" && remoteCommand.startsWith("rm -f")) { + rmCount += 1; + return { stdout: "" }; + } + if (command === "ssh" && remoteCommand.startsWith("if command -v sha256sum")) { + return { stdout: `${sha256}\n` }; + } + if (command === "ssh" && remoteCommand.includes("cat > ~/.nexus-code/manifest.json")) { + expect(typeof input).toBe("string"); + return { stdout: "" }; + } + if (command === "ssh" && remoteCommand.startsWith("index=0")) return { stdout: "" }; + throw new Error(`unexpected command: ${command} ${args.join(" ")}`); + }) as SshBootstrapRunner; + + const result = await ensureRemoteAgent( + { host: "dev.example.com", remotePath: "/repo" }, + { distDir, runner, now: () => new Date("2026-05-12T00:00:00.000Z") }, + ); + + expect(result.uploaded).toBe(true); + expect(mvCount).toBe(2); // first rename failed, retry succeeded + expect(rmCount).toBe(1); // orphaned temp from the failed attempt was cleaned + expect(calls.filter((call) => call.startsWith("sftp"))).toHaveLength(2); + }); + it("uploads remote node and LSP artifacts lazily and writes a remote launcher", async () => { const { distDir, nodeSha256, 
lspSha256 } = writeDist(); const progress: unknown[] = []; From 86779f8b0c70c00a1eccd07db2186ffaaf51b9f1 Mon Sep 17 00:00:00 2001 From: moreih29 Date: Tue, 2 Jun 2026 13:16:13 +0900 Subject: [PATCH 07/11] feat(workspace): show SSH bootstrap progress in the add-workspace flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The add-workspace dialog only showed a static '연결 중' spinner while the agent binary uploaded/verified — the heavy work happens inside openBrowseSession, which (unlike registered workspaces' startSshProvider) never wired an onProgress callback. And it could not: progress events are keyed by workspaceId, but during 'add' no workspaceId (or sessionId) exists until the call returns. Key progress by a client-minted correlation id instead: - openBrowseSession accepts an optional progressId; the handler forwards an onProgress that broadcasts ssh.browseProgress keyed by that id. - The renderer mints the id before calling, subscribes via subscribeSshBrowseProgress, and renders the progress bar while connecting. Both add entry points are covered (new-connection form + saved-connection list). Extracted BootstrapProgressBar into a shared presentational component so the panel and the dialog render identical progress; the panel keeps its absolute-positioned placement via a className prop. 
Co-Authored-By: Claude Opus 4.8 --- src/main/features/ssh/ipc.ts | 50 +++++++++-- src/main/index.ts | 6 +- .../ssh-connection-list-view.tsx | 40 ++++++++- .../add-workspace/ssh-new-connection-view.tsx | 41 +++++++-- .../add-workspace/use-browse-progress.ts | 55 ++++++++++++ .../workspace/bootstrap-progress-bar.tsx | 89 +++++++++++++++++++ src/renderer/components/workspace/panel.tsx | 82 ++--------------- src/renderer/services/workspace/index.ts | 42 +++++---- src/renderer/services/workspace/ssh-browse.ts | 22 ++++- src/shared/ipc/contract.ts | 11 ++- src/shared/types/workspace.ts | 19 ++++ 11 files changed, 342 insertions(+), 115 deletions(-) create mode 100644 src/renderer/components/workspace/add-workspace/use-browse-progress.ts create mode 100644 src/renderer/components/workspace/bootstrap-progress-bar.tsx diff --git a/src/main/features/ssh/ipc.ts b/src/main/features/ssh/ipc.ts index bb9ebbcf..7f5ab258 100644 --- a/src/main/features/ssh/ipc.ts +++ b/src/main/features/ssh/ipc.ts @@ -15,6 +15,7 @@ import type { SshControlMaster } from "../../infra/agent/ssh/master"; import { type EnsureRemoteAgentOptions, ensureRemoteAgent, + type LspBootstrapProgressEvent, } from "../../infra/agent/ssh/ssh-bootstrap/index"; import { register, validateArgs } from "../../infra/ipc-router"; import { BROWSE_MAX_ENTRIES, type SshBrowseSessionRegistry } from "./browse-session-registry"; @@ -64,6 +65,10 @@ export function registerSshChannel(configPath = path.join(os.homedir(), ".ssh", export function registerSshBrowseHandlers( registry: SshBrowseSessionRegistry, promptHandler: SshAuthPromptHandler, + // Optional so existing callers/tests that don't care about progress keep + // working; when supplied, openBrowseSession streams bootstrap progress to + // the renderer via the `ssh.browseProgress` event keyed by progressId. 
+ broadcast?: BrowseProgressBroadcast, ): () => void { register("ssh", { call: { @@ -72,7 +77,7 @@ export function registerSshBrowseHandlers( // cancellation arrives at the renderer as ipcErr("cancelled") — the // router passes the envelope silently without logging, and the // renderer uses ipcCallResult to branch on result.kind. - openBrowseSession: openBrowseSessionResultHandler(registry, promptHandler), + openBrowseSession: openBrowseSessionResultHandler(registry, promptHandler, broadcast), browseSession: browseSessionHandler(registry), closeBrowseSession: closeBrowseSessionHandler(registry), }, @@ -81,6 +86,12 @@ export function registerSshBrowseHandlers( return () => registry.dispose(); } +/** + * Broadcast fn shape used to push browse-session bootstrap progress to the + * renderer — matches the main-process forwardBroadcast signature. + */ +export type BrowseProgressBroadcast = (channelName: string, event: string, args: unknown) => void; + // --------------------------------------------------------------------------- // listConfigHosts // --------------------------------------------------------------------------- @@ -138,13 +149,15 @@ function isMissingOrPermissionError(error: unknown): boolean { export function openBrowseSessionHandler( registry: SshBrowseSessionRegistry, promptHandler: SshAuthPromptHandler, - bootstrap: (options: EnsureRemoteAgentOptions) => ReturnType = ( - options, - ) => + broadcast?: BrowseProgressBroadcast, + bootstrap: ( + options: EnsureRemoteAgentOptions, + onProgress?: (event: LspBootstrapProgressEvent) => void, + ) => ReturnType = (options, onProgress) => // The promptHandler MUST be forwarded to ensureRemoteAgent — without it // createBootstrapContext skips interactive auth and password-only hosts // fail before the agent channel is ever opened. 
- ensureRemoteAgent(options, { promptHandler }), + ensureRemoteAgent(options, { promptHandler, onProgress }), ): (args: unknown) => Promise<{ sessionId: string; initialPath: string; user: string }> { return async ( args: unknown, @@ -155,6 +168,23 @@ export function openBrowseSessionHandler( // name so the connection (and the saved profile) has a concrete user. const user = resolveSshUser(params.user); + // Stream agent-bootstrap progress to the renderer for this connect attempt. + // Keyed by the caller's progressId so the "add workspace" dialog can show + // the same upload/verify progress that registered workspaces already get, + // even though no workspaceId exists yet. No progressId or broadcast → no-op. + const onProgress = + params.progressId && broadcast + ? (event: LspBootstrapProgressEvent): void => { + broadcast("ssh", "browseProgress", { + progressId: params.progressId, + name: event.name, + phase: event.phase, + bytesDone: event.bytesDone, + bytesTotal: event.bytesTotal, + }); + } + : undefined; + const bootstrapOptions: EnsureRemoteAgentOptions = { host: params.host, user, @@ -181,7 +211,7 @@ export function openBrowseSessionHandler( let bootstrapResult: Awaited> | null = null; let channel: ReturnType | null = null; try { - bootstrapResult = await bootstrap(bootstrapOptions); + bootstrapResult = await bootstrap(bootstrapOptions, onProgress); if (timedOut) { bootstrapResult.dispose?.(); @@ -256,9 +286,13 @@ export function openBrowseSessionHandler( export function openBrowseSessionResultHandler( registry: SshBrowseSessionRegistry, promptHandler: SshAuthPromptHandler, - bootstrap?: (options: EnsureRemoteAgentOptions) => ReturnType, + broadcast?: BrowseProgressBroadcast, + bootstrap?: ( + options: EnsureRemoteAgentOptions, + onProgress?: (event: LspBootstrapProgressEvent) => void, + ) => ReturnType, ): (args: unknown) => Promise | ReturnType> { - const inner = openBrowseSessionHandler(registry, promptHandler, bootstrap); + const inner = 
openBrowseSessionHandler(registry, promptHandler, broadcast, bootstrap); return async (args: unknown) => { try { const result = await inner(args); diff --git a/src/main/index.ts b/src/main/index.ts index e18de8ac..edf6b7a1 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -219,7 +219,11 @@ registerAppStateChannel(stateService, { registerFsChannel(workspaceManager, agentFsWatcher, workspaceStorage); registerPanelChannel(workspaceStorage); registerSshChannel(); -registerSshBrowseHandlers(sshBrowseRegistry, (prompt) => sshAuthPromptHub.request(prompt)); +registerSshBrowseHandlers( + sshBrowseRegistry, + (prompt) => sshAuthPromptHub.request(prompt), + forwardBroadcast, +); registerSshAuthPromptIpcChannels(sshAuthPromptHub); registerSystemChannel({ openNewWindow: () => createMainWindow(stateService.getState()) }); registerClipboardChannel(); diff --git a/src/renderer/components/workspace/add-workspace/ssh-connection-list-view.tsx b/src/renderer/components/workspace/add-workspace/ssh-connection-list-view.tsx index 71b7e938..3dd6001e 100644 --- a/src/renderer/components/workspace/add-workspace/ssh-connection-list-view.tsx +++ b/src/renderer/components/workspace/add-workspace/ssh-connection-list-view.tsx @@ -11,9 +11,11 @@ import { } from "../../../services/workspace"; import { EmptyState } from "../../ui/empty-state"; import { Skeleton, SkeletonLine } from "../../ui/skeleton"; +import { BootstrapProgressBar } from "../bootstrap-progress-bar"; import { ErrorNotice } from "./error-notice"; import { formatProfileSubtitle } from "./ssh-helpers"; import type { SshBrowseSession, SshConnectionListViewProps } from "./types"; +import { useBrowseProgress } from "./use-browse-progress"; // --------------------------------------------------------------------------- // SshConnectionListView — T4 implementation @@ -31,6 +33,13 @@ export function SshConnectionListView({ const [connectingId, setConnectingId] = useState(null); const [errorId, setErrorId] = useState(null); const 
[errorHuman, setErrorHuman] = useState(null); + // Agent-bootstrap progress for the in-flight connect (keyed by a client-minted + // progressId, since no sessionId/workspaceId exists yet). + const { + progress: browseProgress, + begin: beginProgress, + clear: clearProgress, + } = useBrowseProgress(); const loadProfiles = useCallback((): (() => void) => { let cancelled = false; @@ -76,6 +85,7 @@ export function SshConnectionListView({ port: profile.port, identityFile: profile.identityFile ?? undefined, authMode: profile.authMode as "interactive" | "key-only", + progressId: beginProgress(), }); if (!result.ok) { // User cancelled the SSH auth prompt — silent stop, no error banner. @@ -107,6 +117,7 @@ export function SshConnectionListView({ onConnected(session); } finally { setConnectingId(null); + clearProgress(); } } @@ -151,6 +162,18 @@ export function SshConnectionListView({ return (
+ {/* Bootstrap progress — shown once the agent upload/verify begins for the + profile being connected, instead of only the per-row spinner. */} + {busy && browseProgress ? ( + + ) : null} + {/* Empty state — shown above New Connection row when no profiles */} {!hasContent ? ( 0 ? (
- {t("workspace.favorites")} + + {t("workspace.favorites")} +
    {favorites.map((profile) => ( @@ -187,9 +212,14 @@ export function SshConnectionListView({ {/* Recent section */} {recents.length > 0 ? ( -
    0 ? "mt-3" : undefined}> +
    0 ? "mt-3" : undefined} + >
    - {t("workspace.recent")} + + {t("workspace.recent")} +
      {recents.map((profile) => ( @@ -308,7 +338,9 @@ function ConnectionProfileRow({ From 537191e6f05f493dfa6447c5139ca934dfa90d25 Mon Sep 17 00:00:00 2001 From: moreih29 Date: Tue, 2 Jun 2026 13:47:44 +0900 Subject: [PATCH 09/11] fix(ssh): survive ETXTBSY on agent exec and sweep stale temp uploads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause of intermittent connect failures to some hosts: when a bootstrap SSH connection goes half-open (client gives up via keepalive, server has no ClientAlive so it never reaps the session), the remote sftp-server lingers holding the just-uploaded agent binary's inode open for WRITE. The next connect then either re-uploads (leaving '.tmp.' litter the dead connection's rm could not clean) or execs a binary another writer still holds — failing with exit 126 'Text file busy'. Two client-side mitigations: - buildRemoteAgentCommand wraps the spawn in 'shopt -s execfail' + a bounded exec-retry loop (~5s). A failed exec in non-interactive bash otherwise terminates the shell immediately, so execfail is required for the loop to run. exec replaces the shell on success, so the healthy path is unchanged; only a transient busy state retries. Clears ETXTBSY once the writer's fd closes within the window. - uploadAndVerifyFile sweeps stale '.tmp.*' files (find -delete, which no-ops cleanly on an empty match under any login shell) before installing, so interrupted attempts stop accumulating temp litter. Durable fix for the orphaned-writer source is server-side (ClientAliveInterval on the remote sshd); these make the client tolerate it. 
Co-Authored-By: Claude Opus 4.8 --- .../infra/agent/ssh/ssh-bootstrap/index.ts | 97 +++++++++---------- .../agent/ssh/ssh-bootstrap/transport.ts | 11 +++ 2 files changed, 57 insertions(+), 51 deletions(-) diff --git a/src/main/infra/agent/ssh/ssh-bootstrap/index.ts b/src/main/infra/agent/ssh/ssh-bootstrap/index.ts index 9d56e452..07879188 100644 --- a/src/main/infra/agent/ssh/ssh-bootstrap/index.ts +++ b/src/main/infra/agent/ssh/ssh-bootstrap/index.ts @@ -22,18 +22,14 @@ import { type NodeRuntimeManifestEntry, type WrapperManifestEntry, } from "../../../../../shared/agent/manifest"; +import { getAgentDistDir } from "../../getAgentBinDir"; +import { createSshError } from "../../pipe"; +import { BASHRC_CONTENT, ZSHENV_CONTENT, ZSHRC_CONTENT } from "../../runtimeDirs"; import { - BASHRC_CONTENT, - ZSHENV_CONTENT, - ZSHRC_CONTENT, -} from "../../runtimeDirs"; -import { - authenticateSshControlMaster, type AuthenticateSshControlMasterDependencies, + authenticateSshControlMaster, type SshAuthPromptHandler, } from "../auth-pty"; -import { createSshError } from "../../pipe"; -import { getAgentDistDir } from "../../getAgentBinDir"; import { absoluteRemotePath, agentArtifactKey, @@ -58,36 +54,24 @@ import { uploadAndVerifyFile, } from "./transport"; import { - LOCAL_AGENT_DIST_DIR, - LSP_BOOTSTRAP_PROGRESS_EVENT, - REMOTE_AGENT_MANIFEST, - REMOTE_AGENT_PROTOCOL_MAJOR, - REMOTE_AGENT_ROOT, - REMOTE_AGENT_VERSION, type EnsureRemoteAgentOptions, type EnsureRemoteAgentResult, type EnsureRemoteLspServerOptions, type EnsureRemoteLspServerResult, + LOCAL_AGENT_DIST_DIR, + LSP_BOOTSTRAP_PROGRESS_EVENT, type LspBootstrapProgressEvent, type LspBootstrapProgressPhase, + REMOTE_AGENT_MANIFEST, + REMOTE_AGENT_PROTOCOL_MAJOR, + REMOTE_AGENT_ROOT, + REMOTE_AGENT_VERSION, type RemoteAgentPlatform, type SshBootstrapDependencies, type SshBootstrapRunner, type SshBootstrapRunnerResult, } from "./types"; -// Re-export the stable public surface so existing callers keep their import -// paths 
(`"./ssh-bootstrap"`) unchanged. -export { - LOCAL_AGENT_DIST_DIR, - LSP_BOOTSTRAP_PROGRESS_EVENT, - REMOTE_AGENT_MANIFEST, - REMOTE_AGENT_PROTOCOL_MAJOR, - REMOTE_AGENT_ROOT, - REMOTE_AGENT_VERSION, - parseUname, - remoteAgentBinaryPath, -}; export type { EnsureRemoteAgentOptions, EnsureRemoteAgentResult, @@ -100,6 +84,18 @@ export type { SshBootstrapRunner, SshBootstrapRunnerResult, }; +// Re-export the stable public surface so existing callers keep their import +// paths (`"./ssh-bootstrap"`) unchanged. +export { + LOCAL_AGENT_DIST_DIR, + LSP_BOOTSTRAP_PROGRESS_EVENT, + parseUname, + REMOTE_AGENT_MANIFEST, + REMOTE_AGENT_PROTOCOL_MAJOR, + REMOTE_AGENT_ROOT, + REMOTE_AGENT_VERSION, + remoteAgentBinaryPath, +}; interface ArtifactInstallRequest { readonly key: string; @@ -327,12 +323,23 @@ export function buildRemoteAgentCommand(binaryPath: string, remotePath: string): if (!remotePath.startsWith("/")) { throw createSshError( "server.protocol-error", - new Error( - `remotePath must be an absolute path, got: ${JSON.stringify(remotePath)}`, - ), + new Error(`remotePath must be an absolute path, got: ${JSON.stringify(remotePath)}`), ); } - const script = `exec ${quoteShellArg(binaryPath)} ${quoteShellArg(remotePath)}`; + // Retry the exec on ETXTBSY ("Text file busy"): immediately after a fresh + // install (sftp/cat write + `mv` into place) the kernel can briefly refuse to + // execute the binary while a writer fd from the upload — or a lingering writer + // from a previous, half-dead bootstrap connection — is still open. `exec` + // replaces the shell on success (so the loop body never runs twice in the + // healthy case); it only returns on failure, where we retry a handful of + // times over ~5s before giving up with the conventional 126 "cannot execute". + // `shopt -s execfail` is REQUIRED: without it a failed `exec` in a + // non-interactive bash terminates the shell immediately (so the loop would + // never retry). 
With it, a failed exec returns control and the loop runs. + const exec = `exec ${quoteShellArg(binaryPath)} ${quoteShellArg(remotePath)}`; + const script = + `shopt -s execfail; n=0; while :; do ${exec}; n=$((n+1)); ` + + `if [ "$n" -ge 25 ]; then exit 126; fi; sleep 0.2; done`; return `bash -lc ${singleQuoteShellArg(script)}`; } @@ -431,17 +438,13 @@ async function ensureRemoteArtifact( const existing = artifactLocks.get(lockKey); if (existing) return existing; - const pending = ensureRemoteArtifactUnlocked( - options, - runner, - now, - request, - onProgress, - ).finally(() => { - if (artifactLocks.get(lockKey) === pending) { - artifactLocks.delete(lockKey); - } - }); + const pending = ensureRemoteArtifactUnlocked(options, runner, now, request, onProgress).finally( + () => { + if (artifactLocks.get(lockKey) === pending) { + artifactLocks.delete(lockKey); + } + }, + ); artifactLocks.set(lockKey, pending); return pending; } @@ -614,10 +617,7 @@ async function ensureRemoteShimFiles( remoteHome: string, workspaceId: string, ): Promise { - const remoteShimDir = absoluteRemotePath( - remoteHome, - `${REMOTE_AGENT_ROOT}/shim/${workspaceId}`, - ); + const remoteShimDir = absoluteRemotePath(remoteHome, `${REMOTE_AGENT_ROOT}/shim/${workspaceId}`); // mkdir -p first; subsequent `cat > ` will inherit the 0o700-ish // umask of the remote user. 
We do not chmod the files explicitly — // they are plain rc files, not executables, and the default user-owned @@ -632,12 +632,7 @@ async function ensureRemoteShimFiles( for (const file of files) { const remotePath = `${remoteShimDir}/${file.name}`; - await runSsh( - options, - runner, - `cat > ${quoteShellArg(remotePath)}`, - file.content, - ); + await runSsh(options, runner, `cat > ${quoteShellArg(remotePath)}`, file.content); } return remoteShimDir; diff --git a/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts b/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts index 9035ee94..9f59e41d 100644 --- a/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts +++ b/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts @@ -113,6 +113,17 @@ export async function uploadAndVerifyFile(args: { args.runner, `mkdir -p ${quoteShellArg(remoteDir)} && chmod 755 ${args.remoteAgentRoot} ${quoteShellArg(remoteDir)}`, ); + // Best-effort sweep of `.tmp.` files left by earlier interrupted + // installs (a connection dropped after upload-to-temp but before the rename, + // or before our per-attempt rm could run over the now-dead connection). Use + // `find -delete` rather than a shell glob so an empty match is a clean no-op + // under any login shell (zsh aborts on an unmatched glob; find does not). The + // pattern is single-quoted so the login shell passes it to find verbatim. 
+ await runSsh( + args.options, + args.runner, + `find ${quoteShellArg(remoteDir)} -maxdepth 1 -name ${singleQuoteShellArg(`${path.posix.basename(args.remotePath)}.tmp.*`)} -delete`, + ).catch(() => undefined); const payload = await fs.readFile(args.localPath); if (sha256(payload) !== args.sha256) { throw createSshError("server.protocol-error", new Error("local artifact sha256 mismatch")); From 0fabc63a0dba6f7fdd353d327115110263f4a33c Mon Sep 17 00:00:00 2001 From: moreih29 Date: Tue, 2 Jun 2026 13:55:18 +0900 Subject: [PATCH 10/11] fix(ssh): only sweep stale (>5min) temp uploads to protect concurrent installs On a shared host, multiple workspaces/users can bootstrap the same agent binary path concurrently, each writing its own .tmp.* file. The unconditional sweep could delete a sibling's in-flight upload. Restrict the sweep to temp files older than 5 minutes (-mmin +5): a genuine orphan is minutes old, an in-flight upload is seconds old, so concurrent installs are never clobbered. Co-Authored-By: Claude Opus 4.8 --- .../infra/agent/ssh/ssh-bootstrap/transport.ts | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts b/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts index 9f59e41d..ad492899 100644 --- a/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts +++ b/src/main/infra/agent/ssh/ssh-bootstrap/transport.ts @@ -113,16 +113,23 @@ export async function uploadAndVerifyFile(args: { args.runner, `mkdir -p ${quoteShellArg(remoteDir)} && chmod 755 ${args.remoteAgentRoot} ${quoteShellArg(remoteDir)}`, ); - // Best-effort sweep of `.tmp.` files left by earlier interrupted + // Best-effort sweep of *stale* `.tmp.` files left by earlier interrupted // installs (a connection dropped after upload-to-temp but before the rename, - // or before our per-attempt rm could run over the now-dead connection).
Use - // `find -delete` rather than a shell glob so an empty match is a clean no-op - // under any login shell (zsh aborts on an unmatched glob; find does not). The + // or before our per-attempt rm could run over the now-dead connection). + // + // `-mmin +5` is critical for multi-workspace / multi-user hosts: several + // bootstraps can run against the same shared binary path concurrently (each + // writes its own `.tmp.`), so we must NEVER delete a temp file that a + // concurrent upload is still writing. A genuine orphan is minutes old; an + // in-flight upload is seconds old, so the age filter leaves it untouched. + // + // `find -delete` (not a shell glob) makes an empty match a clean no-op under + // any login shell — zsh aborts on an unmatched glob, find does not. The // pattern is single-quoted so the login shell passes it to find verbatim. await runSsh( args.options, args.runner, - `find ${quoteShellArg(remoteDir)} -maxdepth 1 -name ${singleQuoteShellArg(`${path.posix.basename(args.remotePath)}.tmp.*`)} -delete`, + `find ${quoteShellArg(remoteDir)} -maxdepth 1 -name ${singleQuoteShellArg(`${path.posix.basename(args.remotePath)}.tmp.*`)} -mmin +5 -delete`, ).catch(() => undefined); const payload = await fs.readFile(args.localPath); if (sha256(payload) !== args.sha256) { From 7fdb64553da3f8e56f0ec8a1f44424620a89b735 Mon Sep 17 00:00:00 2001 From: moreih29 Date: Tue, 2 Jun 2026 14:00:06 +0900 Subject: [PATCH 11/11] chore: bump version to 0.5.2 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 320f2702..ef304e8d 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "nexus-code", "productName": "NexusCode", - "version": "0.5.1", + "version": "0.5.2", "description": "Multi-workspace VSCode-style editor for macOS. Monaco editor + terminal in one window.", "license": "MIT", "private": true,