From 55673df00cd30486b73a3e005fa44f6138bb8038 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 28 May 2026 00:16:01 +0800 Subject: [PATCH 1/6] feat(protocol): add flashduty_exec operation for capability-bound auth --- protocol/messages.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/protocol/messages.go b/protocol/messages.go index 69c97d7..5aad6d1 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -119,6 +119,12 @@ const ( TaskOpStageKnowledgeFiles TaskOperation = "stage_knowledge_files" TaskOpDeleteKnowledgeFiles TaskOperation = "delete_knowledge_files" TaskOpReconcileKnowledgeManifest TaskOperation = "reconcile_knowledge_manifest" + + // Phase 2: capability-bound Flashduty auth. flashduty_exec carries + // FLASHDUTY_APP_KEY only on this operation; generic TaskOpBash no longer + // sees it. The runner fork-execs `flashduty ` with env from + // the payload. See fc-safari docs/superpowers/plans/2026-05-27-flashduty-cli-phase2-auth.md. + TaskOpFlashdutyExec TaskOperation = "flashduty_exec" ) // TaskRequestPayload is the payload for task request messages. @@ -191,6 +197,18 @@ type BashArgs struct { Env map[string]string `json:"env,omitempty"` } +// FlashdutyExecArgs is the payload for a flashduty CLI invocation. +// Auth secrets (FLASHDUTY_APP_KEY, FLASHDUTY_API_BASE) are placed in Env and +// MUST NOT be logged. Unlike BashArgs.Command, Verb and Args are exec'd +// directly without a shell — the runner does not interpret them. +type FlashdutyExecArgs struct { + Verb string `json:"verb"` // e.g. "incident list" — passed as space-separated verb tokens + Args []string `json:"args,omitempty"` // e.g. ["--json", "--limit", "10"] + Workdir string `json:"workdir,omitempty"` + Timeout int `json:"timeout,omitempty"` // seconds; default 120 if <=0 + Env map[string]string `json:"env,omitempty"` // per-call secrets (never log) +} + // TaskOutputPayload is the payload for streaming task output. type TaskOutputPayload struct { TaskID string `json:"task_id"` @@ -280,6 +298,11 @@ type BashResult struct { TotalSize int64 `json:"total_size,omitempty"` } +// FlashdutyExecResult mirrors BashResult — output capture, truncation, and +// spill semantics are identical. Type alias keeps the protocol minimal: +// no new unmarshal code needed; safari's BashResult decoder works as-is. +type FlashdutyExecResult = BashResult + // WebFetchArgs are the arguments for webfetch operation. type WebFetchArgs struct { URL string `json:"url"` From b2e5c064b5e5217e48976758339acc2dbe73d5e9 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 28 May 2026 00:18:40 +0800 Subject: [PATCH 2/6] feat(runner): implement flashduty_exec handler with per-call env injection Adds executeFlashdutyExec that fork-execs the flashduty CLI with per-call auth env, isolated from the generic bash code path. Routes TaskOpFlashdutyExec through the WebSocket handler. Phase 2 of fc-safari CLI adoption. --- environment/environment.go | 75 +++++++++++++++++++++++++++++++++ environment/environment_test.go | 39 +++++++++++++++++ ws/handler.go | 10 +++++ 3 files changed, 124 insertions(+) diff --git a/environment/environment.go b/environment/environment.go index 1e50270..182e01f 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -651,7 +651,19 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s } cmd.Env = merged } + return runCapturedCommand(ctx, cmd) +} +// runCapturedCommand runs a prepared exec.Cmd and captures stdout/stderr with +// a 10MB per-stream cap, then assembles a BashResult with truncation and exit +// code populated. Shared between bash (`bash -c ""`) and flashduty_exec +// (`flashduty `) so the per-stream cap, deadline handling, and +// legacy stdout mirror behavior stay identical across both paths. +// +// ctx MUST be the same WithTimeout-wrapped context exec.CommandContext was +// built from — runCapturedCommand inspects ctx.Err() to distinguish a +// deadline-induced kill from a genuine non-zero exit. +func runCapturedCommand(ctx context.Context, cmd *exec.Cmd) (*protocol.BashResult, error) { // 10MB per-stream cap prevents OOM from runaway commands while leaving // plenty of headroom for normal LLM-context output. const maxOutputSize = 10 * 1024 * 1024 @@ -700,6 +712,69 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s return res, nil } +// FlashdutyExec is the public entrypoint for the flashduty_exec operation. +// Thin wrapper around executeFlashdutyExec — kept so the ws handler dispatches +// through Environment's public surface, matching Bash()'s shape. Permission +// checking is intentionally skipped here: the binary itself is fixed +// (FLASHDUTY_BINARY or "flashduty"), so there is no arbitrary-command escape +// hatch to gate. +func (e *Environment) FlashdutyExec(ctx context.Context, args *protocol.FlashdutyExecArgs) (*protocol.BashResult, error) { + workdir, err := e.resolveWorkdir(args.Workdir) + if err != nil { + return nil, err + } + timeout := e.resolveTimeout(args.Timeout) + return e.executeFlashdutyExec(ctx, args.Verb, args.Args, workdir, timeout, args.Env) +} + +// executeFlashdutyExec runs `flashduty ` with the given env +// merged on top of the runner process's own environment. The subprocess's +// envp[] dies with the process; nothing here persists across calls. This is +// the only runner path that should receive FLASHDUTY_APP_KEY. +// +// Verb may be multi-word (e.g. "incident list") — we split on whitespace +// before passing to exec, so callers don't need to know that internally the +// CLI is a multi-arg dispatch. Unlike executeBashCommand there is NO shell — +// argv reaches the binary verbatim, so quoting/expansion can't leak secrets. +// +// Output capture (10MB cap, truncation markers) and .outputs/ spill semantics +// are identical to the bash path: runCapturedCommand for capture + +// processLargeOutput for spill. Callers receive a BashResult (alias: +// FlashdutyExecResult) so safari's existing decoder works as-is. +func (e *Environment) executeFlashdutyExec( + ctx context.Context, + verb string, + args []string, + workdir string, + timeout time.Duration, + extraEnv map[string]string, +) (*protocol.BashResult, error) { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + binary := os.Getenv("FLASHDUTY_BINARY") + if binary == "" { + binary = "flashduty" + } + + cmdArgs := append(strings.Fields(verb), args...) + cmd := exec.CommandContext(ctx, binary, cmdArgs...) //nolint:gosec // G204: argv comes from audited tool path, never a shell + cmd.Dir = workdir + if len(extraEnv) > 0 { + merged := os.Environ() + for k, v := range extraEnv { + merged = append(merged, k+"="+v) + } + cmd.Env = merged + } + + result, err := runCapturedCommand(ctx, cmd) + if err != nil { + return result, err + } + return e.processLargeOutput(ctx, result, "flashduty") +} + // ErrOutputCapped is returned by LimitedWriter once its byte cap has been // reached. Surfaces the contract violation that would otherwise be hidden by // the writer pretending it accepted everything (`return len(p), nil`). diff --git a/environment/environment_test.go b/environment/environment_test.go index 2307f83..ac57a0d 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -11,6 +11,7 @@ import ( "path/filepath" "strings" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -404,6 +405,44 @@ func TestSyncSkill_ProbeHit(t *testing.T) { assert.Equal(t, resolveDir(t, dir), res.Path) } +// TestExecuteFlashdutyExec_EnvIsolatedToSubprocess confirms that env values +// the caller hands to FlashdutyExec live on cmd.Env only — they must not +// leak back into the runner process. Without this guarantee, FLASHDUTY_APP_KEY +// from one call would persist for the next call (or even the bash op). +func TestExecuteFlashdutyExec_EnvIsolatedToSubprocess(t *testing.T) { + e := newTestEnvironment(t) + ctx := context.Background() + res, err := e.executeFlashdutyExec(ctx, "version", nil, "", 5*time.Second, map[string]string{ + "FLASHDUTY_APP_KEY": "secret-fake", + }) + // Acceptable: the flashduty binary may not exist in CI → exit 127 / ENOENT. + // What matters is the call shape and env isolation. + _ = res + _ = err + if got := os.Getenv("FLASHDUTY_APP_KEY"); got == "secret-fake" { + t.Fatal("env leaked from subprocess back to parent process — exec.Cmd.Env was not used correctly") + } +} + +// TestExecuteFlashdutyExec_VerbSplitsOnWhitespace uses the FLASHDUTY_BINARY +// env hook to substitute /bin/echo, so we can capture argv as stdout. This +// is a behavior test (not a mock) that pins the verb-tokens-then-args order. +func TestExecuteFlashdutyExec_VerbSplitsOnWhitespace(t *testing.T) { + t.Setenv("FLASHDUTY_BINARY", "/bin/echo") + e := newTestEnvironment(t) + res, err := e.executeFlashdutyExec(context.Background(), "incident list", []string{"--json", "--limit", "3"}, "", 5*time.Second, nil) + if err != nil { + t.Fatalf("exec failed: %v", err) + } + want := "incident list --json --limit 3\n" + if res.Stdout != want { + t.Errorf("stdout = %q, want %q", res.Stdout, want) + } + if res.ExitCode != 0 { + t.Errorf("exit code = %d, want 0", res.ExitCode) + } +} + func TestSyncSkill_InstallOverwrites(t *testing.T) { ws := newTestEnvironment(t) dir := filepath.Join(ws.Root(), "skills", "demo") diff --git a/ws/handler.go b/ws/handler.go index 7d8a06f..2c82098 100644 --- a/ws/handler.go +++ b/ws/handler.go @@ -215,6 +215,16 @@ func (h *Handler) executeTask(ctx context.Context, req *protocol.TaskRequestPayl logger.Info("executing command", "command", args.Command, "workdir", args.Workdir) return h.ws.Bash(ctx, args) + case protocol.TaskOpFlashdutyExec: + args, err := parseArgs[protocol.FlashdutyExecArgs](req.Args) + if err != nil { + return nil, fmt.Errorf("invalid flashduty_exec args: %w", err) + } + // Log verb + args only — args.Env contains FLASHDUTY_APP_KEY and MUST + // stay out of logs. The CLI argv itself never carries the secret. + logger.Info("executing flashduty cli", "verb", args.Verb, "args", args.Args, "workdir", args.Workdir) + return h.ws.FlashdutyExec(ctx, args) + case protocol.TaskOpWebFetch: args, err := parseArgs[protocol.WebFetchArgs](req.Args) if err != nil { From 1e0a62b04649d564cf8cee7c832b6ca416ea5022 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 28 May 2026 01:17:06 +0800 Subject: [PATCH 3/6] fix(runner): scrub FLASHDUTY_* from generic bash environment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 contract is that only the `flashduty_exec` path carries per-user Flashduty credentials (FLASHDUTY_APP_KEY, FLASHDUTY_API_BASE, ...). The generic `bash` tool must not see them. The previous executeBashCommand inherited the runner's full os.Environ(), which leaked any FLASHDUTY_* keys present in the runner process — easy to hit on a dev workstation that exports them for safari, and impossible to audit in cloud sandboxes when sandbox-manager forwards arbitrary entries. Fix: scrubFlashdutySecrets() drops every FLASHDUTY_* entry from the inherited env before bash sees it. Caller-supplied extraEnv layers on top, so explicit hand-offs still work (test fixtures + non-secret overrides unchanged). The flashduty_exec path is untouched — it relies on safari's per-call extraEnv to re-add FLASHDUTY_APP_KEY in-place. Caught in Phase 2 E2E: bash subprocess reported FLASHDUTY_APP_KEY= even though Phase 2 was supposed to remove it. Re-verified with the scrub: bash sees zero FLASHDUTY_* entries; flashduty tool still returns real incident data via its own auth pathway. --- environment/environment.go | 50 ++++++++++++++++++++------- environment/environment_test.go | 61 +++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 12 deletions(-) diff --git a/environment/environment.go b/environment/environment.go index 182e01f..7d76315 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -632,28 +632,54 @@ func (e *Environment) resolveTimeout(timeoutSec int) time.Duration { // executeBashCommand executes a bash command with the given parameters. // -// extraEnv is merged on top of the runner process's own environment. Values -// may be secrets — they are NEVER logged here or in callers. The bash command -// should reference them as `$VAR_NAME` so expansion happens in-shell. +// extraEnv is merged on top of a Flashduty-scrubbed view of the runner's +// own environment: every FLASHDUTY_* key inherited from the parent process +// is dropped before the merge, so generic bash never sees per-user secrets +// like FLASHDUTY_APP_KEY. Phase 2 contract: only flashduty_exec receives +// those credentials, and only per-call via the wire payload. +// +// Caller-provided extraEnv is still honored verbatim (test fixtures and +// non-secret env hand-offs can pass anything they like); the scrub only +// removes ambient inheritance. func (e *Environment) executeBashCommand(ctx context.Context, command, workdir string, timeout time.Duration, extraEnv map[string]string) (*protocol.BashResult, error) { ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() cmd := exec.CommandContext(ctx, "bash", "-c", command) //nolint:gosec // G204: command is user-initiated via workspace tool cmd.Dir = workdir - if len(extraEnv) > 0 { - // Inherit parent env, then layer caller-provided entries on top so - // duplicates take the caller value (matches `exec.Command` semantics - // when cmd.Env is set — last KEY=VALUE wins). - merged := os.Environ() - for k, v := range extraEnv { - merged = append(merged, k+"="+v) - } - cmd.Env = merged + merged := scrubFlashdutySecrets(os.Environ()) + for k, v := range extraEnv { + merged = append(merged, k+"="+v) } + cmd.Env = merged return runCapturedCommand(ctx, cmd) } +// scrubFlashdutySecrets returns a copy of env with every entry whose key +// starts with "FLASHDUTY_" removed. The intent is to keep ambient Flashduty +// credentials (FLASHDUTY_APP_KEY, FLASHDUTY_API_BASE, …) out of generic +// bash invocations — they belong only on the flashduty_exec path, which +// re-supplies them per call via wire-level extraEnv. +// +// Implementation note: env entries are "KEY=VALUE"; we split on the first +// "=" rather than checking the full prefix to avoid false negatives on +// pathological inputs like "FLASHDUTY_=...". +func scrubFlashdutySecrets(env []string) []string { + scrubbed := make([]string, 0, len(env)) + for _, entry := range env { + eq := strings.IndexByte(entry, '=') + if eq < 0 { + scrubbed = append(scrubbed, entry) + continue + } + if strings.HasPrefix(entry[:eq], "FLASHDUTY_") { + continue + } + scrubbed = append(scrubbed, entry) + } + return scrubbed +} + // runCapturedCommand runs a prepared exec.Cmd and captures stdout/stderr with // a 10MB per-stream cap, then assembles a BashResult with truncation and exit // code populated. Shared between bash (`bash -c ""`) and flashduty_exec diff --git a/environment/environment_test.go b/environment/environment_test.go index ac57a0d..c5df667 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -329,6 +329,67 @@ func TestEnvironment_Bash_Env(t *testing.T) { assert.Equal(t, "still-here", result.Stdout) } +// TestEnvironment_Bash_ScrubsFlashdutySecrets pins the Phase 2 invariant +// caught by Boss in E2E: even when the runner inherits FLASHDUTY_APP_KEY +// from its launching shell, generic bash MUST NOT see it. Only +// flashduty_exec receives those credentials, per-call via wire extraEnv. +func TestEnvironment_Bash_ScrubsFlashdutySecrets(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + t.Setenv("FLASHDUTY_APP_KEY", "leaked-secret") + t.Setenv("FLASHDUTY_API_BASE", "http://leaked-host") + t.Setenv("KEEP_ME_AROUND", "non-secret-value") + + result, err := ws.Bash(ctx, &protocol.BashArgs{ + Command: `printf "APP=%s|BASE=%s|KEEP=%s" "${FLASHDUTY_APP_KEY:-EMPTY}" "${FLASHDUTY_API_BASE:-EMPTY}" "${KEEP_ME_AROUND:-EMPTY}"`, + }) + require.NoError(t, err) + assert.Equal(t, "APP=EMPTY|BASE=EMPTY|KEEP=non-secret-value", result.Stdout, + "generic bash must not inherit FLASHDUTY_* from the runner process — Phase 2 contract violation") + + // Same scrub applies even when extraEnv is non-empty (the scrub happens + // before the merge, so caller env is layered on top of a clean view). + result, err = ws.Bash(ctx, &protocol.BashArgs{ + Command: `printf "APP=%s|EXTRA=%s" "${FLASHDUTY_APP_KEY:-EMPTY}" "${SOME_EXTRA:-EMPTY}"`, + Env: map[string]string{"SOME_EXTRA": "from-caller"}, + }) + require.NoError(t, err) + assert.Equal(t, "APP=EMPTY|EXTRA=from-caller", result.Stdout) + + // Caller is still free to set FLASHDUTY_* explicitly through extraEnv if + // they really want to (the scrub only removes ambient inheritance). + result, err = ws.Bash(ctx, &protocol.BashArgs{ + Command: `printf %s "$FLASHDUTY_APP_KEY"`, + Env: map[string]string{"FLASHDUTY_APP_KEY": "caller-supplied"}, + }) + require.NoError(t, err) + assert.Equal(t, "caller-supplied", result.Stdout) +} + +// TestScrubFlashdutySecrets pins the helper's behavior independently of the +// bash exec path: every FLASHDUTY_* key dropped, malformed entries kept +// verbatim, non-matching keys untouched. +func TestScrubFlashdutySecrets(t *testing.T) { + in := []string{ + "FLASHDUTY_APP_KEY=secret", + "FLASHDUTY_API_BASE=http://x", + "FLASHDUTY_=edge", + "PATH=/usr/bin", + "KEEPME=ok", + "MALFORMED_ENTRY_NO_EQUALS", + "NEAR_FLASHDUTY_OK=keep", + } + out := scrubFlashdutySecrets(in) + want := []string{ + "PATH=/usr/bin", + "KEEPME=ok", + "MALFORMED_ENTRY_NO_EQUALS", + "NEAR_FLASHDUTY_OK=keep", + } + assert.Equal(t, want, out) +} + func TestEnvironment_Bash_PermissionDenied(t *testing.T) { tmpDir := t.TempDir() // Note: rules are sorted alphabetically, so "echo *" comes after "*" From f0322822100aa7fa27a7a18fc7b160fd0c6d64bf Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 28 May 2026 01:22:06 +0800 Subject: [PATCH 4/6] fix(runner/ci): gofmt protocol struct + skip POSIX-only test on windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two CI issues caught after the flashduty_exec PR push: 1. gofmt complained about the FlashdutyExecArgs struct alignment in protocol/messages.go — comment-column re-flow after the new fields. 2. windows-latest unit-test job failed because TestExecuteFlashdutyExec_ VerbSplitsOnWhitespace shells out to /bin/echo to inspect argv. There is no PowerShell equivalent that preserves the same stdout shape, so the test is now skipped on GOOS=windows (mirrors the envd POSIX-only skips landed earlier). --- environment/environment_test.go | 6 ++++++ protocol/messages.go | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/environment/environment_test.go b/environment/environment_test.go index c5df667..d39499e 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -9,6 +9,7 @@ import ( "errors" "os" "path/filepath" + "runtime" "strings" "testing" "time" @@ -488,7 +489,12 @@ func TestExecuteFlashdutyExec_EnvIsolatedToSubprocess(t *testing.T) { // TestExecuteFlashdutyExec_VerbSplitsOnWhitespace uses the FLASHDUTY_BINARY // env hook to substitute /bin/echo, so we can capture argv as stdout. This // is a behavior test (not a mock) that pins the verb-tokens-then-args order. +// /bin/echo is POSIX-only; on Windows the equivalent (PowerShell echo) doesn't +// share argv-as-stdout semantics, so the test is restricted to unix-like. func TestExecuteFlashdutyExec_VerbSplitsOnWhitespace(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("relies on /bin/echo to inspect argv; no POSIX equivalent on Windows") + } t.Setenv("FLASHDUTY_BINARY", "/bin/echo") e := newTestEnvironment(t) res, err := e.executeFlashdutyExec(context.Background(), "incident list", []string{"--json", "--limit", "3"}, "", 5*time.Second, nil) diff --git a/protocol/messages.go b/protocol/messages.go index 5aad6d1..4e5ba16 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -202,8 +202,8 @@ type BashArgs struct { // MUST NOT be logged. Unlike BashArgs.Command, Verb and Args are exec'd // directly without a shell — the runner does not interpret them. type FlashdutyExecArgs struct { - Verb string `json:"verb"` // e.g. "incident list" — passed as space-separated verb tokens - Args []string `json:"args,omitempty"` // e.g. ["--json", "--limit", "10"] + Verb string `json:"verb"` // e.g. "incident list" — passed as space-separated verb tokens + Args []string `json:"args,omitempty"` // e.g. ["--json", "--limit", "10"] Workdir string `json:"workdir,omitempty"` Timeout int `json:"timeout,omitempty"` // seconds; default 120 if <=0 Env map[string]string `json:"env,omitempty"` // per-call secrets (never log) From a86ab6cf683e53f940483689536fba48d243c093 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 28 May 2026 10:40:46 +0800 Subject: [PATCH 5/6] refactor(runner): drop flashduty_exec wire op, route auth through bash extraEnv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the dedicated flashduty_exec operation with the simpler model Safari's bash guard now drives: when Safari recognizes a `flashduty` CLI invocation, it injects FLASHDUTY_APP_KEY into the bash payload's `env` map for that one subprocess. The runner just honors extraEnv on bash like it already does. Removed surface: - `TaskOpFlashdutyExec` const + `FlashdutyExecArgs` struct (protocol) - `case TaskOpFlashdutyExec` dispatch (ws/handler.go) - `Environment.FlashdutyExec()` public + `executeFlashdutyExec()` impl - Two unit tests covering the deleted path Kept (and tightened comments): - `scrubFlashdutySecrets` on the bash path — defense-in-depth so the runner's own ambient FLASHDUTY_* never leaks into a bash subprocess even on BYOC dev workstations where the operator's shell exports them. Safari's per-call extraEnv overlay still wins the merge for genuine CLI invocations because extraEnv is layered last. - All bash output capture / 10MB cap / .outputs spill semantics unchanged. New test `TestEnvironment_Bash_ExtraEnvFlashdutyInjection` pins the overlay-wins-scrub contract end-to-end on the bash path. Trade-off vs the dedicated tool: argv-isolated `flashduty` invocations no longer exist — the CLI runs inside a real `bash -c "..."` shell, so a malicious model could in principle pipe through `xxd` or similar. Safari's bash guard (separate PR) handles that by rejecting any command that references `FLASHDUTY_APP_KEY` by name or bulk-dumps the env. Residual risk is accepted for BYOC/sandbox isolation where the runner already runs under customer-owned credentials in a customer-controlled environment. --- environment/environment.go | 87 +++++---------------------------- environment/environment_test.go | 59 +++++++--------------- protocol/messages.go | 23 --------- ws/handler.go | 10 ---- 4 files changed, 29 insertions(+), 150 deletions(-) diff --git a/environment/environment.go b/environment/environment.go index 7d76315..e48bda6 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -634,13 +634,11 @@ func (e *Environment) resolveTimeout(timeoutSec int) time.Duration { // // extraEnv is merged on top of a Flashduty-scrubbed view of the runner's // own environment: every FLASHDUTY_* key inherited from the parent process -// is dropped before the merge, so generic bash never sees per-user secrets -// like FLASHDUTY_APP_KEY. Phase 2 contract: only flashduty_exec receives -// those credentials, and only per-call via the wire payload. -// -// Caller-provided extraEnv is still honored verbatim (test fixtures and -// non-secret env hand-offs can pass anything they like); the scrub only -// removes ambient inheritance. +// is dropped before the merge, so the runner's own shell pollution never +// reaches the subprocess. Per-call Flashduty credentials reach this path +// only when safari's bash-side guard explicitly injects them into extraEnv +// after recognizing a whitelisted CLI in the command — they win the merge +// since extraEnv is layered last. func (e *Environment) executeBashCommand(ctx context.Context, command, workdir string, timeout time.Duration, extraEnv map[string]string) (*protocol.BashResult, error) { ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -657,9 +655,11 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s // scrubFlashdutySecrets returns a copy of env with every entry whose key // starts with "FLASHDUTY_" removed. The intent is to keep ambient Flashduty -// credentials (FLASHDUTY_APP_KEY, FLASHDUTY_API_BASE, …) out of generic -// bash invocations — they belong only on the flashduty_exec path, which -// re-supplies them per call via wire-level extraEnv. +// credentials (FLASHDUTY_APP_KEY, FLASHDUTY_API_BASE, …) out of bash +// invocations even when the runner process itself was launched from a +// shell that exports them (typical on BYOC dev workstations). Safari's +// bash guard re-supplies them per call via wire-level extraEnv on calls +// that genuinely invoke a Flashduty CLI. // // Implementation note: env entries are "KEY=VALUE"; we split on the first // "=" rather than checking the full prefix to avoid false negatives on @@ -682,9 +682,7 @@ func scrubFlashdutySecrets(env []string) []string { // runCapturedCommand runs a prepared exec.Cmd and captures stdout/stderr with // a 10MB per-stream cap, then assembles a BashResult with truncation and exit -// code populated. Shared between bash (`bash -c ""`) and flashduty_exec -// (`flashduty `) so the per-stream cap, deadline handling, and -// legacy stdout mirror behavior stay identical across both paths. +// code populated. // // ctx MUST be the same WithTimeout-wrapped context exec.CommandContext was // built from — runCapturedCommand inspects ctx.Err() to distinguish a @@ -738,69 +736,6 @@ func runCapturedCommand(ctx context.Context, cmd *exec.Cmd) (*protocol.BashResul return res, nil } -// FlashdutyExec is the public entrypoint for the flashduty_exec operation. -// Thin wrapper around executeFlashdutyExec — kept so the ws handler dispatches -// through Environment's public surface, matching Bash()'s shape. Permission -// checking is intentionally skipped here: the binary itself is fixed -// (FLASHDUTY_BINARY or "flashduty"), so there is no arbitrary-command escape -// hatch to gate. -func (e *Environment) FlashdutyExec(ctx context.Context, args *protocol.FlashdutyExecArgs) (*protocol.BashResult, error) { - workdir, err := e.resolveWorkdir(args.Workdir) - if err != nil { - return nil, err - } - timeout := e.resolveTimeout(args.Timeout) - return e.executeFlashdutyExec(ctx, args.Verb, args.Args, workdir, timeout, args.Env) -} - -// executeFlashdutyExec runs `flashduty ` with the given env -// merged on top of the runner process's own environment. The subprocess's -// envp[] dies with the process; nothing here persists across calls. This is -// the only runner path that should receive FLASHDUTY_APP_KEY. -// -// Verb may be multi-word (e.g. "incident list") — we split on whitespace -// before passing to exec, so callers don't need to know that internally the -// CLI is a multi-arg dispatch. Unlike executeBashCommand there is NO shell — -// argv reaches the binary verbatim, so quoting/expansion can't leak secrets. -// -// Output capture (10MB cap, truncation markers) and .outputs/ spill semantics -// are identical to the bash path: runCapturedCommand for capture + -// processLargeOutput for spill. Callers receive a BashResult (alias: -// FlashdutyExecResult) so safari's existing decoder works as-is. -func (e *Environment) executeFlashdutyExec( - ctx context.Context, - verb string, - args []string, - workdir string, - timeout time.Duration, - extraEnv map[string]string, -) (*protocol.BashResult, error) { - ctx, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - - binary := os.Getenv("FLASHDUTY_BINARY") - if binary == "" { - binary = "flashduty" - } - - cmdArgs := append(strings.Fields(verb), args...) - cmd := exec.CommandContext(ctx, binary, cmdArgs...) //nolint:gosec // G204: argv comes from audited tool path, never a shell - cmd.Dir = workdir - if len(extraEnv) > 0 { - merged := os.Environ() - for k, v := range extraEnv { - merged = append(merged, k+"="+v) - } - cmd.Env = merged - } - - result, err := runCapturedCommand(ctx, cmd) - if err != nil { - return result, err - } - return e.processLargeOutput(ctx, result, "flashduty") -} - // ErrOutputCapped is returned by LimitedWriter once its byte cap has been // reached. Surfaces the contract violation that would otherwise be hidden by // the writer pretending it accepted everything (`return len(p), nil`). diff --git a/environment/environment_test.go b/environment/environment_test.go index d39499e..02c99fc 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -9,10 +9,8 @@ import ( "errors" "os" "path/filepath" - "runtime" "strings" "testing" - "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -467,47 +465,26 @@ func TestSyncSkill_ProbeHit(t *testing.T) { assert.Equal(t, resolveDir(t, dir), res.Path) } -// TestExecuteFlashdutyExec_EnvIsolatedToSubprocess confirms that env values -// the caller hands to FlashdutyExec live on cmd.Env only — they must not -// leak back into the runner process. Without this guarantee, FLASHDUTY_APP_KEY -// from one call would persist for the next call (or even the bash op). -func TestExecuteFlashdutyExec_EnvIsolatedToSubprocess(t *testing.T) { - e := newTestEnvironment(t) +// TestEnvironment_Bash_ExtraEnvFlashdutyInjection confirms that the bash +// path accepts injected FLASHDUTY_* values via extraEnv (the new safari +// bash-guard path) and that they reach the subprocess even though the +// scrub strips ambient inheritance. This pins the merge-order invariant +// in executeBashCommand: scrubbed os.Environ first, extraEnv overlay last. +func TestEnvironment_Bash_ExtraEnvFlashdutyInjection(t *testing.T) { + ws := newTestEnvironment(t) ctx := context.Background() - res, err := e.executeFlashdutyExec(ctx, "version", nil, "", 5*time.Second, map[string]string{ - "FLASHDUTY_APP_KEY": "secret-fake", - }) - // Acceptable: the flashduty binary may not exist in CI → exit 127 / ENOENT. - // What matters is the call shape and env isolation. - _ = res - _ = err - if got := os.Getenv("FLASHDUTY_APP_KEY"); got == "secret-fake" { - t.Fatal("env leaked from subprocess back to parent process — exec.Cmd.Env was not used correctly") - } -} -// TestExecuteFlashdutyExec_VerbSplitsOnWhitespace uses the FLASHDUTY_BINARY -// env hook to substitute /bin/echo, so we can capture argv as stdout. This -// is a behavior test (not a mock) that pins the verb-tokens-then-args order. -// /bin/echo is POSIX-only; on Windows the equivalent (PowerShell echo) doesn't -// share argv-as-stdout semantics, so the test is restricted to unix-like. -func TestExecuteFlashdutyExec_VerbSplitsOnWhitespace(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("relies on /bin/echo to inspect argv; no POSIX equivalent on Windows") - } - t.Setenv("FLASHDUTY_BINARY", "/bin/echo") - e := newTestEnvironment(t) - res, err := e.executeFlashdutyExec(context.Background(), "incident list", []string{"--json", "--limit", "3"}, "", 5*time.Second, nil) - if err != nil { - t.Fatalf("exec failed: %v", err) - } - want := "incident list --json --limit 3\n" - if res.Stdout != want { - t.Errorf("stdout = %q, want %q", res.Stdout, want) - } - if res.ExitCode != 0 { - t.Errorf("exit code = %d, want 0", res.ExitCode) - } + // Parent process exports FLASHDUTY_APP_KEY — must be scrubbed. + t.Setenv("FLASHDUTY_APP_KEY", "ambient-leaked") + + // Safari guard supplies a per-call key via extraEnv — must land in subprocess. + result, err := ws.Bash(ctx, &protocol.BashArgs{ + Command: `printf %s "$FLASHDUTY_APP_KEY"`, + Env: map[string]string{"FLASHDUTY_APP_KEY": "per-call-injected"}, + }) + require.NoError(t, err) + assert.Equal(t, "per-call-injected", result.Stdout, + "extraEnv overlay must win over the scrub — that's the inject-on-whitelist path") } func TestSyncSkill_InstallOverwrites(t *testing.T) { diff --git a/protocol/messages.go b/protocol/messages.go index 4e5ba16..69c97d7 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -119,12 +119,6 @@ const ( TaskOpStageKnowledgeFiles TaskOperation = "stage_knowledge_files" TaskOpDeleteKnowledgeFiles TaskOperation = "delete_knowledge_files" TaskOpReconcileKnowledgeManifest TaskOperation = "reconcile_knowledge_manifest" - - // Phase 2: capability-bound Flashduty auth. flashduty_exec carries - // FLASHDUTY_APP_KEY only on this operation; generic TaskOpBash no longer - // sees it. The runner fork-execs `flashduty ` with env from - // the payload. See fc-safari docs/superpowers/plans/2026-05-27-flashduty-cli-phase2-auth.md. - TaskOpFlashdutyExec TaskOperation = "flashduty_exec" ) // TaskRequestPayload is the payload for task request messages. @@ -197,18 +191,6 @@ type BashArgs struct { Env map[string]string `json:"env,omitempty"` } -// FlashdutyExecArgs is the payload for a flashduty CLI invocation. -// Auth secrets (FLASHDUTY_APP_KEY, FLASHDUTY_API_BASE) are placed in Env and -// MUST NOT be logged. Unlike BashArgs.Command, Verb and Args are exec'd -// directly without a shell — the runner does not interpret them. -type FlashdutyExecArgs struct { - Verb string `json:"verb"` // e.g. "incident list" — passed as space-separated verb tokens - Args []string `json:"args,omitempty"` // e.g. ["--json", "--limit", "10"] - Workdir string `json:"workdir,omitempty"` - Timeout int `json:"timeout,omitempty"` // seconds; default 120 if <=0 - Env map[string]string `json:"env,omitempty"` // per-call secrets (never log) -} - // TaskOutputPayload is the payload for streaming task output. type TaskOutputPayload struct { TaskID string `json:"task_id"` @@ -298,11 +280,6 @@ type BashResult struct { TotalSize int64 `json:"total_size,omitempty"` } -// FlashdutyExecResult mirrors BashResult — output capture, truncation, and -// spill semantics are identical. Type alias keeps the protocol minimal: -// no new unmarshal code needed; safari's BashResult decoder works as-is. -type FlashdutyExecResult = BashResult - // WebFetchArgs are the arguments for webfetch operation. type WebFetchArgs struct { URL string `json:"url"` diff --git a/ws/handler.go b/ws/handler.go index 2c82098..7d8a06f 100644 --- a/ws/handler.go +++ b/ws/handler.go @@ -215,16 +215,6 @@ func (h *Handler) executeTask(ctx context.Context, req *protocol.TaskRequestPayl logger.Info("executing command", "command", args.Command, "workdir", args.Workdir) return h.ws.Bash(ctx, args) - case protocol.TaskOpFlashdutyExec: - args, err := parseArgs[protocol.FlashdutyExecArgs](req.Args) - if err != nil { - return nil, fmt.Errorf("invalid flashduty_exec args: %w", err) - } - // Log verb + args only — args.Env contains FLASHDUTY_APP_KEY and MUST - // stay out of logs. The CLI argv itself never carries the secret. - logger.Info("executing flashduty cli", "verb", args.Verb, "args", args.Args, "workdir", args.Workdir) - return h.ws.FlashdutyExec(ctx, args) - case protocol.TaskOpWebFetch: args, err := parseArgs[protocol.WebFetchArgs](req.Args) if err != nil { From 6c898876e72796d55d822ecdb711e825173d6963 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 28 May 2026 14:20:27 +0800 Subject: [PATCH 6/6] feat(runner): prepend bundled-tools dir to bash PATH (hermetic flashduty CLI) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A BYOC runner executes the agent's bash commands in the customer's own environment, where `flashduty` may already be on PATH at a different version — or be an unrelated binary of the same name. Resolving the CLI via the ambient PATH is therefore not hermetic. The runner now prepends its bundled-tools directory to every bash subprocess's PATH so OUR flashduty shadows any same-named host binary. The dir is FLASHDUTY_RUNNER_BIN_DIR when set, else the directory the runner executable lives in (the cloud sandbox image and the host installer both place the CLI next to the runner binary). No-op when the dir can't be determined, so existing deployments without a bundled CLI are unaffected. This makes renaming the CLI to a private name (e.g. `fduty`) unnecessary: PATH precedence guarantees our version wins even on a bare `flashduty` invocation, so the public brand name is preserved. withBundledToolsPath is unit-tested across prepend / already-first / absent-PATH / empty-value / empty-dir cases. Note: shipping the CLI into the BYOC host install (install.sh) is tracked separately — it needs a CLI-version-pinning policy decision. The cloud sandbox already bakes the CLI next to the runner, so that path is complete. --- environment/bundled_tools_path_test.go | 61 ++++++++++++++++++++++++++ environment/environment.go | 52 +++++++++++++++++++++- 2 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 environment/bundled_tools_path_test.go diff --git a/environment/bundled_tools_path_test.go b/environment/bundled_tools_path_test.go new file mode 100644 index 0000000..61acd39 --- /dev/null +++ b/environment/bundled_tools_path_test.go @@ -0,0 +1,61 @@ +package environment + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// withBundledToolsPath must put the bundled-tools dir first on PATH so our +// flashduty CLI shadows any same-named binary later on a BYOC host's PATH — +// while staying a no-op when there's nothing to do. +func TestWithBundledToolsPath(t *testing.T) { + cases := []struct { + name string + env []string + dir string + want []string + }{ + { + name: "prepends to existing PATH", + env: []string{"FOO=bar", "PATH=/usr/bin:/bin"}, + dir: "/opt/fd/bin", + want: []string{"FOO=bar", "PATH=/opt/fd/bin:/usr/bin:/bin"}, + }, + { + name: "already first is left untouched", + env: []string{"PATH=/opt/fd/bin:/usr/bin"}, + dir: "/opt/fd/bin", + want: []string{"PATH=/opt/fd/bin:/usr/bin"}, + }, + { + name: "synthesizes PATH when absent", + env: []string{"FOO=bar"}, + dir: "/opt/fd/bin", + want: []string{"FOO=bar", "PATH=/opt/fd/bin"}, + }, + { + name: "empty PATH value gets the dir", + env: []string{"PATH="}, + dir: "/opt/fd/bin", + want: []string{"PATH=/opt/fd/bin"}, + }, + { + name: "empty dir is a no-op", + env: []string{"PATH=/usr/bin"}, + dir: "", + want: []string{"PATH=/usr/bin"}, + }, + { + name: "dir present but not first is still prepended", + env: []string{"PATH=/usr/bin:/opt/fd/bin"}, + dir: "/opt/fd/bin", + want: []string{"PATH=/opt/fd/bin:/usr/bin:/opt/fd/bin"}, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, withBundledToolsPath(tc.env, tc.dir)) + }) + } +} diff --git a/environment/environment.go b/environment/environment.go index e48bda6..a47ea45 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -649,10 +649,60 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s for k, v := range extraEnv { merged = append(merged, k+"="+v) } - cmd.Env = merged + cmd.Env = withBundledToolsPath(merged, bundledToolsDir()) return runCapturedCommand(ctx, cmd) } +// bundledToolsDir returns the directory holding the CLIs the runner ships with +// it — the flashduty CLI in particular. Bash subprocesses must resolve OUR +// bundled flashduty rather than whatever a BYOC host happens to have earlier on +// PATH (a different version, or an unrelated binary of the same name). +// FLASHDUTY_RUNNER_BIN_DIR overrides the location; the default is the directory +// the runner executable lives in, since both the container image and install.sh +// place the CLI next to the runner binary. Returns "" when the path can't be +// determined, in which case PATH is left untouched. +func bundledToolsDir() string { + if d := os.Getenv("FLASHDUTY_RUNNER_BIN_DIR"); d != "" { + return d + } + exe, err := os.Executable() + if err != nil { + return "" + } + return filepath.Dir(exe) +} + +// withBundledToolsPath returns env with dir prepended to its PATH entry so the +// bundled CLI shadows any same-named binary later on the host PATH. It is a +// no-op when dir is empty or already the first PATH element, and synthesizes a +// PATH entry if env had none. +func withBundledToolsPath(env []string, dir string) []string { + if dir == "" { + return env + } + out := make([]string, 0, len(env)+1) + found := false + for _, e := range env { + if cur, ok := strings.CutPrefix(e, "PATH="); ok { + found = true + switch { + case cur == dir || strings.HasPrefix(cur, dir+":"): + out = append(out, e) // already first — leave as-is + case cur == "": + out = append(out, "PATH="+dir) + default: + out = append(out, "PATH="+dir+":"+cur) + } + continue + } + out = append(out, e) + } + if !found { + out = append(out, "PATH="+dir) + } + return out +} + // scrubFlashdutySecrets returns a copy of env with every entry whose key // starts with "FLASHDUTY_" removed. The intent is to keep ambient Flashduty // credentials (FLASHDUTY_APP_KEY, FLASHDUTY_API_BASE, …) out of bash