Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 112 additions & 11 deletions environment/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -632,26 +632,64 @@ func (e *Environment) resolveTimeout(timeoutSec int) time.Duration {

// executeBashCommand executes a bash command with the given parameters.
//
// extraEnv is merged on top of the runner process's own environment. Values
// may be secrets — they are NEVER logged here or in callers. The bash command
// should reference them as `$VAR_NAME` so expansion happens in-shell.
// extraEnv is merged on top of a Flashduty-scrubbed view of the runner's
// own environment: every FLASHDUTY_* key inherited from the parent process
// is dropped before the merge, so generic bash never sees per-user secrets
// like FLASHDUTY_APP_KEY. Phase 2 contract: only flashduty_exec receives
// those credentials, and only per-call via the wire payload.
//
// Caller-provided extraEnv is still honored verbatim (test fixtures and
// non-secret env hand-offs can pass anything they like); the scrub only
// removes ambient inheritance.
func (e *Environment) executeBashCommand(ctx context.Context, command, workdir string, timeout time.Duration, extraEnv map[string]string) (*protocol.BashResult, error) {
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()

cmd := exec.CommandContext(ctx, "bash", "-c", command) //nolint:gosec // G204: command is user-initiated via workspace tool
cmd.Dir = workdir
if len(extraEnv) > 0 {
// Inherit parent env, then layer caller-provided entries on top so
// duplicates take the caller value (matches `exec.Command` semantics
// when cmd.Env is set — last KEY=VALUE wins).
merged := os.Environ()
for k, v := range extraEnv {
merged = append(merged, k+"="+v)
merged := scrubFlashdutySecrets(os.Environ())
for k, v := range extraEnv {
merged = append(merged, k+"="+v)
}
cmd.Env = merged
return runCapturedCommand(ctx, cmd)
}

// scrubFlashdutySecrets returns a copy of env with every entry whose key
// starts with "FLASHDUTY_" removed. The intent is to keep ambient Flashduty
// credentials (FLASHDUTY_APP_KEY, FLASHDUTY_API_BASE, …) out of generic
// bash invocations — they belong only on the flashduty_exec path, which
// re-supplies them per call via wire-level extraEnv.
//
// Implementation note: env entries are "KEY=VALUE"; we split on the first
// "=" rather than checking the full prefix to avoid false negatives on
// pathological inputs like "FLASHDUTY_=...".
func scrubFlashdutySecrets(env []string) []string {
scrubbed := make([]string, 0, len(env))
for _, entry := range env {
eq := strings.IndexByte(entry, '=')
if eq < 0 {
scrubbed = append(scrubbed, entry)
continue
}
cmd.Env = merged
if strings.HasPrefix(entry[:eq], "FLASHDUTY_") {
continue
}
scrubbed = append(scrubbed, entry)
}
return scrubbed
}

// runCapturedCommand runs a prepared exec.Cmd and captures stdout/stderr with
// a 10MB per-stream cap, then assembles a BashResult with truncation and exit
// code populated. Shared between bash (`bash -c "<cmd>"`) and flashduty_exec
// (`flashduty <verb> <args...>`) so the per-stream cap, deadline handling, and
// legacy stdout mirror behavior stay identical across both paths.
//
// ctx MUST be the same WithTimeout-wrapped context exec.CommandContext was
// built from — runCapturedCommand inspects ctx.Err() to distinguish a
// deadline-induced kill from a genuine non-zero exit.
func runCapturedCommand(ctx context.Context, cmd *exec.Cmd) (*protocol.BashResult, error) {
// 10MB per-stream cap prevents OOM from runaway commands while leaving
// plenty of headroom for normal LLM-context output.
const maxOutputSize = 10 * 1024 * 1024
Expand Down Expand Up @@ -700,6 +738,69 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s
return res, nil
}

// FlashdutyExec is the public entrypoint for the flashduty_exec operation.
// Thin wrapper around executeFlashdutyExec — kept so the ws handler dispatches
// through Environment's public surface, matching Bash()'s shape. Permission
// checking is intentionally skipped here: the binary itself is fixed
// (FLASHDUTY_BINARY or "flashduty"), so there is no arbitrary-command escape
// hatch to gate.
func (e *Environment) FlashdutyExec(ctx context.Context, args *protocol.FlashdutyExecArgs) (*protocol.BashResult, error) {
workdir, err := e.resolveWorkdir(args.Workdir)
if err != nil {
return nil, err
}
timeout := e.resolveTimeout(args.Timeout)
return e.executeFlashdutyExec(ctx, args.Verb, args.Args, workdir, timeout, args.Env)
}

// executeFlashdutyExec runs `flashduty <verb> <args...>` with the given env
// merged on top of the runner process's own environment. The subprocess's
// envp[] dies with the process; nothing here persists across calls. This is
// the only runner path that should receive FLASHDUTY_APP_KEY.
//
// Verb may be multi-word (e.g. "incident list") — we split on whitespace
// before passing to exec, so callers don't need to know that internally the
// CLI is a multi-arg dispatch. Unlike executeBashCommand there is NO shell —
// argv reaches the binary verbatim, so quoting/expansion can't leak secrets.
//
// Output capture (10MB cap, truncation markers) and .outputs/ spill semantics
// are identical to the bash path: runCapturedCommand for capture +
// processLargeOutput for spill. Callers receive a BashResult (alias:
// FlashdutyExecResult) so safari's existing decoder works as-is.
func (e *Environment) executeFlashdutyExec(
ctx context.Context,
verb string,
args []string,
workdir string,
timeout time.Duration,
extraEnv map[string]string,
) (*protocol.BashResult, error) {
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()

binary := os.Getenv("FLASHDUTY_BINARY")
if binary == "" {
binary = "flashduty"
}

cmdArgs := append(strings.Fields(verb), args...)
cmd := exec.CommandContext(ctx, binary, cmdArgs...) //nolint:gosec // G204: argv comes from audited tool path, never a shell
cmd.Dir = workdir
if len(extraEnv) > 0 {
merged := os.Environ()
for k, v := range extraEnv {
merged = append(merged, k+"="+v)
}
cmd.Env = merged
}

result, err := runCapturedCommand(ctx, cmd)
if err != nil {
return result, err
}
return e.processLargeOutput(ctx, result, "flashduty")
}

// ErrOutputCapped is returned by LimitedWriter once its byte cap has been
// reached. Surfaces the contract violation that would otherwise be hidden by
// the writer pretending it accepted everything (`return len(p), nil`).
Expand Down
106 changes: 106 additions & 0 deletions environment/environment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ import (
"errors"
"os"
"path/filepath"
"runtime"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -328,6 +330,67 @@ func TestEnvironment_Bash_Env(t *testing.T) {
assert.Equal(t, "still-here", result.Stdout)
}

// TestEnvironment_Bash_ScrubsFlashdutySecrets pins the Phase 2 invariant
// caught by Boss in E2E: even when the runner inherits FLASHDUTY_APP_KEY
// from its launching shell, generic bash MUST NOT see it. Only
// flashduty_exec receives those credentials, per-call via wire extraEnv.
func TestEnvironment_Bash_ScrubsFlashdutySecrets(t *testing.T) {
ws := newTestEnvironment(t)
ctx := context.Background()

t.Setenv("FLASHDUTY_APP_KEY", "leaked-secret")
t.Setenv("FLASHDUTY_API_BASE", "http://leaked-host")
t.Setenv("KEEP_ME_AROUND", "non-secret-value")

result, err := ws.Bash(ctx, &protocol.BashArgs{
Command: `printf "APP=%s|BASE=%s|KEEP=%s" "${FLASHDUTY_APP_KEY:-EMPTY}" "${FLASHDUTY_API_BASE:-EMPTY}" "${KEEP_ME_AROUND:-EMPTY}"`,
})
require.NoError(t, err)
assert.Equal(t, "APP=EMPTY|BASE=EMPTY|KEEP=non-secret-value", result.Stdout,
"generic bash must not inherit FLASHDUTY_* from the runner process — Phase 2 contract violation")

// Same scrub applies even when extraEnv is non-empty (the scrub happens
// before the merge, so caller env is layered on top of a clean view).
result, err = ws.Bash(ctx, &protocol.BashArgs{
Command: `printf "APP=%s|EXTRA=%s" "${FLASHDUTY_APP_KEY:-EMPTY}" "${SOME_EXTRA:-EMPTY}"`,
Env: map[string]string{"SOME_EXTRA": "from-caller"},
})
require.NoError(t, err)
assert.Equal(t, "APP=EMPTY|EXTRA=from-caller", result.Stdout)

// Caller is still free to set FLASHDUTY_* explicitly through extraEnv if
// they really want to (the scrub only removes ambient inheritance).
result, err = ws.Bash(ctx, &protocol.BashArgs{
Command: `printf %s "$FLASHDUTY_APP_KEY"`,
Env: map[string]string{"FLASHDUTY_APP_KEY": "caller-supplied"},
})
require.NoError(t, err)
assert.Equal(t, "caller-supplied", result.Stdout)
}

// TestScrubFlashdutySecrets pins the helper's behavior independently of the
// bash exec path: every FLASHDUTY_* key dropped, malformed entries kept
// verbatim, non-matching keys untouched.
func TestScrubFlashdutySecrets(t *testing.T) {
in := []string{
"FLASHDUTY_APP_KEY=secret",
"FLASHDUTY_API_BASE=http://x",
"FLASHDUTY_=edge",
"PATH=/usr/bin",
"KEEPME=ok",
"MALFORMED_ENTRY_NO_EQUALS",
"NEAR_FLASHDUTY_OK=keep",
}
out := scrubFlashdutySecrets(in)
want := []string{
"PATH=/usr/bin",
"KEEPME=ok",
"MALFORMED_ENTRY_NO_EQUALS",
"NEAR_FLASHDUTY_OK=keep",
}
assert.Equal(t, want, out)
}

func TestEnvironment_Bash_PermissionDenied(t *testing.T) {
tmpDir := t.TempDir()
// Note: rules are sorted alphabetically, so "echo *" comes after "*"
Expand Down Expand Up @@ -404,6 +467,49 @@ func TestSyncSkill_ProbeHit(t *testing.T) {
assert.Equal(t, resolveDir(t, dir), res.Path)
}

// TestExecuteFlashdutyExec_EnvIsolatedToSubprocess confirms that env values
// the caller hands to FlashdutyExec live on cmd.Env only — they must not
// leak back into the runner process. Without this guarantee, FLASHDUTY_APP_KEY
// from one call would persist for the next call (or even the bash op).
func TestExecuteFlashdutyExec_EnvIsolatedToSubprocess(t *testing.T) {
e := newTestEnvironment(t)
ctx := context.Background()
res, err := e.executeFlashdutyExec(ctx, "version", nil, "", 5*time.Second, map[string]string{
"FLASHDUTY_APP_KEY": "secret-fake",
})
// Acceptable: the flashduty binary may not exist in CI → exit 127 / ENOENT.
// What matters is the call shape and env isolation.
_ = res
_ = err
if got := os.Getenv("FLASHDUTY_APP_KEY"); got == "secret-fake" {
t.Fatal("env leaked from subprocess back to parent process — exec.Cmd.Env was not used correctly")
}
}

// TestExecuteFlashdutyExec_VerbSplitsOnWhitespace uses the FLASHDUTY_BINARY
// env hook to substitute /bin/echo, so we can capture argv as stdout. This
// is a behavior test (not a mock) that pins the verb-tokens-then-args order.
// /bin/echo is POSIX-only; on Windows the equivalent (PowerShell echo) doesn't
// share argv-as-stdout semantics, so the test is restricted to unix-like.
func TestExecuteFlashdutyExec_VerbSplitsOnWhitespace(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("relies on /bin/echo to inspect argv; no POSIX equivalent on Windows")
}
t.Setenv("FLASHDUTY_BINARY", "/bin/echo")
e := newTestEnvironment(t)
res, err := e.executeFlashdutyExec(context.Background(), "incident list", []string{"--json", "--limit", "3"}, "", 5*time.Second, nil)
if err != nil {
t.Fatalf("exec failed: %v", err)
}
want := "incident list --json --limit 3\n"
if res.Stdout != want {
t.Errorf("stdout = %q, want %q", res.Stdout, want)
}
if res.ExitCode != 0 {
t.Errorf("exit code = %d, want 0", res.ExitCode)
}
}

func TestSyncSkill_InstallOverwrites(t *testing.T) {
ws := newTestEnvironment(t)
dir := filepath.Join(ws.Root(), "skills", "demo")
Expand Down
23 changes: 23 additions & 0 deletions protocol/messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ const (
TaskOpStageKnowledgeFiles TaskOperation = "stage_knowledge_files"
TaskOpDeleteKnowledgeFiles TaskOperation = "delete_knowledge_files"
TaskOpReconcileKnowledgeManifest TaskOperation = "reconcile_knowledge_manifest"

// Phase 2: capability-bound Flashduty auth. flashduty_exec carries
// FLASHDUTY_APP_KEY only on this operation; generic TaskOpBash no longer
// sees it. The runner fork-execs `flashduty <verb> <args...>` with env from
// the payload. See fc-safari docs/superpowers/plans/2026-05-27-flashduty-cli-phase2-auth.md.
TaskOpFlashdutyExec TaskOperation = "flashduty_exec"
)

// TaskRequestPayload is the payload for task request messages.
Expand Down Expand Up @@ -191,6 +197,18 @@ type BashArgs struct {
Env map[string]string `json:"env,omitempty"`
}

// FlashdutyExecArgs is the payload for a flashduty CLI invocation.
// Auth secrets (FLASHDUTY_APP_KEY, FLASHDUTY_API_BASE) are placed in Env and
// MUST NOT be logged. Unlike BashArgs.Command, Verb and Args are exec'd
// directly without a shell — the runner does not interpret them.
type FlashdutyExecArgs struct {
Verb string `json:"verb"` // e.g. "incident list" — passed as space-separated verb tokens
Args []string `json:"args,omitempty"` // e.g. ["--json", "--limit", "10"]
Workdir string `json:"workdir,omitempty"`
Timeout int `json:"timeout,omitempty"` // seconds; default 120 if <=0
Env map[string]string `json:"env,omitempty"` // per-call secrets (never log)
}

// TaskOutputPayload is the payload for streaming task output.
type TaskOutputPayload struct {
TaskID string `json:"task_id"`
Expand Down Expand Up @@ -280,6 +298,11 @@ type BashResult struct {
TotalSize int64 `json:"total_size,omitempty"`
}

// FlashdutyExecResult mirrors BashResult — output capture, truncation, and
// spill semantics are identical. Type alias keeps the protocol minimal:
// no new unmarshal code needed; safari's BashResult decoder works as-is.
type FlashdutyExecResult = BashResult

// WebFetchArgs are the arguments for webfetch operation.
type WebFetchArgs struct {
URL string `json:"url"`
Expand Down
10 changes: 10 additions & 0 deletions ws/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,16 @@ func (h *Handler) executeTask(ctx context.Context, req *protocol.TaskRequestPayl
logger.Info("executing command", "command", args.Command, "workdir", args.Workdir)
return h.ws.Bash(ctx, args)

case protocol.TaskOpFlashdutyExec:
args, err := parseArgs[protocol.FlashdutyExecArgs](req.Args)
if err != nil {
return nil, fmt.Errorf("invalid flashduty_exec args: %w", err)
}
// Log verb + args only — args.Env contains FLASHDUTY_APP_KEY and MUST
// stay out of logs. The CLI argv itself never carries the secret.
logger.Info("executing flashduty cli", "verb", args.Verb, "args", args.Args, "workdir", args.Workdir)
return h.ws.FlashdutyExec(ctx, args)

case protocol.TaskOpWebFetch:
args, err := parseArgs[protocol.WebFetchArgs](req.Args)
if err != nil {
Expand Down
Loading