diff --git a/backend/internal/lifecycle/reactions.go b/backend/internal/lifecycle/reactions.go index 544f152f..72841510 100644 --- a/backend/internal/lifecycle/reactions.go +++ b/backend/internal/lifecycle/reactions.go @@ -53,7 +53,6 @@ const ( // or the session terminal). Only ci-failed is persistent, so a flapping // CI (fail→pending→fail) keeps draining one shared retry budget. type reactionConfig struct { - auto bool action actionKind message string priority ports.EventPriority @@ -69,32 +68,34 @@ type reactionConfig struct { // but no default row uses it. var defaultReactions = map[reactionKey]reactionConfig{ reactionCIFailed: { - auto: true, action: actionSendToAgent, persistent: true, retries: 2, + action: actionSendToAgent, persistent: true, retries: 2, message: "CI is failing on your PR. Review the failing output below and push a fix.", eventType: "reaction.ci-failed", priority: ports.PriorityAction, }, reactionChangesRequested: { - auto: true, action: actionSendToAgent, escalateAfter: 30 * time.Minute, + action: actionSendToAgent, escalateAfter: 30 * time.Minute, message: "A reviewer requested changes on your PR. Address the comments and push.", eventType: "reaction.changes-requested", priority: ports.PriorityAction, }, reactionBugbotComments: { - auto: true, action: actionSendToAgent, escalateAfter: 30 * time.Minute, + action: actionSendToAgent, escalateAfter: 30 * time.Minute, message: "An automated reviewer left comments on your PR. Address them and push.", eventType: "reaction.bugbot-comments", priority: ports.PriorityAction, }, reactionMergeConflicts: { - auto: true, action: actionSendToAgent, escalateAfter: 15 * time.Minute, + action: actionSendToAgent, escalateAfter: 15 * time.Minute, message: "Your PR has merge conflicts. Rebase onto the base branch and resolve them.", eventType: "reaction.merge-conflicts", priority: ports.PriorityAction, }, reactionAgentIdle: { - auto: true, action: actionSendToAgent, retries: 2, escalateAfter: 15 * time.Minute, + action: actionSendToAgent, retries: 2, escalateAfter: 15 * time.Minute, message: "You appear idle. Continue the task or explain what is blocking you.", eventType: "reaction.agent-idle", priority: ports.PriorityWarning, }, reactionApprovedAndGreen: { - auto: false, action: actionNotify, priority: ports.PriorityAction, + // notify-only: a green, approved PR is the human-decision path — the human + // decides to merge (no auto-merge by default). + action: actionNotify, priority: ports.PriorityAction, message: "PR is approved and green — ready to merge.", eventType: "reaction.approved-and-green", }, diff --git a/backend/internal/ports/inbound.go b/backend/internal/ports/inbound.go index 9f2e2bb4..30ab7559 100644 --- a/backend/internal/ports/inbound.go +++ b/backend/internal/ports/inbound.go @@ -43,12 +43,14 @@ type SessionManager interface { } type SpawnConfig struct { - ProjectID domain.ProjectID - IssueID domain.IssueID - Kind domain.SessionKind - Branch string - Prompt string - AgentRules string + ProjectID domain.ProjectID + IssueID domain.IssueID + Kind domain.SessionKind + Branch string + Prompt string + AgentRules string + // OpenTerminal is reserved for a later lane (open a terminal tab on spawn). + // Spawn does NOT honor it yet — setting it has no effect. OpenTerminal bool } diff --git a/backend/internal/session/manager.go b/backend/internal/session/manager.go index 62736739..e2723d26 100644 --- a/backend/internal/session/manager.go +++ b/backend/internal/session/manager.go @@ -27,6 +27,16 @@ import ( // ErrNotFound is returned by Get/Restore when no record exists for the id. var ErrNotFound = errors.New("session: not found") +// ErrNotRestorable is returned by Restore when the session is not torn down. +// Restoring a live session would spin up a second runtime/workspace for the same +// id, duplicating the agent and risking data loss. +var ErrNotRestorable = errors.New("session: not restorable (not terminal)") + +// ErrIncompleteTeardownMetadata is returned when a record's teardown handles are +// missing (empty workspace path or runtime handle), so calling a real adapter's +// Destroy could act on empty args — an unsafe delete. The teardown is skipped. +var ErrIncompleteTeardownMetadata = errors.New("session: incomplete teardown metadata") + // Env vars a spawned process reads to learn who it is (distillation §5.4). const ( EnvSessionID = "AO_SESSION_ID" @@ -167,13 +177,25 @@ func (m *Manager) Kill(ctx context.Context, id domain.SessionID, opts ports.Kill return ports.KillResult{ID: id}, fmt.Errorf("kill %s: metadata: %w", id, err) } + // Validate the teardown handles BEFORE recording intent or touching an + // adapter: a corrupted/partially-seeded record with empty handles must never + // reach Destroy (empty path / handle could be an unsafe delete). + rtHandle := runtimeHandle(meta) + wsInfo := workspaceInfo(rec, meta) + if !validRuntimeHandle(rtHandle) { + return ports.KillResult{ID: id}, fmt.Errorf("kill %s: %w: runtime handle", id, ErrIncompleteTeardownMetadata) + } + if !validWorkspaceInfo(wsInfo) { + return ports.KillResult{ID: id}, fmt.Errorf("kill %s: %w: workspace path", id, ErrIncompleteTeardownMetadata) + } + if err := m.lcm.OnKillRequested(ctx, id, ports.KillReason{Kind: opts.Reason, Detail: opts.Detail}); err != nil { return ports.KillResult{ID: id}, fmt.Errorf("kill %s: on kill requested: %w", id, err) } - if err := m.runtime.Destroy(ctx, runtimeHandle(meta)); err != nil { + if err := m.runtime.Destroy(ctx, rtHandle); err != nil { return ports.KillResult{ID: id}, fmt.Errorf("kill %s: runtime destroy: %w", id, err) } - if err := m.workspace.Destroy(ctx, workspaceInfo(rec, meta)); err != nil { + if err := m.workspace.Destroy(ctx, wsInfo); err != nil { return ports.KillResult{ID: id, WorkspaceFreed: false}, fmt.Errorf("kill %s: workspace destroy: %w", id, err) } return ports.KillResult{ID: id, WorkspaceFreed: true}, nil @@ -234,6 +256,11 @@ func (m *Manager) Restore(ctx context.Context, id domain.SessionID) (domain.Sess if !ok { return domain.Session{}, fmt.Errorf("restore %s: %w", id, ErrNotFound) } + // Only a torn-down session may be restored. Reopening a live one would spawn a + // duplicate runtime/workspace for the same id and reset its lifecycle. + if !isTerminalSession(rec.Lifecycle.Session.State) { + return domain.Session{}, fmt.Errorf("restore %s: %w", id, ErrNotRestorable) + } meta, err := m.store.GetMetadata(ctx, id) if err != nil { return domain.Session{}, fmt.Errorf("restore %s: metadata: %w", id, err) @@ -313,8 +340,17 @@ func (m *Manager) Cleanup(ctx context.Context, project domain.ProjectID) (ports. if err != nil { return res, fmt.Errorf("cleanup %s: metadata %s: %w", project, rec.ID, err) } - _ = m.runtime.Destroy(ctx, runtimeHandle(meta)) // best effort; usually already gone - if err := m.workspace.Destroy(ctx, workspaceInfo(rec, meta)); err != nil { + wsInfo := workspaceInfo(rec, meta) + if !validWorkspaceInfo(wsInfo) { + // No workspace path to reclaim — skip rather than hand empty args to a + // real adapter's Destroy (an unsafe delete). + res.Skipped = append(res.Skipped, rec.ID) + continue + } + if rtHandle := runtimeHandle(meta); validRuntimeHandle(rtHandle) { + _ = m.runtime.Destroy(ctx, rtHandle) // best effort; usually already gone + } + if err := m.workspace.Destroy(ctx, wsInfo); err != nil { res.Skipped = append(res.Skipped, rec.ID) continue } @@ -395,6 +431,19 @@ func workspaceInfo(rec domain.SessionRecord, meta map[string]string) ports.Works } } +// validRuntimeHandle reports whether the handle identifies a runtime to destroy. +// An adapter needs the handle id to target the right process; an empty handle +// would be ambiguous, so we refuse to call Destroy with one. +func validRuntimeHandle(h ports.RuntimeHandle) bool { + return h.ID != "" +} + +// validWorkspaceInfo reports whether there is a concrete path to reclaim. An +// empty path handed to a worktree-remove could resolve to an unsafe target. +func validWorkspaceInfo(w ports.WorkspaceInfo) bool { + return w.Path != "" +} + func defaultNewID(cfg ports.SpawnConfig) domain.SessionID { base := string(cfg.IssueID) if base == "" { diff --git a/backend/internal/session/manager_test.go b/backend/internal/session/manager_test.go index bda7e988..702a735e 100644 --- a/backend/internal/session/manager_test.go +++ b/backend/internal/session/manager_test.go @@ -212,6 +212,83 @@ func TestKill_WorktreeRemoveRefusalSurfaced(t *testing.T) { } } +func TestKill_IncompleteMetadata_RefusesTeardown(t *testing.T) { + h := newHarness("sess-1") + ctx := context.Background() + // A record with no teardown metadata (empty runtime handle + workspace path), + // e.g. a partially-seeded or corrupted record. + if err := h.store.Seed(ctx, domain.SessionRecord{ + ID: "sess-1", ProjectID: testProject, + Lifecycle: lc(domain.SessionWorking, domain.ReasonTaskInProgress, domain.PRNone, ""), + }); err != nil { + t.Fatalf("seed: %v", err) + } + + if _, err := h.sm.Kill(ctx, "sess-1", ports.KillOptions{Reason: ports.KillManual}); !errors.Is(err, ErrIncompleteTeardownMetadata) { + t.Fatalf("kill: err = %v, want ErrIncompleteTeardownMetadata", err) + } + // Nothing destroyed with empty args, and no intent recorded. + if len(h.runtime.destroyed) != 0 || len(h.workspace.destroyed) != 0 { + t.Errorf("teardown ran despite incomplete metadata: rt=%v ws=%v", h.runtime.destroyed, h.workspace.destroyed) + } + if h.log.indexOf("OnKillRequested") != -1 { + t.Error("kill intent recorded despite incomplete metadata") + } +} + +func TestCleanup_IncompleteMetadata_Skipped(t *testing.T) { + h := newHarness("unused") + ctx := context.Background() + // Terminal session but no workspace path persisted — must be skipped, never + // handed to Destroy with an empty path. + if err := h.store.Seed(ctx, domain.SessionRecord{ + ID: "orphan-1", ProjectID: testProject, + Lifecycle: lc(domain.SessionTerminated, domain.ReasonManuallyKilled, domain.PRNone, ""), + }); err != nil { + t.Fatalf("seed: %v", err) + } + + res, err := h.sm.Cleanup(ctx, testProject) + if err != nil { + t.Fatalf("cleanup: %v", err) + } + if !equalIDSet(res.Skipped, []domain.SessionID{"orphan-1"}) { + t.Errorf("skipped = %v, want [orphan-1]", res.Skipped) + } + if len(res.Cleaned) != 0 { + t.Errorf("cleaned = %v, want none", res.Cleaned) + } + if len(h.workspace.destroyed) != 0 { + t.Errorf("workspace.destroyed = %v, want none (empty path must not reach Destroy)", h.workspace.destroyed) + } +} + +func TestRestore_LiveSession_Rejected(t *testing.T) { + h := newHarness("sess-1") + ctx := context.Background() + if _, err := h.sm.Spawn(ctx, spawnCfg()); err != nil { + t.Fatalf("spawn: %v", err) + } + // The session is live (never torn down). Capture an agent id so the only thing + // blocking restore is the non-terminal lifecycle, not missing metadata. + if err := h.store.PatchMetadata(ctx, "sess-1", map[string]string{lifecycle.MetaAgentSessionID: "agent-xyz"}); err != nil { + t.Fatalf("patch metadata: %v", err) + } + createdBefore := len(h.runtime.created) + restoresBefore := len(h.workspace.restoredID) + + if _, err := h.sm.Restore(ctx, "sess-1"); !errors.Is(err, ErrNotRestorable) { + t.Fatalf("restore: err = %v, want ErrNotRestorable", err) + } + // No second runtime/workspace spun up for the still-live session. + if len(h.runtime.created) != createdBefore { + t.Error("runtime created for a live-session restore") + } + if len(h.workspace.restoredID) != restoresBefore { + t.Error("workspace restored for a live-session restore") + } +} + func TestListAndGet_DeriveStatus(t *testing.T) { cases := []struct { name string