diff --git a/api/runstore.go b/api/runstore.go new file mode 100644 index 0000000..c0d02f5 --- /dev/null +++ b/api/runstore.go @@ -0,0 +1,122 @@ +package api + +import ( + "sync" + "time" + + "github.com/iamangus/OpenDev/orchestrator" +) + +// RunStatus represents the lifecycle state of an orchestration run. +type RunStatus string + +const ( + RunStatusPending RunStatus = "pending" + RunStatusRunning RunStatus = "running" + RunStatusDone RunStatus = "done" + RunStatusFailed RunStatus = "failed" +) + +// Run holds the state of a single orchestration run. +type Run struct { + ID string `json:"id"` + Status RunStatus `json:"status"` + Request string `json:"request"` + Repo string `json:"repo"` + Spec *orchestrator.SystemSpec `json:"spec,omitempty"` + Features []orchestrator.Feature `json:"features,omitempty"` + Contracts []orchestrator.Contract `json:"contracts,omitempty"` + Jobs []featureLeadJobSummary `json:"jobs,omitempty"` + Error string `json:"error,omitempty"` + PRNumber int `json:"pr_number,omitempty"` + PRURL string `json:"pr_url,omitempty"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +// RunStore is a thread-safe in-memory store for orchestration run state. +type RunStore struct { + mu sync.RWMutex + runs map[string]*Run +} + +// NewRunStore creates an empty RunStore. +func NewRunStore() *RunStore { + return &RunStore{runs: make(map[string]*Run)} +} + +// Create adds a new run in the pending state and returns it. +func (s *RunStore) Create(id, request, repo string) *Run { + now := time.Now() + r := &Run{ + ID: id, + Status: RunStatusPending, + Request: request, + Repo: repo, + CreatedAt: now, + UpdatedAt: now, + } + s.mu.Lock() + s.runs[id] = r + s.mu.Unlock() + return r +} + +// SetRunning marks the run as running. +func (s *RunStore) SetRunning(id string) { + s.mu.Lock() + defer s.mu.Unlock() + if r, ok := s.runs[id]; ok { + r.Status = RunStatusRunning + r.UpdatedAt = time.Now() + } +} + +// SetDone marks the run as done and stores the orchestrator result. +func (s *RunStore) SetDone(id string, result *orchestrator.Result, jobs []featureLeadJobSummary) { + s.mu.Lock() + defer s.mu.Unlock() + if r, ok := s.runs[id]; ok { + r.Status = RunStatusDone + r.Spec = &result.Spec + r.Features = result.Features + r.Contracts = result.Contracts + r.Jobs = jobs + r.UpdatedAt = time.Now() + } +} + +// SetFailed marks the run as failed with an error message. +func (s *RunStore) SetFailed(id string, err error) { + s.mu.Lock() + defer s.mu.Unlock() + if r, ok := s.runs[id]; ok { + r.Status = RunStatusFailed + r.Error = err.Error() + r.UpdatedAt = time.Now() + } +} + +// SetPR stores the GitHub PR number and URL on the run. +func (s *RunStore) SetPR(id string, number int, url string) { + s.mu.Lock() + defer s.mu.Unlock() + if r, ok := s.runs[id]; ok { + r.PRNumber = number + r.PRURL = url + r.UpdatedAt = time.Now() + } +} + +// Get returns the run for the given ID, or (nil, false) if not found. +func (s *RunStore) Get(id string) (*Run, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + r, ok := s.runs[id] + if !ok { + return nil, false + } + // Return a shallow copy to avoid races on the caller's side. + copy := *r + return ©, true +} diff --git a/api/server.go b/api/server.go new file mode 100644 index 0000000..7595b5b --- /dev/null +++ b/api/server.go @@ -0,0 +1,604 @@ +// Package api provides the unified HTTP API server for OpenDev. +// +// Routes: +// +// POST /api/jobs – submit a new request. A UUID run_id is +// generated and returned immediately (202). +// The orchestrator runs in the background; +// poll GET /api/runs/{runID} for status. +// GET /api/jobs – list all feature lead jobs and their status. +// GET /api/jobs/{id} – fetch a single job by ID (includes result when done). +// GET /api/runs/{runID} – get the status of an orchestration run. +// GET /api/repos – list repos in code-mcp (proxy). +// POST /api/repos – clone a new repo in code-mcp (proxy). +// POST /{runID}/mcp – MCP streamable-HTTP endpoint for a single run +// (handled by the mcp.Registry; active only during +// spec generation). +// GET /{runID}/api/questions/pending – list unanswered ask_user questions for a run. +// POST /{runID}/api/questions/{qID}/answer – post an answer for a pending question in a run. +package api + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "log" + "net/http" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/google/uuid" + + "github.com/iamangus/OpenDev/featurelead" + mcppkg "github.com/iamangus/OpenDev/mcp" + "github.com/iamangus/OpenDev/orchestrator" + "github.com/iamangus/OpenDev/questions" +) + +// Server is the unified HTTP API server that wires together the orchestrator, +// the feature lead engine, and the per-run MCP registry. +type Server struct { + orch *orchestrator.Orchestrator + engine *featurelead.Engine + store *featurelead.Store + runStore *RunStore + registry *mcppkg.Registry + publicURL string // base URL advertised to agents, e.g. "https://example.com" + codeMCPURL string // base URL of code-mcp, used to proxy /api/repos + codeMCPClient featurelead.CodeMCPClient + idGen func(lead orchestrator.FeatureLead) string + log *log.Logger + + // runStores holds the per-run questions.Store, keyed by run ID. + // It is accessed only while the run's MCP handler is registered, so + // the lifetime mirrors the registry entry. + runMu sync.RWMutex + runStores map[string]*questions.Store +} + +// NewServer creates a new Server. +// - registry is the mcp.Registry that routes /{runID}/mcp requests. +// - publicURL is the externally reachable base URL (e.g. "https://example.com"). +// It is used to construct per-run MCP URLs passed to the orchestrator. +// - codeMCPURL is the base URL of the code-mcp service, used for proxying +// GET /api/repos and POST /api/repos. +// - codeMCPClient is the typed client used for GetCommits during PR description +// updates and for CreatePR/UpdatePR/PromotePR. Pass nil to skip PR integration. +// - logger is written to for INFO/WARN/ERROR events. Pass nil to discard logs. +func NewServer( + orch *orchestrator.Orchestrator, + engine *featurelead.Engine, + store *featurelead.Store, + registry *mcppkg.Registry, + publicURL string, + codeMCPURL string, + codeMCPClient featurelead.CodeMCPClient, + logger *log.Logger, +) *Server { + if logger == nil { + logger = log.New(io.Discard, "", 0) + } + return &Server{ + orch: orch, + engine: engine, + store: store, + runStore: NewRunStore(), + registry: registry, + publicURL: strings.TrimRight(publicURL, "/"), + codeMCPURL: strings.TrimRight(codeMCPURL, "/"), + codeMCPClient: codeMCPClient, + idGen: defaultID, + log: logger, + runStores: make(map[string]*questions.Store), + } +} + +// Handler returns an http.Handler with all API routes registered. +func (s *Server) Handler() http.Handler { + mux := http.NewServeMux() + + // Per-run routes — the registry and question endpoints share the /{runID}/ + // prefix. The registry handles /mcp; we handle /api/questions/... directly. + mux.Handle("/", s) // catch-all: routes /{runID}/... requests + + // Global job routes. + mux.HandleFunc("POST /api/jobs", s.handlePostJobs) + mux.HandleFunc("GET /api/jobs", s.handleListJobs) + mux.HandleFunc("GET /api/jobs/{id}", s.handleGetJob) + + // Run status route. + mux.HandleFunc("GET /api/runs/{runID}", s.handleGetRun) + + // Repo proxy routes (forward to code-mcp). + mux.HandleFunc("GET /api/repos", s.handleGetRepos) + mux.HandleFunc("POST /api/repos", s.handlePostRepos) + + return mux +} + +// ServeHTTP handles the catch-all /{runID}/... routes. It dispatches: +// - /{runID}/mcp[/*] → mcp.Registry +// - GET /{runID}/api/questions/pending → handleGetPendingQuestions +// - POST /{runID}/api/questions/{qID}/answer → handlePostAnswer +func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + + // Must have at least two segments: /{runID}/something + path = "/" + strings.TrimLeft(path, "/") + rest := path[1:] + slash := strings.IndexByte(rest, '/') + if slash < 0 { + http.NotFound(w, r) + return + } + runID := rest[:slash] + suffix := rest[slash:] // e.g. "/mcp" or "/api/questions/pending" + if runID == "" { + http.NotFound(w, r) + return + } + + // /{runID}/mcp — delegate to the registry (which does its own path rewrite). + if suffix == "/mcp" || strings.HasPrefix(suffix, "/mcp/") { + s.registry.ServeHTTP(w, r) + return + } + + // /{runID}/api/questions/pending + if r.Method == http.MethodGet && suffix == "/api/questions/pending" { + s.handleGetPendingQuestions(w, r, runID) + return + } + + // /{runID}/api/questions/{qID}/answer + if r.Method == http.MethodPost { + const prefix = "/api/questions/" + const suffix2 = "/answer" + if strings.HasPrefix(suffix, prefix) && strings.HasSuffix(suffix, suffix2) { + qID := suffix[len(prefix) : len(suffix)-len(suffix2)] + if qID != "" { + s.handlePostAnswer(w, r, runID, qID) + return + } + } + } + + http.NotFound(w, r) +} + +// ── POST /api/jobs ──────────────────────────────────────────────────────────── + +// postJobsRequest is the expected request body for POST /api/jobs. +type postJobsRequest struct { + Request string `json:"request"` + Repo string `json:"repo"` +} + +// postJobsResponse is the immediate response body for POST /api/jobs. +// The orchestrator runs asynchronously; poll GET /api/runs/{run_id} for status. +type postJobsResponse struct { + RunID string `json:"run_id"` +} + +type featureLeadJobSummary struct { + JobID string `json:"job_id"` + FeatureLead orchestrator.FeatureLead `json:"feature_lead"` + Status featurelead.JobStatus `json:"status"` + MergeStatus featurelead.MergeStatus `json:"merge_status,omitempty"` +} + +func (s *Server) handlePostJobs(w http.ResponseWriter, r *http.Request) { + var req postJobsRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err)) + return + } + if req.Request == "" { + writeError(w, http.StatusBadRequest, "request field is required") + return + } + if req.Repo == "" { + writeError(w, http.StatusBadRequest, "repo field is required") + return + } + + // ── Per-run setup ───────────────────────────────────────────────────────── + runID := uuid.New().String() + + // Register the run in the run store immediately so GET /api/runs/{runID} + // can return a "pending" response right away. + s.runStore.Create(runID, req.Request, req.Repo) + + // Build the per-run questions store and MCP handler. + qs := questions.NewStore() + askUser := mcppkg.NewAskUserServer(qs) + s.registry.Register(runID, askUser.Handler()) + + // Store the questions.Store so the questions endpoints can reach it. + s.runMu.Lock() + s.runStores[runID] = qs + s.runMu.Unlock() + + // Construct the per-run MCP base URL. The orchestrator appends /mcp. + // Shape: / + runBaseURL := fmt.Sprintf("%s/%s", s.publicURL, runID) + + // ── Run the orchestrator in the background ──────────────────────────────── + // Use context.Background() so the orchestrator (and its ask_user calls) + // are NOT cancelled when the HTTP request completes. This is what allows + // the TUI to navigate to the dashboard immediately and poll for questions + // while the orchestrator is still running. + go s.runOrchestrator(context.Background(), runID, req.Request, runBaseURL, req.Repo) + + // Return the run ID immediately so the TUI can start polling. + writeJSON(w, http.StatusAccepted, postJobsResponse{RunID: runID}) +} + +// runOrchestrator drives the full orchestration pipeline in the background, +// updating the RunStore on completion or failure. +func (s *Server) runOrchestrator(ctx context.Context, runID, request, runBaseURL, repo string) { + s.runStore.SetRunning(runID) + + result, err := s.orch.RunWithMCPURL(ctx, request, runBaseURL, repo) + + // Deregister the MCP handler — ask_user is only needed during spec generation. + s.registry.Deregister(runID) + s.runMu.Lock() + delete(s.runStores, runID) + s.runMu.Unlock() + + if err != nil { + s.log.Printf("[ERROR] orchestrator run %s failed: %v", runID, err) + s.runStore.SetFailed(runID, err) + return + } + + // ── Open a draft PR immediately after the orch branch is created ────────── + prNumber := 0 + if s.codeMCPClient != nil && result.OrchBranchName != "" { + title := result.Spec.Title + if title == "" { + title = "OpenDev: " + request + } + num, prURL, prErr := s.codeMCPClient.CreatePR(repo, title, result.OrchBranchName, "main", "_OpenDev is implementing this change — description will be updated as features land._") + if prErr != nil { + s.log.Printf("[WARN] run %s: failed to create draft PR: %v", runID, prErr) + } else { + prNumber = num + s.runStore.SetPR(runID, num, prURL) + s.log.Printf("[INFO] run %s: draft PR #%d opened: %s", runID, num, prURL) + } + } + + // ── Spawn feature lead jobs ─────────────────────────────────────────────── + // Track how many feature leads are still running so the monitor goroutine + // knows when to stop and promote the PR. + var pending atomic.Int32 + pending.Store(int32(len(result.FeatureLeads))) + + // done is closed when the last feature lead finishes, signalling the + // monitor goroutine to stop polling and promote the PR. + done := make(chan struct{}) + + // Start the background monitor goroutine if PR integration is active. + if s.codeMCPClient != nil && prNumber > 0 && result.OrchBranchName != "" { + go s.monitorOrchBranch(ctx, repo, result.OrchBranchName, runID, prNumber, done) + } + + summaries := make([]featureLeadJobSummary, 0, len(result.FeatureLeads)) + for _, lead := range result.FeatureLeads { + id := s.idGen(lead) + job := s.store.Create(id, lead) + + capturedLead := lead + capturedID := id + capturedRepo := repo + go s.runFeatureLead(capturedID, capturedLead, capturedRepo, runID, prNumber, &pending, done) + + summaries = append(summaries, featureLeadJobSummary{ + JobID: job.ID, + FeatureLead: job.FeatureLead, + Status: job.Status, + MergeStatus: job.MergeStatus, + }) + } + + s.runStore.SetDone(runID, result, summaries) +} + +// runFeatureLead executes the feature lead engine for a single job in the +// background and updates the store on completion or failure. +// When all feature leads for a run finish (pending counter reaches zero), +// done is closed to signal the monitor goroutine. +func (s *Server) runFeatureLead(id string, lead orchestrator.FeatureLead, repo, runID string, prNumber int, pending *atomic.Int32, done chan struct{}) { + s.store.SetRunning(id) + + onTasksPlanned := func(tasks []featurelead.Task) { + s.store.SetTasks(id, tasks) + } + onTaskDone := func(tr featurelead.TaskResult) { + s.store.AppendTaskResult(id, tr) + } + + result, err := s.engine.Run(context.Background(), lead, repo, onTasksPlanned, onTaskDone) + if err != nil { + // Check for a persistent merge conflict: the tasks completed but the + // merge failed. Record the partial result and mark as conflict-failed. + var conflictErr *featurelead.MergeConflictFailedError + if errors.As(err, &conflictErr) { + s.store.SetDone(id, conflictErr.Result) + s.store.SetMergeConflictFailed(id, conflictErr.ConflictOutput) + s.maybeSignalDone(pending, done) + return + } + s.log.Printf("[ERROR] feature lead job %s failed: %v", id, err) + s.store.SetFailed(id, err) + s.maybeSignalDone(pending, done) + return + } + s.store.SetDone(id, result) + if lead.OrchBranchName != "" { + s.store.SetMerged(id) + } + s.maybeSignalDone(pending, done) +} + +// maybeSignalDone decrements the pending counter and closes done when it +// reaches zero (i.e. all feature leads have finished). +func (s *Server) maybeSignalDone(pending *atomic.Int32, done chan struct{}) { + if pending.Add(-1) == 0 { + close(done) + } +} + +// monitorOrchBranch polls the orch branch for new commits every ~15 seconds, +// generates a PR description using the LLM, and updates the PR body via +// code-mcp. It exits when done is closed, then promotes the PR to +// ready-for-review. +func (s *Server) monitorOrchBranch(ctx context.Context, repo, orchBranch, runID string, prNumber int, done <-chan struct{}) { + ticker := time.NewTicker(15 * time.Second) + defer ticker.Stop() + + var lastHash string // hash of most recent commit seen; avoids redundant updates + + updateIfChanged := func() { + commits, err := s.codeMCPClient.GetCommits(repo, orchBranch) + if err != nil { + s.log.Printf("[WARN] run %s: monitorOrchBranch: GetCommits failed: %v", runID, err) + return + } + if len(commits) == 0 { + return + } + if commits[0].Hash == lastHash { + return // nothing new + } + lastHash = commits[0].Hash + s.updatePRDescription(ctx, repo, orchBranch, runID, prNumber, commits) + } + + for { + select { + case <-done: + // All feature leads finished — do one final update then promote. + updateIfChanged() + if err := s.codeMCPClient.PromotePR(repo, prNumber); err != nil { + s.log.Printf("[WARN] run %s: failed to promote PR #%d to ready-for-review: %v", runID, prNumber, err) + } else { + s.log.Printf("[INFO] run %s: PR #%d promoted to ready-for-review", runID, prNumber) + } + return + case <-ticker.C: + updateIfChanged() + } + } +} + +// updatePRDescription asks the PR summary agent to produce a description for +// the given commits, then patches the PR body via code-mcp. +func (s *Server) updatePRDescription(ctx context.Context, repo, orchBranch, runID string, prNumber int, commits []featurelead.CommitInfo) { + prSummaryAgent := s.orch.AgentNames().PRSummary + if prSummaryAgent == "" { + return // no summary agent configured + } + + // Build a prompt listing the commits. + var sb strings.Builder + sb.WriteString("Write a concise GitHub pull request description (Markdown) for the following commits. Focus on what changed and why. Do not include a title line — just the body.\n\nCommits:\n") + for _, c := range commits { + hash := c.Hash + if len(hash) > 8 { + hash = hash[:8] + } + sb.WriteString(fmt.Sprintf("- %s %s\n", hash, c.Subject)) + } + + body, err := s.orch.AgentClient().Chat(ctx, prSummaryAgent, sb.String(), nil) + if err != nil { + s.log.Printf("[WARN] run %s: PR summary agent failed: %v", runID, err) + return + } + + if updateErr := s.codeMCPClient.UpdatePR(repo, prNumber, body); updateErr != nil { + s.log.Printf("[WARN] run %s: UpdatePR (description) failed: %v", runID, updateErr) + } +} + +// ── GET /api/repos ──────────────────────────────────────────────────────────── + +func (s *Server) handleGetRepos(w http.ResponseWriter, r *http.Request) { + s.proxyToCodeMCP(w, r, http.MethodGet, "/api/repos", nil) +} + +// ── POST /api/repos ─────────────────────────────────────────────────────────── + +func (s *Server) handlePostRepos(w http.ResponseWriter, r *http.Request) { + s.proxyToCodeMCP(w, r, http.MethodPost, "/api/repos", r.Body) +} + +// proxyToCodeMCP forwards a request to code-mcp and relays the response. +func (s *Server) proxyToCodeMCP(w http.ResponseWriter, r *http.Request, method, path string, body io.Reader) { + if s.codeMCPURL == "" { + writeError(w, http.StatusServiceUnavailable, "code-mcp URL not configured") + return + } + url := s.codeMCPURL + path + req, err := http.NewRequestWithContext(r.Context(), method, url, body) + if err != nil { + writeError(w, http.StatusInternalServerError, fmt.Sprintf("building proxy request: %v", err)) + return + } + if r.Header.Get("Content-Type") != "" { + req.Header.Set("Content-Type", r.Header.Get("Content-Type")) + } else if body != nil { + req.Header.Set("Content-Type", "application/json") + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + writeError(w, http.StatusBadGateway, fmt.Sprintf("code-mcp unreachable: %v", err)) + return + } + defer resp.Body.Close() + + w.Header().Set("Content-Type", resp.Header.Get("Content-Type")) + w.WriteHeader(resp.StatusCode) + _, _ = io.Copy(w, resp.Body) +} + +// ── GET /api/runs/{runID} ───────────────────────────────────────────────────── + +func (s *Server) handleGetRun(w http.ResponseWriter, r *http.Request) { + runID := r.PathValue("runID") + run, ok := s.runStore.Get(runID) + if !ok { + writeError(w, http.StatusNotFound, fmt.Sprintf("run %q not found", runID)) + return + } + writeJSON(w, http.StatusOK, run) +} + +// ── GET /api/jobs ───────────────────────────────────────────────────────────── + +func (s *Server) handleListJobs(w http.ResponseWriter, r *http.Request) { + writeJSON(w, http.StatusOK, s.store.List()) +} + +// ── GET /api/jobs/{id} ──────────────────────────────────────────────────────── + +func (s *Server) handleGetJob(w http.ResponseWriter, r *http.Request) { + id := r.PathValue("id") + job, ok := s.store.Get(id) + if !ok { + writeError(w, http.StatusNotFound, fmt.Sprintf("job %q not found", id)) + return + } + writeJSON(w, http.StatusOK, job) +} + +// ── GET /{runID}/api/questions/pending ──────────────────────────────────────── + +func (s *Server) handleGetPendingQuestions(w http.ResponseWriter, r *http.Request, runID string) { + s.runMu.RLock() + qs, ok := s.runStores[runID] + s.runMu.RUnlock() + + if !ok { + // Run has finished (MCP deregistered) or never existed. + writeError(w, http.StatusNotFound, fmt.Sprintf("run %q not found or no longer active", runID)) + return + } + writeJSON(w, http.StatusOK, qs.Pending()) +} + +// ── POST /{runID}/api/questions/{qID}/answer ────────────────────────────────── + +type postAnswerRequest struct { + Answer string `json:"answer"` +} + +func (s *Server) handlePostAnswer(w http.ResponseWriter, r *http.Request, runID, qID string) { + s.runMu.RLock() + qs, ok := s.runStores[runID] + s.runMu.RUnlock() + + if !ok { + writeError(w, http.StatusNotFound, fmt.Sprintf("run %q not found or no longer active", runID)) + return + } + + var req postAnswerRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err)) + return + } + if req.Answer == "" { + writeError(w, http.StatusBadRequest, "answer field is required and must be non-empty") + return + } + + if err := qs.Answer(qID, req.Answer); err != nil { + switch err { + case questions.ErrNotFound: + writeError(w, http.StatusNotFound, fmt.Sprintf("question %q not found", qID)) + case questions.ErrAlreadyAnswered: + writeError(w, http.StatusConflict, fmt.Sprintf("question %q has already been answered", qID)) + default: + writeError(w, http.StatusInternalServerError, fmt.Sprintf("answering question: %v", err)) + } + return + } + + w.WriteHeader(http.StatusNoContent) +} + +// ── Test hooks ──────────────────────────────────────────────────────────────── + +// RegisterTestRun injects a questions.Store for runID directly into the +// server's internal map. This is intended for use in tests only, to simulate +// an active run without going through the orchestrator. +func (s *Server) RegisterTestRun(runID string, qs *questions.Store) { + s.runMu.Lock() + defer s.runMu.Unlock() + s.runStores[runID] = qs +} + +// DeregisterTestRun removes the questions.Store for runID. Call this in test +// cleanup to avoid state leaking between tests. +func (s *Server) DeregisterTestRun(runID string) { + s.runMu.Lock() + defer s.runMu.Unlock() + delete(s.runStores, runID) +} + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +type errorResponse struct { + Error string `json:"error"` +} + +func writeJSON(w http.ResponseWriter, status int, v any) { + var buf bytes.Buffer + if err := json.NewEncoder(&buf).Encode(v); err != nil { + http.Error(w, "internal encoding error", http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _, _ = buf.WriteTo(w) +} + +func writeError(w http.ResponseWriter, status int, msg string) { + writeJSON(w, status, errorResponse{Error: msg}) +} + +// defaultID derives a stable job ID from the feature lead's branch name, which +// is already unique per feature within a run. +func defaultID(lead orchestrator.FeatureLead) string { + return lead.BranchName +} diff --git a/api/server_questions_test.go b/api/server_questions_test.go new file mode 100644 index 0000000..16a581d --- /dev/null +++ b/api/server_questions_test.go @@ -0,0 +1,256 @@ +package api_test + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "sync" + "testing" + "time" + + "github.com/iamangus/OpenDev/api" + "github.com/iamangus/OpenDev/featurelead" + mcppkg "github.com/iamangus/OpenDev/mcp" + "github.com/iamangus/OpenDev/questions" +) + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +// newTestServer builds an api.Server with a real mcp.Registry and stub +// orchestrator/engine/featurelead store. +func newTestServer(t *testing.T) (http.Handler, *mcppkg.Registry) { + t.Helper() + registry := mcppkg.NewRegistry() + srv := api.NewServer(nil, nil, featurelead.NewStore(), registry, "http://localhost", "", nil, nil) + return srv.Handler(), registry +} + +// registerRun wires up a questions.Store for a fake runID directly into the +// registry and the server's internal runStores map by using the public +// API: Server.RegisterRun (exposed via api.Server for testing). +// +// Since Server.runStores is unexported, we instead exercise the real path: +// the test manually registers an MCP handler in the registry AND injects the +// questions store by calling the helper below that bypasses the orchestrator. +// +// For simplicity in these tests we use a test-only helper on the server. +// Because we can't access runStores directly, we exercise the routes by +// having the test spin up a fake run through the exported test hook. + +// pendingQuestion mirrors the JSON shape returned by the pending endpoint. +type pendingQuestion struct { + ID string `json:"id"` + Text string `json:"text"` + CreatedAt time.Time `json:"created_at"` + Answer *string `json:"answer"` +} + +// ── GET /{runID}/api/questions/pending — unknown run ───────────────────────── + +func TestGetPendingQuestions_UnknownRun(t *testing.T) { + h, _ := newTestServer(t) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/no-such-run/api/questions/pending", nil) + h.ServeHTTP(rec, req) + + if rec.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", rec.Code) + } +} + +// ── POST /{runID}/api/questions/{qID}/answer — unknown run ─────────────────── + +func TestPostAnswer_UnknownRun(t *testing.T) { + h, _ := newTestServer(t) + + body := bytes.NewBufferString(`{"answer":"hello"}`) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/no-such-run/api/questions/some-q/answer", body) + h.ServeHTTP(rec, req) + + if rec.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", rec.Code) + } +} + +// ── Per-run questions store — integration via registry ──────────────────────── +// +// The questions endpoints are only reachable while a run's store is registered +// (between Register and Deregister). We test this by wiring a real +// questions.Store through the exported api.TestRun helper if available, or +// by using the Server's own HandleFunc path directly. +// +// Since we can't inject a runStore without going through handlePostJobs (which +// requires a real orchestrator), we test the end-to-end path via a thin +// exported hook: api.Server exposes RegisterTestRun / DeregisterTestRun for +// use in tests only. + +// TestRunQuestionsRoundTrip exercises the full per-run question lifecycle by +// calling the server's test hooks to inject a run, then hitting the HTTP +// endpoints. +func TestRunQuestionsRoundTrip(t *testing.T) { + registry := mcppkg.NewRegistry() + srv := api.NewServer(nil, nil, featurelead.NewStore(), registry, "http://localhost", "", nil, nil) + h := srv.Handler() + + runID := "test-run-abc" + qs := questions.NewStore() + srv.RegisterTestRun(runID, qs) + defer srv.DeregisterTestRun(runID) + + // Ask a question in the background. + var wg sync.WaitGroup + wg.Add(1) + var gotAnswer string + go func() { + defer wg.Done() + ans, _ := qs.Ask(context.Background(), "What language?") + gotAnswer = ans + }() + + // Poll until the question appears. + deadline := time.Now().Add(time.Second) + for { + if len(qs.Pending()) > 0 { + break + } + if time.Now().After(deadline) { + t.Fatal("timed out waiting for question to appear") + } + time.Sleep(5 * time.Millisecond) + } + + // GET /{runID}/api/questions/pending + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, fmt.Sprintf("/%s/api/questions/pending", runID), nil) + h.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("GET pending: expected 200, got %d — %s", rec.Code, rec.Body.String()) + } + + var pending []pendingQuestion + if err := json.NewDecoder(rec.Body).Decode(&pending); err != nil { + t.Fatalf("decode pending: %v", err) + } + if len(pending) != 1 { + t.Fatalf("expected 1 pending question, got %d", len(pending)) + } + qID := pending[0].ID + + // POST /{runID}/api/questions/{qID}/answer + ansBody := bytes.NewBufferString(`{"answer":"Go"}`) + rec2 := httptest.NewRecorder() + req2 := httptest.NewRequest(http.MethodPost, fmt.Sprintf("/%s/api/questions/%s/answer", runID, qID), ansBody) + h.ServeHTTP(rec2, req2) + + if rec2.Code != http.StatusNoContent { + t.Fatalf("POST answer: expected 204, got %d — %s", rec2.Code, rec2.Body.String()) + } + + wg.Wait() + if gotAnswer != "Go" { + t.Errorf("Ask returned %q, want %q", gotAnswer, "Go") + } +} + +func TestPostAnswer_AlreadyAnswered(t *testing.T) { + registry := mcppkg.NewRegistry() + srv := api.NewServer(nil, nil, featurelead.NewStore(), registry, "http://localhost", "", nil, nil) + h := srv.Handler() + + runID := "test-run-dup" + qs := questions.NewStore() + srv.RegisterTestRun(runID, qs) + defer srv.DeregisterTestRun(runID) + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + _, _ = qs.Ask(context.Background(), "Once?") + }() + + deadline := time.Now().Add(time.Second) + for len(qs.Pending()) == 0 { + if time.Now().After(deadline) { + t.Fatal("timed out") + } + time.Sleep(5 * time.Millisecond) + } + qID := qs.Pending()[0].ID + + postAnswer := func() int { + body := bytes.NewBufferString(`{"answer":"yes"}`) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, fmt.Sprintf("/%s/api/questions/%s/answer", runID, qID), body) + h.ServeHTTP(rec, req) + return rec.Code + } + + if code := postAnswer(); code != http.StatusNoContent { + t.Fatalf("first answer: expected 204, got %d", code) + } + wg.Wait() + if code := postAnswer(); code != http.StatusConflict { + t.Fatalf("second answer: expected 409, got %d", code) + } +} + +func TestPostAnswer_BadBody(t *testing.T) { + registry := mcppkg.NewRegistry() + srv := api.NewServer(nil, nil, featurelead.NewStore(), registry, "http://localhost", "", nil, nil) + h := srv.Handler() + + runID := "test-run-bad" + qs := questions.NewStore() + srv.RegisterTestRun(runID, qs) + defer srv.DeregisterTestRun(runID) + + body := bytes.NewBufferString(`not json`) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, fmt.Sprintf("/%s/api/questions/some-q/answer", runID), body) + h.ServeHTTP(rec, req) + + if rec.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rec.Code) + } +} + +func TestPostAnswer_EmptyAnswer(t *testing.T) { + registry := mcppkg.NewRegistry() + srv := api.NewServer(nil, nil, featurelead.NewStore(), registry, "http://localhost", "", nil, nil) + h := srv.Handler() + + runID := "test-run-empty" + qs := questions.NewStore() + srv.RegisterTestRun(runID, qs) + defer srv.DeregisterTestRun(runID) + + body := bytes.NewBufferString(`{"answer":""}`) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, fmt.Sprintf("/%s/api/questions/some-q/answer", runID), body) + h.ServeHTTP(rec, req) + + if rec.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d", rec.Code) + } +} + +// ── Existing job route unaffected ───────────────────────────────────────────── + +func TestExistingJobRouteUnaffected(t *testing.T) { + h, _ := newTestServer(t) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/api/jobs", nil) + h.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("GET /api/jobs: expected 200, got %d — %s", rec.Code, rec.Body.String()) + } +} diff --git a/featurelead/codemcp.go b/featurelead/codemcp.go new file mode 100644 index 0000000..bb11ff7 --- /dev/null +++ b/featurelead/codemcp.go @@ -0,0 +1,268 @@ +// Package featurelead — code-mcp HTTP client for branch and test operations. +package featurelead + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +// ExecResult holds the result of running a registered test via code-mcp. +type ExecResult struct { + ExitCode int `json:"exit_code"` + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + TimedOut bool `json:"timed_out"` +} + +// CommitInfo describes a single commit returned by GetCommits. +type CommitInfo struct { + Hash string `json:"hash"` + Subject string `json:"subject"` +} + +// CodeMCPClient is the interface for communicating with code-mcp's REST API. +// The feature-lead engine uses this to create branches, run registered tests, +// and merge feature branches back into the orchestration branch. +// It also handles GitHub PR lifecycle via code-mcp's PR endpoints. +type CodeMCPClient interface { + // CreateBranch creates a new git worktree/branch via code-mcp. + // When base is non-empty it is used as the start point for the new branch. + CreateBranch(repo, branch, base string) error + // RunTest executes the registered test command for the given repo/branch. + // Returns ErrNoTestRegistered if no test has been registered. + RunTest(repo, branch string) (*ExecResult, error) + // MergeBranch merges sourceBranch into targetBranch via code-mcp. + // On a merge conflict it returns a non-empty conflictOutput string and a + // nil error. Any other failure returns an error. + MergeBranch(repo, sourceBranch, targetBranch string) (conflictOutput string, err error) + // GetCommits returns the commits reachable from branch but not from the + // default branch of the repo (i.e. the commits unique to branch). + GetCommits(repo, branch string) ([]CommitInfo, error) + // CreatePR creates a draft pull request on GitHub via code-mcp. + // Returns the PR number and HTML URL. + CreatePR(repo, title, head, base, body string) (prNumber int, prURL string, err error) + // UpdatePR patches the body of an existing PR via code-mcp. + UpdatePR(repo string, number int, body string) error + // PromotePR converts a draft PR to ready-for-review via code-mcp. + PromotePR(repo string, number int) error +} + +// ErrNoTestRegistered is returned by RunTest when no test command has been +// registered for the given branch. +var ErrNoTestRegistered = fmt.Errorf("no test registered") + +// HTTPCodeMCPClient is an HTTP-based implementation of CodeMCPClient. +type HTTPCodeMCPClient struct { + baseURL string + httpClient *http.Client +} + +// NewHTTPCodeMCPClient creates a new HTTPCodeMCPClient targeting the given +// code-mcp base URL (e.g. "http://localhost:8080"). +func NewHTTPCodeMCPClient(baseURL string) *HTTPCodeMCPClient { + return &HTTPCodeMCPClient{ + baseURL: baseURL, + httpClient: &http.Client{ + Timeout: 5 * time.Minute, // tests may take a while + }, + } +} + +// CreateBranch calls POST /api/repos/{repo}/branches with {"branch": branch} +// and optionally {"base": base} when base is non-empty. +func (c *HTTPCodeMCPClient) CreateBranch(repo, branch, base string) error { + body := map[string]string{"branch": branch} + if base != "" { + body["base"] = base + } + payload, _ := json.Marshal(body) + url := fmt.Sprintf("%s/api/repos/%s/branches", c.baseURL, repo) + + resp, err := c.httpClient.Post(url, "application/json", bytes.NewReader(payload)) + if err != nil { + return fmt.Errorf("creating branch: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusCreated { + body2, _ := io.ReadAll(resp.Body) + return fmt.Errorf("creating branch %q: status %d: %s", branch, resp.StatusCode, body2) + } + return nil +} + +// RunTest calls POST /api/repos/{repo}/branches/{branch}/test/run. +// Returns ErrNoTestRegistered if the endpoint returns 404 (no test registered). +func (c *HTTPCodeMCPClient) RunTest(repo, branch string) (*ExecResult, error) { + url := fmt.Sprintf("%s/api/repos/%s/branches/%s/test/run", c.baseURL, repo, branch) + + resp, err := c.httpClient.Post(url, "application/json", nil) + if err != nil { + return nil, fmt.Errorf("running test: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("reading test response: %w", err) + } + + if resp.StatusCode == http.StatusNotFound { + return nil, ErrNoTestRegistered + } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("running test: status %d: %s", resp.StatusCode, body) + } + + var result ExecResult + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("parsing test result: %w", err) + } + return &result, nil +} + +// MergeBranch calls POST /api/repos/{repo}/branches/{sourceBranch}/merge with +// {"into": targetBranch}. On a 409 Conflict response, it returns the conflict +// output as the first return value with a nil error. Any other non-200 status +// returns an error. +func (c *HTTPCodeMCPClient) MergeBranch(repo, sourceBranch, targetBranch string) (conflictOutput string, err error) { + payload, _ := json.Marshal(map[string]string{"into": targetBranch}) + url := fmt.Sprintf("%s/api/repos/%s/branches/%s/merge", c.baseURL, repo, sourceBranch) + + resp, err := c.httpClient.Post(url, "application/json", bytes.NewReader(payload)) + if err != nil { + return "", fmt.Errorf("merging branch: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("reading merge response: %w", err) + } + + if resp.StatusCode == http.StatusConflict { + // Return conflict output; caller decides what to do. + var errResp struct { + Error string `json:"error"` + } + if jsonErr := json.Unmarshal(body, &errResp); jsonErr == nil && errResp.Error != "" { + return errResp.Error, nil + } + return string(body), nil + } + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("merging branch %q into %q: status %d: %s", sourceBranch, targetBranch, resp.StatusCode, body) + } + return "", nil +} + +// GetCommits calls GET /api/repos/{repo}/branches/{branch}/commits and +// returns the list of commits reachable from branch but not from the default +// branch. +func (c *HTTPCodeMCPClient) GetCommits(repo, branch string) ([]CommitInfo, error) { + url := fmt.Sprintf("%s/api/repos/%s/branches/%s/commits", c.baseURL, repo, branch) + + resp, err := c.httpClient.Get(url) + if err != nil { + return nil, fmt.Errorf("getting commits: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("reading commits response: %w", err) + } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("getting commits for branch %q: status %d: %s", branch, resp.StatusCode, body) + } + + var result struct { + Commits []CommitInfo `json:"commits"` + } + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("parsing commits response: %w", err) + } + return result.Commits, nil +} + +// CreatePR calls POST /api/repos/{repo}/pulls to create a draft PR via code-mcp. +// Returns the PR number and HTML URL. +func (c *HTTPCodeMCPClient) CreatePR(repo, title, head, base, body string) (int, string, error) { + payload, _ := json.Marshal(map[string]any{ + "title": title, + "head": head, + "base": base, + "body": body, + "draft": true, + }) + url := fmt.Sprintf("%s/api/repos/%s/pulls", c.baseURL, repo) + + resp, err := c.httpClient.Post(url, "application/json", bytes.NewReader(payload)) + if err != nil { + return 0, "", fmt.Errorf("creating PR: %w", err) + } + defer resp.Body.Close() + + respBody, _ := io.ReadAll(resp.Body) + if resp.StatusCode != http.StatusCreated { + return 0, "", fmt.Errorf("creating PR: status %d: %s", resp.StatusCode, respBody) + } + + var result struct { + PRNumber int `json:"pr_number"` + PRURL string `json:"pr_url"` + } + if err := json.Unmarshal(respBody, &result); err != nil { + return 0, "", fmt.Errorf("parsing CreatePR response: %w", err) + } + return result.PRNumber, result.PRURL, nil +} + +// UpdatePR calls PATCH /api/repos/{repo}/pulls/{number} to update the PR body. +func (c *HTTPCodeMCPClient) UpdatePR(repo string, number int, body string) error { + payload, _ := json.Marshal(map[string]any{ + "body": body, + "draft": false, + }) + url := fmt.Sprintf("%s/api/repos/%s/pulls/%d", c.baseURL, repo, number) + + req, err := http.NewRequest(http.MethodPatch, url, bytes.NewReader(payload)) + if err != nil { + return fmt.Errorf("building UpdatePR request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return fmt.Errorf("updating PR: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(resp.Body) + return fmt.Errorf("updating PR #%d: status %d: %s", number, resp.StatusCode, respBody) + } + return nil +} + +// PromotePR calls POST /api/repos/{repo}/pulls/{number}/ready to convert +// a draft PR to ready-for-review. +func (c *HTTPCodeMCPClient) PromotePR(repo string, number int) error { + url := fmt.Sprintf("%s/api/repos/%s/pulls/%d/ready", c.baseURL, repo, number) + + resp, err := c.httpClient.Post(url, "application/json", nil) + if err != nil { + return fmt.Errorf("promoting PR: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(resp.Body) + return fmt.Errorf("promoting PR #%d: status %d: %s", number, resp.StatusCode, respBody) + } + return nil +} diff --git a/featurelead/featurelead.go b/featurelead/featurelead.go new file mode 100644 index 0000000..1754932 --- /dev/null +++ b/featurelead/featurelead.go @@ -0,0 +1,463 @@ +// Package featurelead contains the feature-lead engine for the OpenDev system. +// +// The feature-lead engine is invoked for each FeatureLead produced by the +// orchestrator. It receives the feature's specification and its resolved +// contracts, then drives a three-step TDD workflow per task: +// +// 1. Task Breakdown – a task-breaker agent decomposes the feature into a list +// of discrete, independently-implementable Tasks, each with a clear +// description and acceptance criteria. +// 2. Per-task TDD cycle (for each task): +// a. Write Tests – a test-writer agent writes tests and registers them +// via the register_test MCP tool. +// b. Implement – a coder agent implements the task to make the tests pass. +// c. Verify – OpenDev calls code-mcp's REST API to run the registered +// tests deterministically (no LLM involved). +// d. Retry – if verification fails, the coder is retried once with +// the failure output. If still fails, the failure is recorded and the +// engine moves on. +package featurelead + +import ( + "context" + "encoding/json" + "fmt" + "log" + + "github.com/iamangus/OpenDev/agent" + "github.com/iamangus/OpenDev/internal/jsonutil" + "github.com/iamangus/OpenDev/orchestrator" +) + +// ── Feature-lead domain types ───────────────────────────────────────────────── + +// Task is a discrete, implementable unit of work derived from a Feature. +type Task struct { + // Name is a short, unique identifier for the task. + Name string `json:"name"` + // Description explains what must be implemented. + Description string `json:"description"` + // AcceptanceCriteria is a list of conditions that must hold for the task to + // be considered complete. + AcceptanceCriteria []string `json:"acceptance_criteria"` + // Files lists the source files expected to be created or modified. + Files []string `json:"files"` +} + +// TaskResult captures the full TDD cycle outcome for a single task. +type TaskResult struct { + // Task is the task that was processed. + Task Task `json:"task"` + // TestWriterOutput is the test-writer agent's raw response. + TestWriterOutput string `json:"test_writer_output"` + // CoderOutput is the coder agent's raw response (first attempt). + CoderOutput string `json:"coder_output"` + // Verification is the result of running the registered tests after the + // first coder attempt. Nil if no test was registered. + Verification *ExecResult `json:"verification,omitempty"` + // RetryCoderOutput is the coder agent's response on the retry attempt. + // Empty if no retry was needed. + RetryCoderOutput string `json:"retry_coder_output,omitempty"` + // RetryVerification is the result of running the registered tests after + // the retry. Nil if no retry was performed or no test was registered. + RetryVerification *ExecResult `json:"retry_verification,omitempty"` + // Passed indicates whether the task's tests ultimately passed. + Passed bool `json:"passed"` +} + +// Result is the complete output of a single Engine run for one FeatureLead. +type Result struct { + // FeatureLead is the lead that was executed. + FeatureLead orchestrator.FeatureLead `json:"feature_lead"` + // Tasks are the implementable units produced by the task-breakdown step. + Tasks []Task `json:"tasks"` + // TaskResults are the TDD cycle outcomes, one per task. + TaskResults []TaskResult `json:"task_results"` + // MergeConflictOutput is the raw conflict output from the first merge + // attempt, when a conflict was detected. + MergeConflictOutput string `json:"merge_conflict_output,omitempty"` + // MergeResolutionOutput is the coder agent's response when it was asked + // to resolve the merge conflict. Empty if no conflict occurred or the + // conflict was not recoverable. + MergeResolutionOutput string `json:"merge_resolution_output,omitempty"` +} + +// ── Engine ──────────────────────────────────────────────────────────────────── + +// AgentNames holds the agentfile agent name for each feature-lead step. +type AgentNames struct { + // TaskBreaker is the agent that decomposes a FeatureLead into Tasks. + TaskBreaker string + // TestWriter is the agent that writes tests for a task and registers them. + TestWriter string + // Coder is the agent that implements a single Task. + Coder string +} + +// Engine drives the task-breakdown, test-writing, coder, and verification +// agents to turn a FeatureLead into working, tested code. +type Engine struct { + agentClient agent.Client + agentNames AgentNames + codeMCPClient CodeMCPClient + codeMCPURL string // base URL for constructing ephemeral MCP server URLs +} + +// New creates a new Engine. +// +// - client is the agent client used for all AI-assisted steps. +// - names specifies which agentfile agent handles each step. +// - codeMCPClient is the client for code-mcp REST API (branch/test operations). +// - codeMCPURL is the base URL of code-mcp (for ephemeral MCP server injection). +func New(client agent.Client, names AgentNames, codeMCPClient CodeMCPClient, codeMCPURL string) *Engine { + return &Engine{ + agentClient: client, + agentNames: names, + codeMCPClient: codeMCPClient, + codeMCPURL: codeMCPURL, + } +} + +// Run executes the full TDD feature-lead workflow for the given lead and +// returns the aggregated result. +// +// onTasksPlanned is called once after the task-breakdown phase completes, +// with the full list of planned tasks. It may be nil. +// +// onTaskDone is called once per task immediately after its TDD cycle +// finishes (whether it passed or not). It may be nil. +func (e *Engine) Run( + ctx context.Context, + lead orchestrator.FeatureLead, + repo string, + onTasksPlanned func([]Task), + onTaskDone func(TaskResult), +) (*Result, error) { + // Step 1 – Task Breakdown + tasks, err := e.breakdownTasks(ctx, lead) + if err != nil { + return nil, fmt.Errorf("step 1 (task breakdown): %w", err) + } + if onTasksPlanned != nil { + onTasksPlanned(tasks) + } + + // Step 2 – Per-task TDD cycle + taskResults := make([]TaskResult, 0, len(tasks)) + for _, task := range tasks { + tr, err := e.processTask(ctx, lead, task, repo) + if err != nil { + return nil, fmt.Errorf("task %q: %w", task.Name, err) + } + if onTaskDone != nil { + onTaskDone(*tr) + } + taskResults = append(taskResults, *tr) + } + + result := &Result{ + FeatureLead: lead, + Tasks: tasks, + TaskResults: taskResults, + } + + // Step 3 – Merge feature branch into orch branch (if orch branch is set). + if lead.OrchBranchName != "" { + conflictOutput, mergeErr := e.codeMCPClient.MergeBranch(repo, lead.BranchName, lead.OrchBranchName) + if mergeErr != nil { + return nil, fmt.Errorf("merging %q into %q: %w", lead.BranchName, lead.OrchBranchName, mergeErr) + } + + if conflictOutput != "" { + // Conflict on first attempt — ask the coder to resolve it. + result.MergeConflictOutput = conflictOutput + resolutionOutput, resolveErr := e.resolveMergeConflict(ctx, lead, conflictOutput, repo) + if resolveErr != nil { + return nil, fmt.Errorf("asking coder to resolve merge conflict: %w", resolveErr) + } + result.MergeResolutionOutput = resolutionOutput + + // Retry the merge once. + retryConflict, retryErr := e.codeMCPClient.MergeBranch(repo, lead.BranchName, lead.OrchBranchName) + if retryErr != nil { + return nil, fmt.Errorf("retry merge %q into %q: %w", lead.BranchName, lead.OrchBranchName, retryErr) + } + if retryConflict != "" { + // Still conflicting after coder retry — mark as conflict-failed via sentinel error. + return nil, &MergeConflictFailedError{ + Result: result, + ConflictOutput: retryConflict, + } + } + } + } + + return result, nil +} + +// ── Step 1: Task Breakdown ──────────────────────────────────────────────────── + +func (e *Engine) breakdownTasks(ctx context.Context, lead orchestrator.FeatureLead) ([]Task, error) { + leadJSON, _ := json.Marshal(lead) + + message := fmt.Sprintf(`You are a senior software architect. Given the following Feature Lead, decompose it into +a list of discrete, independently-implementable tasks for a coder agent. +Each task must have a single, well-defined responsibility. +Return ONLY a valid JSON array – no markdown, no explanation. + +Required JSON structure (array of task objects): +[ + { + "name": "", + "description": "", + "acceptance_criteria": ["", ""], + "files": ["", ""] + } +] + +Feature Lead: +%s`, string(leadJSON)) + + raw, err := e.agentClient.Chat(ctx, e.agentNames.TaskBreaker, message, nil) + if err != nil { + return nil, fmt.Errorf("agent call: %w", err) + } + + jsonStr, err := jsonutil.ExtractJSON(raw) + if err != nil { + return nil, fmt.Errorf("extracting JSON from agent response: %w", err) + } + + var tasks []Task + if err := json.Unmarshal([]byte(jsonStr), &tasks); err != nil { + return nil, fmt.Errorf("parsing tasks JSON: %w", err) + } + if len(tasks) == 0 { + return nil, fmt.Errorf("agent returned an empty task list") + } + return tasks, nil +} + +// ── Step 2: Per-task TDD cycle ──────────────────────────────────────────────── + +// processTask runs the full TDD cycle for a single task: +// write tests → implement → verify → (optional retry) → verify. +func (e *Engine) processTask(ctx context.Context, lead orchestrator.FeatureLead, task Task, repo string) (*TaskResult, error) { + tr := &TaskResult{Task: task} + + // Build the ephemeral MCP server config for code-mcp, scoped to this branch. + mcpServers := e.branchMCPServers(repo, lead.BranchName) + + // 2a. Write Tests + testWriterOutput, err := e.writeTests(ctx, lead, task, mcpServers) + if err != nil { + return nil, fmt.Errorf("test-writer: %w", err) + } + tr.TestWriterOutput = testWriterOutput + + // 2b. Implement + coderOutput, err := e.implementTask(ctx, lead, task, mcpServers, "") + if err != nil { + return nil, fmt.Errorf("coder: %w", err) + } + tr.CoderOutput = coderOutput + + // 2c. Verify + execResult, err := e.verifyTask(repo, lead) + if err != nil { + // No test registered or code-mcp unreachable — not fatal, just log. + log.Printf("featurelead: verify task %q: %v", task.Name, err) + tr.Passed = false + return tr, nil + } + tr.Verification = execResult + + if execResult.ExitCode == 0 && !execResult.TimedOut { + tr.Passed = true + return tr, nil + } + + // 2d. Retry — coder gets one more attempt with failure output. + failureContext := formatVerificationFailure(execResult) + retryOutput, err := e.implementTask(ctx, lead, task, mcpServers, failureContext) + if err != nil { + return nil, fmt.Errorf("coder retry: %w", err) + } + tr.RetryCoderOutput = retryOutput + + // Re-verify after retry. + retryResult, err := e.verifyTask(repo, lead) + if err != nil { + log.Printf("featurelead: retry verify task %q: %v", task.Name, err) + tr.Passed = false + return tr, nil + } + tr.RetryVerification = retryResult + tr.Passed = retryResult.ExitCode == 0 && !retryResult.TimedOut + + return tr, nil +} + +// writeTests invokes the test-writer agent. The agent has access to the code-mcp +// MCP server (for reading code and calling register_test). +func (e *Engine) writeTests( + ctx context.Context, + lead orchestrator.FeatureLead, + task Task, + mcpServers []agent.MCPServer, +) (string, error) { + taskJSON, _ := json.Marshal(task) + requiredJSON, _ := json.Marshal(lead.RequiredContracts) + providedJSON, _ := json.Marshal(lead.ProvidedContracts) + + message := fmt.Sprintf(`You are an expert test engineer. Write comprehensive tests for the following task. + +Your goals: +1. Read the existing codebase to understand the project structure, conventions, and testing patterns. +2. Write tests that verify the acceptance criteria for this task. + +Important: +- Write test FILES using the create_file or search_and_replace tools. +- The tests should initially FAIL (since the implementation doesn't exist yet). +- Focus on testing the public interface / behavior, not implementation details. + +Task: +%s + +Required contracts (contracts this feature consumes): +%s + +Provided contracts (contracts this feature implements): +%s + +Branch: %s`, + string(taskJSON), + string(requiredJSON), + string(providedJSON), + lead.BranchName, + ) + + return e.agentClient.Chat(ctx, e.agentNames.TestWriter, message, mcpServers) +} + +// implementTask invokes the coder agent. If failureContext is non-empty, it is +// appended to the prompt so the coder can learn from the previous failed attempt. +func (e *Engine) implementTask( + ctx context.Context, + lead orchestrator.FeatureLead, + task Task, + mcpServers []agent.MCPServer, + failureContext string, +) (string, error) { + taskJSON, _ := json.Marshal(task) + requiredJSON, _ := json.Marshal(lead.RequiredContracts) + providedJSON, _ := json.Marshal(lead.ProvidedContracts) + + message := fmt.Sprintf(`You are an expert software engineer. Implement the following task exactly as specified. +Adhere strictly to all provided contracts – do not deviate from schemas or APIs. + +Task: +%s + +Required contracts (contracts this feature consumes – do NOT change their definitions): +%s + +Provided contracts (contracts this feature implements – your implementation must satisfy them exactly): +%s + +Branch: %s + +Implement the task by writing code using the available file tools. Make sure all tests pass.`, + string(taskJSON), + string(requiredJSON), + string(providedJSON), + lead.BranchName, + ) + + if failureContext != "" { + message += fmt.Sprintf(` + +IMPORTANT: A previous implementation attempt failed the tests. Here is the test output: + +%s + +Fix the implementation so the tests pass.`, failureContext) + } + + return e.agentClient.Chat(ctx, e.agentNames.Coder, message, mcpServers) +} + +// verifyTask calls code-mcp's REST API to run the registered test. +func (e *Engine) verifyTask(repo string, lead orchestrator.FeatureLead) (*ExecResult, error) { + return e.codeMCPClient.RunTest(repo, lead.BranchName) +} + +// branchMCPServers returns the ephemeral MCP server config for injecting +// code-mcp (scoped to the feature's branch) into agent calls. +// Both the read and write profiles are included so agents can read the +// codebase and make changes. +func (e *Engine) branchMCPServers(repo, branchName string) []agent.MCPServer { + if e.codeMCPURL == "" { + return nil + } + base := fmt.Sprintf("%s/%s/%s", e.codeMCPURL, repo, branchName) + return []agent.MCPServer{ + { + Name: "dev-read", + URL: base + "/read/mcp", + Transport: "streamable-http", + }, + { + Name: "dev-write", + URL: base + "/write/mcp", + Transport: "streamable-http", + }, + } +} + +// formatVerificationFailure builds a human-readable summary of a test failure +// for inclusion in the coder's retry prompt. +func formatVerificationFailure(r *ExecResult) string { + if r.TimedOut { + return fmt.Sprintf("Tests TIMED OUT.\nstdout:\n%s\nstderr:\n%s", r.Stdout, r.Stderr) + } + return fmt.Sprintf("Tests FAILED (exit code %d).\nstdout:\n%s\nstderr:\n%s", r.ExitCode, r.Stdout, r.Stderr) +} + +// resolveMergeConflict asks the coder agent to resolve a merge conflict by +// editing the conflicted files in the feature branch. +func (e *Engine) resolveMergeConflict( + ctx context.Context, + lead orchestrator.FeatureLead, + conflictOutput string, + repo string, +) (string, error) { + mcpServers := e.branchMCPServers(repo, lead.BranchName) + + message := fmt.Sprintf(`You are an expert software engineer. A git merge of branch %q into %q produced conflicts. +You must resolve the conflicts by editing the conflicted files in branch %q. + +Use the available file tools to: +1. Find all files with conflict markers (<<<<<<, =======, >>>>>>>). +2. Resolve each conflict sensibly, preserving intent from both sides. +3. Remove all conflict markers. + +Merge conflict output: +%s`, + lead.BranchName, lead.OrchBranchName, lead.BranchName, conflictOutput, + ) + + return e.agentClient.Chat(ctx, e.agentNames.Coder, message, mcpServers) +} + +// MergeConflictFailedError is returned by Engine.Run when a merge conflict +// persists even after the coder's resolution attempt. It carries the partial +// Result so callers can still inspect completed task work. +type MergeConflictFailedError struct { + Result *Result + ConflictOutput string +} + +func (e *MergeConflictFailedError) Error() string { + return fmt.Sprintf("merge conflict persists after resolution attempt: %s", e.ConflictOutput) +} diff --git a/featurelead/featurelead_test.go b/featurelead/featurelead_test.go new file mode 100644 index 0000000..e8b8b23 --- /dev/null +++ b/featurelead/featurelead_test.go @@ -0,0 +1,657 @@ +package featurelead + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "testing" + + "github.com/iamangus/OpenDev/agent" + "github.com/iamangus/OpenDev/orchestrator" +) + +// ── Test doubles ────────────────────────────────────────────────────────────── + +// mockAgentClient is a test double for agent.Client that replays a pre-loaded +// sequence of responses. Calls beyond the sequence return an error. +type mockAgentClient struct { + responses []string + calls []mockCall + callIdx int +} + +type mockCall struct { + agentName string + message string + mcpServers []agent.MCPServer +} + +func (m *mockAgentClient) Chat(_ context.Context, agentName string, message string, mcpServers []agent.MCPServer) (string, error) { + if m.callIdx >= len(m.responses) { + return "", fmt.Errorf("mockAgentClient: unexpected call #%d (only %d responses configured)", m.callIdx+1, len(m.responses)) + } + m.calls = append(m.calls, mockCall{agentName: agentName, message: message, mcpServers: mcpServers}) + resp := m.responses[m.callIdx] + m.callIdx++ + return resp, nil +} + +// mockCodeMCPClient is a test double for CodeMCPClient. +type mockCodeMCPClient struct { + runTestResults []*ExecResult + runTestErrors []error + runTestIdx int + createCalls []createBranchCall + mergeCalls []mergeBranchCall + mergeResults []mergeResult // indexed by call order + mergeIdx int +} + +type createBranchCall struct { + repo, branch, base string +} + +type mergeBranchCall struct { + repo, source, target string +} + +type mergeResult struct { + conflictOutput string + err error +} + +func (m *mockCodeMCPClient) CreateBranch(repo, branch, base string) error { + m.createCalls = append(m.createCalls, createBranchCall{repo, branch, base}) + return nil +} + +func (m *mockCodeMCPClient) RunTest(repo, branch string) (*ExecResult, error) { + if m.runTestIdx >= len(m.runTestResults) { + return nil, fmt.Errorf("mockCodeMCPClient: unexpected RunTest call #%d", m.runTestIdx+1) + } + result := m.runTestResults[m.runTestIdx] + var err error + if m.runTestErrors != nil && m.runTestIdx < len(m.runTestErrors) { + err = m.runTestErrors[m.runTestIdx] + } + m.runTestIdx++ + return result, err +} + +func (m *mockCodeMCPClient) MergeBranch(repo, sourceBranch, targetBranch string) (string, error) { + m.mergeCalls = append(m.mergeCalls, mergeBranchCall{repo, sourceBranch, targetBranch}) + if m.mergeIdx >= len(m.mergeResults) { + // Default: clean merge + return "", nil + } + r := m.mergeResults[m.mergeIdx] + m.mergeIdx++ + return r.conflictOutput, r.err +} + +func (m *mockCodeMCPClient) GetCommits(repo, branch string) ([]CommitInfo, error) { + return nil, nil +} + +func (m *mockCodeMCPClient) CreatePR(repo, title, head, base, body string) (int, string, error) { + return 0, "", nil +} + +func (m *mockCodeMCPClient) UpdatePR(repo string, number int, body string) error { + return nil +} + +func (m *mockCodeMCPClient) PromotePR(repo string, number int) error { + return nil +} + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +// mustJSON serialises v to a JSON string and panics on error (test helper). +func mustJSON(v any) string { + b, err := json.Marshal(v) + if err != nil { + panic(err) + } + return string(b) +} + +// testAgentNames returns a fixed set of agent names for use in tests. +func testAgentNames() AgentNames { + return AgentNames{ + TaskBreaker: "task-breaker", + TestWriter: "test-writer", + Coder: "coder", + } +} + +// testFeatureLead returns a minimal FeatureLead for use in tests. +func testFeatureLead() orchestrator.FeatureLead { + return orchestrator.FeatureLead{ + Feature: orchestrator.Feature{ + Name: "User Authentication", + Description: "Handles user registration and login", + Domain: "identity", + }, + RequiredContracts: []orchestrator.Contract{}, + ProvidedContracts: []orchestrator.Contract{ + { + Name: "UserAuthAPI", + Type: orchestrator.ContractTypeAPI, + Definition: "POST /auth/login -> {token: string}", + ProvidedBy: "User Authentication", + }, + }, + BranchName: "feature-user-auth", + } +} + +// testFeatureLeadWithOrch is like testFeatureLead but with OrchBranchName set, +// enabling the merge step in Engine.Run. +func testFeatureLeadWithOrch() orchestrator.FeatureLead { + lead := testFeatureLead() + lead.OrchBranchName = "orch-e-commerce" + return lead +} + +func newTestEngine(agentClient agent.Client, codeMCPClient CodeMCPClient) *Engine { + return New(agentClient, testAgentNames(), codeMCPClient, "http://code-mcp:8080") +} + +// ── Full workflow ───────────────────────────────────────────────────────────── + +func TestRun_FullWorkflow_AllTestsPass(t *testing.T) { + tasks := []Task{ + { + Name: "Implement POST /auth/login handler", + Description: "Create the HTTP handler for the login endpoint", + AcceptanceCriteria: []string{"Returns JWT on valid credentials", "Returns 401 on invalid credentials"}, + Files: []string{"auth/handler.go", "auth/handler_test.go"}, + }, + { + Name: "Implement user registration", + Description: "Create the HTTP handler for user registration", + AcceptanceCriteria: []string{"Hashes password before storage", "Returns 201 on success"}, + Files: []string{"auth/register.go", "auth/register_test.go"}, + }, + } + + // Agent responses: task-breakdown, then per task: test-writer + coder + responses := []string{ + mustJSON(tasks), // Step 1 – task breakdown + "// tests for login handler", // Step 2a – test-writer for task 1 + "// login handler implementation", // Step 2b – coder for task 1 + "// tests for user registration", // Step 2a – test-writer for task 2 + "// register implementation", // Step 2b – coder for task 2 + } + + codeMCPClient := &mockCodeMCPClient{ + runTestResults: []*ExecResult{ + {ExitCode: 0}, // task 1 passes + {ExitCode: 0}, // task 2 passes + }, + } + + client := &mockAgentClient{responses: responses} + engine := newTestEngine(client, codeMCPClient) + lead := testFeatureLead() + + result, err := engine.Run(context.Background(), lead, "test-repo", nil, nil) + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + + // FeatureLead should be preserved. + if result.FeatureLead.Feature.Name != lead.Feature.Name { + t.Errorf("unexpected feature name: %q", result.FeatureLead.Feature.Name) + } + + // Tasks should match the fixture. + if len(result.Tasks) != len(tasks) { + t.Fatalf("expected %d tasks, got %d", len(tasks), len(result.Tasks)) + } + + // TaskResults should be one per task, all passing. + if len(result.TaskResults) != len(tasks) { + t.Fatalf("expected %d task results, got %d", len(tasks), len(result.TaskResults)) + } + for i, tr := range result.TaskResults { + if !tr.Passed { + t.Errorf("task %d (%q) should have passed", i, tr.Task.Name) + } + if tr.TestWriterOutput == "" { + t.Errorf("task %d: expected test-writer output", i) + } + if tr.CoderOutput == "" { + t.Errorf("task %d: expected coder output", i) + } + } + + // All agent responses should have been consumed. + if client.callIdx != len(responses) { + t.Errorf("expected %d agent calls, got %d", len(responses), client.callIdx) + } + + // Verify call order: task-breaker, then alternating test-writer + coder. + if client.calls[0].agentName != testAgentNames().TaskBreaker { + t.Errorf("call 0: expected %q, got %q", testAgentNames().TaskBreaker, client.calls[0].agentName) + } + for i := 1; i < len(client.calls); i += 2 { + if client.calls[i].agentName != testAgentNames().TestWriter { + t.Errorf("call %d: expected %q, got %q", i, testAgentNames().TestWriter, client.calls[i].agentName) + } + if i+1 < len(client.calls) && client.calls[i+1].agentName != testAgentNames().Coder { + t.Errorf("call %d: expected %q, got %q", i+1, testAgentNames().Coder, client.calls[i+1].agentName) + } + } + + // Verify MCP servers are injected for test-writer and coder calls (not task-breaker). + if len(client.calls[0].mcpServers) != 0 { + t.Errorf("task-breaker should have no MCP servers injected, got %d", len(client.calls[0].mcpServers)) + } + for i := 1; i < len(client.calls); i++ { + if len(client.calls[i].mcpServers) != 2 { + t.Errorf("call %d: expected 2 MCP servers injected (read+write), got %d", i, len(client.calls[i].mcpServers)) + } else { + if client.calls[i].mcpServers[0].Name != "dev-read" { + t.Errorf("call %d: expected MCP server[0] name 'dev-read', got %q", i, client.calls[i].mcpServers[0].Name) + } + if client.calls[i].mcpServers[1].Name != "dev-write" { + t.Errorf("call %d: expected MCP server[1] name 'dev-write', got %q", i, client.calls[i].mcpServers[1].Name) + } + } + } +} + +func TestRun_RetryOnTestFailure(t *testing.T) { + tasks := []Task{ + {Name: "T1", Description: "desc"}, + } + + // task-breakdown, test-writer, coder (attempt 1), coder (retry) + responses := []string{ + mustJSON(tasks), + "// tests", + "// first attempt", + "// retry attempt", + } + + codeMCPClient := &mockCodeMCPClient{ + runTestResults: []*ExecResult{ + {ExitCode: 1, Stdout: "FAIL", Stderr: "assertion error"}, // first verify fails + {ExitCode: 0}, // retry verify passes + }, + } + + client := &mockAgentClient{responses: responses} + engine := newTestEngine(client, codeMCPClient) + + result, err := engine.Run(context.Background(), testFeatureLead(), "test-repo", nil, nil) + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + + if len(result.TaskResults) != 1 { + t.Fatalf("expected 1 task result, got %d", len(result.TaskResults)) + } + + tr := result.TaskResults[0] + if tr.Passed != true { + t.Error("task should pass after retry") + } + if tr.RetryCoderOutput == "" { + t.Error("expected retry coder output") + } + if tr.Verification == nil || tr.Verification.ExitCode != 1 { + t.Error("first verification should show failure") + } + if tr.RetryVerification == nil || tr.RetryVerification.ExitCode != 0 { + t.Error("retry verification should show success") + } + + // Verify the retry coder call includes failure context. + // calls: [0]=task-breaker, [1]=test-writer, [2]=coder, [3]=retry-coder + if len(client.calls) != 4 { + t.Fatalf("expected 4 agent calls, got %d", len(client.calls)) + } + retryMsg := client.calls[3].message + if retryMsg == client.calls[2].message { + t.Error("retry message should differ from first attempt (should include failure context)") + } +} + +func TestRun_RetryStillFails(t *testing.T) { + tasks := []Task{ + {Name: "T1", Description: "desc"}, + } + + responses := []string{ + mustJSON(tasks), + "// tests", + "// first attempt", + "// retry attempt", + } + + codeMCPClient := &mockCodeMCPClient{ + runTestResults: []*ExecResult{ + {ExitCode: 1, Stderr: "fail"}, // first verify fails + {ExitCode: 1, Stderr: "fail 2"}, // retry also fails + }, + } + + client := &mockAgentClient{responses: responses} + engine := newTestEngine(client, codeMCPClient) + + result, err := engine.Run(context.Background(), testFeatureLead(), "test-repo", nil, nil) + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + + tr := result.TaskResults[0] + if tr.Passed { + t.Error("task should be marked as failed when retry also fails") + } +} + +func TestRun_NoTestRegistered(t *testing.T) { + tasks := []Task{ + {Name: "T1", Description: "desc"}, + } + + responses := []string{ + mustJSON(tasks), + "// tests (but didn't register)", + "// implementation", + } + + codeMCPClient := &mockCodeMCPClient{ + runTestResults: []*ExecResult{nil}, + runTestErrors: []error{ErrNoTestRegistered}, + } + + client := &mockAgentClient{responses: responses} + engine := newTestEngine(client, codeMCPClient) + + result, err := engine.Run(context.Background(), testFeatureLead(), "test-repo", nil, nil) + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + + tr := result.TaskResults[0] + if tr.Passed { + t.Error("task should not pass when no test is registered") + } + if tr.Verification != nil { + t.Error("verification should be nil when no test is registered") + } +} + +func TestRun_AgentErrorPropagates(t *testing.T) { + client := &mockAgentClient{responses: []string{}} // no responses → first call fails + codeMCPClient := &mockCodeMCPClient{} + engine := newTestEngine(client, codeMCPClient) + + _, err := engine.Run(context.Background(), testFeatureLead(), "test-repo", nil, nil) + if err == nil { + t.Fatal("expected an error when agent has no responses, got nil") + } +} + +func TestRun_EmptyTaskList(t *testing.T) { + client := &mockAgentClient{responses: []string{"[]"}} // task-breaker returns empty list + codeMCPClient := &mockCodeMCPClient{} + engine := newTestEngine(client, codeMCPClient) + + _, err := engine.Run(context.Background(), testFeatureLead(), "test-repo", nil, nil) + if err == nil { + t.Fatal("expected error for empty task list, got nil") + } +} + +// ── breakdownTasks (unit) ───────────────────────────────────────────────────── + +func TestBreakdownTasks_StripsMarkdown(t *testing.T) { + tasks := []Task{ + {Name: "T1", Description: "desc", AcceptanceCriteria: []string{"ac1"}, Files: []string{"f.go"}}, + } + wrapped := fmt.Sprintf("```json\n%s\n```", mustJSON(tasks)) + + client := &mockAgentClient{responses: []string{wrapped}} + codeMCPClient := &mockCodeMCPClient{} + engine := newTestEngine(client, codeMCPClient) + + got, err := engine.breakdownTasks(context.Background(), testFeatureLead()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got) != 1 || got[0].Name != "T1" { + t.Errorf("unexpected tasks: %v", got) + } +} + +// ── MCP server injection ───────────────────────────────────────────────────── + +func TestBranchMCPServers(t *testing.T) { + engine := &Engine{ + codeMCPURL: "http://code-mcp:8080", + } + servers := engine.branchMCPServers("my-repo", "feature-auth") + if len(servers) != 2 { + t.Fatalf("expected 2 servers, got %d", len(servers)) + } + if servers[0].Name != "dev-read" { + t.Errorf("unexpected name for servers[0]: %q", servers[0].Name) + } + if servers[0].URL != "http://code-mcp:8080/my-repo/feature-auth/read/mcp" { + t.Errorf("unexpected URL for servers[0]: %q", servers[0].URL) + } + if servers[1].Name != "dev-write" { + t.Errorf("unexpected name for servers[1]: %q", servers[1].Name) + } + if servers[1].URL != "http://code-mcp:8080/my-repo/feature-auth/write/mcp" { + t.Errorf("unexpected URL for servers[1]: %q", servers[1].URL) + } +} + +func TestBranchMCPServers_Empty(t *testing.T) { + engine := &Engine{codeMCPURL: ""} + servers := engine.branchMCPServers("my-repo", "feature-auth") + if servers != nil { + t.Errorf("expected nil servers when codeMCPURL is empty, got %v", servers) + } +} + +// ── orchestrator + featurelead integration ──────────────────────────────────── + +// contractsFixture mirrors the JSON shape expected by the orchestrator's +// contract-definition step without depending on its unexported type. +type contractsFixture struct { + Contracts []orchestrator.Contract `json:"contracts"` + FeatureAssignments map[string]featureAssignmentFixture `json:"feature_assignments"` +} + +type featureAssignmentFixture struct { + RequiredContracts []string `json:"required_contracts"` + ProvidedContracts []string `json:"provided_contracts"` +} + +// TestOrchestratorEngineIntegration verifies that the orchestrator produces +// FeatureLeads and that the feature lead engine can consume them correctly. +func TestOrchestratorEngineIntegration(t *testing.T) { + // Build orchestrator fixture that produces one FeatureLead. + spec := orchestrator.SystemSpec{Title: "Test", Description: "D"} + features := []orchestrator.Feature{ + {Name: "Auth", Description: "auth feature", Domain: "identity"}, + } + contractsResp := contractsFixture{ + Contracts: []orchestrator.Contract{}, + FeatureAssignments: map[string]featureAssignmentFixture{ + "Auth": {RequiredContracts: []string{}, ProvidedContracts: []string{}}, + }, + } + + orchClient := &mockAgentClient{responses: []string{ + mustJSON(spec), + mustJSON(features), + mustJSON(contractsResp), + "orch-test", // orch branch name for the run + "feature-auth", // branch name for Auth + }} + orch := orchestrator.New(orchClient, orchestrator.AgentNames{ + Spec: "spec", Feature: "feature", Contract: "contract", Branch: "branch", + }, "", "", nil) + + orchResult, err := orch.Run(context.Background(), "build auth") + if err != nil { + t.Fatalf("orchestrator.Run returned error: %v", err) + } + if len(orchResult.FeatureLeads) != 1 { + t.Fatalf("expected 1 feature lead, got %d", len(orchResult.FeatureLeads)) + } + + // Now hand the FeatureLead off to the feature lead engine. + flTasks := []Task{{Name: "T1", Description: "desc"}} + flClient := &mockAgentClient{responses: []string{ + mustJSON(flTasks), // task breakdown + "test writer out", // test-writer for T1 + "coder output", // coder for T1 + }} + codeMCPClient := &mockCodeMCPClient{ + runTestResults: []*ExecResult{{ExitCode: 0}}, + } + engine := newTestEngine(flClient, codeMCPClient) + + flResult, err := engine.Run(context.Background(), orchResult.FeatureLeads[0], "test-repo", nil, nil) + if err != nil { + t.Fatalf("engine.Run returned error: %v", err) + } + + if flResult.FeatureLead.Feature.Name != "Auth" { + t.Errorf("unexpected feature lead name: %q", flResult.FeatureLead.Feature.Name) + } + if len(flResult.Tasks) != 1 || flResult.Tasks[0].Name != "T1" { + t.Errorf("unexpected tasks: %v", flResult.Tasks) + } + if len(flResult.TaskResults) != 1 || flResult.TaskResults[0].CoderOutput != "coder output" { + t.Errorf("unexpected task results: %v", flResult.TaskResults) + } + if !flResult.TaskResults[0].Passed { + t.Error("task should have passed") + } +} + +// ── Merge tests ─────────────────────────────────────────────────────────────── + +func TestRun_MergeSucceeds(t *testing.T) { + tasks := []Task{{Name: "T1", Description: "desc"}} + responses := []string{ + mustJSON(tasks), + "// tests", + "// impl", + } + codeMCPClient := &mockCodeMCPClient{ + runTestResults: []*ExecResult{{ExitCode: 0}}, + // mergeResults left empty → default clean merge + } + + client := &mockAgentClient{responses: responses} + engine := newTestEngine(client, codeMCPClient) + lead := testFeatureLeadWithOrch() + + result, err := engine.Run(context.Background(), lead, "test-repo", nil, nil) + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + if len(result.TaskResults) != 1 || !result.TaskResults[0].Passed { + t.Error("task should have passed") + } + if result.MergeConflictOutput != "" { + t.Error("expected no merge conflict output on clean merge") + } + // MergeBranch should have been called once. + if len(codeMCPClient.mergeCalls) != 1 { + t.Fatalf("expected 1 merge call, got %d", len(codeMCPClient.mergeCalls)) + } + mc := codeMCPClient.mergeCalls[0] + if mc.source != lead.BranchName || mc.target != lead.OrchBranchName { + t.Errorf("merge call: got source=%q target=%q, want %q→%q", + mc.source, mc.target, lead.BranchName, lead.OrchBranchName) + } +} + +func TestRun_MergeConflictResolvedByCoder(t *testing.T) { + tasks := []Task{{Name: "T1", Description: "desc"}} + // responses: task-breakdown, test-writer, coder, conflict-resolver (coder again) + responses := []string{ + mustJSON(tasks), + "// tests", + "// impl", + "// conflict resolution", + } + codeMCPClient := &mockCodeMCPClient{ + runTestResults: []*ExecResult{{ExitCode: 0}}, + mergeResults: []mergeResult{ + {conflictOutput: "CONFLICT: merge conflict in foo.go"}, // first attempt: conflict + {conflictOutput: ""}, // retry: clean + }, + } + + client := &mockAgentClient{responses: responses} + engine := newTestEngine(client, codeMCPClient) + lead := testFeatureLeadWithOrch() + + result, err := engine.Run(context.Background(), lead, "test-repo", nil, nil) + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + if result.MergeConflictOutput == "" { + t.Error("expected MergeConflictOutput to be set") + } + if result.MergeResolutionOutput == "" { + t.Error("expected MergeResolutionOutput to be set") + } + if len(codeMCPClient.mergeCalls) != 2 { + t.Fatalf("expected 2 merge calls (attempt + retry), got %d", len(codeMCPClient.mergeCalls)) + } + // The coder should have been called 4 times: task-breaker, test-writer, coder, resolver. + if client.callIdx != 4 { + t.Errorf("expected 4 agent calls, got %d", client.callIdx) + } +} + +func TestRun_MergeConflictPersists(t *testing.T) { + tasks := []Task{{Name: "T1", Description: "desc"}} + responses := []string{ + mustJSON(tasks), + "// tests", + "// impl", + "// attempted conflict resolution", + } + codeMCPClient := &mockCodeMCPClient{ + runTestResults: []*ExecResult{{ExitCode: 0}}, + mergeResults: []mergeResult{ + {conflictOutput: "CONFLICT: foo.go"}, // first attempt: conflict + {conflictOutput: "CONFLICT: foo.go"}, // retry: still conflict + }, + } + + client := &mockAgentClient{responses: responses} + engine := newTestEngine(client, codeMCPClient) + lead := testFeatureLeadWithOrch() + + _, err := engine.Run(context.Background(), lead, "test-repo", nil, nil) + if err == nil { + t.Fatal("expected error when merge conflict persists, got nil") + } + var conflictErr *MergeConflictFailedError + if !errors.As(err, &conflictErr) { + t.Fatalf("expected *MergeConflictFailedError, got %T: %v", err, err) + } + if conflictErr.Result == nil { + t.Error("MergeConflictFailedError.Result should not be nil") + } + if len(conflictErr.Result.TaskResults) != 1 { + t.Errorf("expected 1 task result in partial result, got %d", len(conflictErr.Result.TaskResults)) + } +} diff --git a/featurelead/store.go b/featurelead/store.go new file mode 100644 index 0000000..813d449 --- /dev/null +++ b/featurelead/store.go @@ -0,0 +1,187 @@ +package featurelead + +import ( + "sync" + "time" + + "github.com/iamangus/OpenDev/orchestrator" +) + +// JobStatus represents the lifecycle state of a feature lead job. +type JobStatus string + +const ( + JobStatusPending JobStatus = "pending" + JobStatusRunning JobStatus = "running" + JobStatusDone JobStatus = "done" + JobStatusFailed JobStatus = "failed" +) + +// MergeStatus represents the outcome of merging the feature branch into the +// orchestration branch. +type MergeStatus string + +const ( + MergeStatusPending MergeStatus = "pending" + MergeStatusMerged MergeStatus = "merged" + MergeStatusConflictFailed MergeStatus = "conflict_failed" +) + +// Job holds the state of a single feature lead execution. +type Job struct { + ID string `json:"id"` + FeatureLead orchestrator.FeatureLead `json:"feature_lead"` + Status JobStatus `json:"status"` + // Tasks is populated after the task-breakdown phase completes, before TDD + // cycles begin. It allows consumers to see the planned work while the job + // is still running. + Tasks []Task `json:"tasks,omitempty"` + // TaskResults grows as each TDD cycle completes. Consumers can track + // progress in real time by polling while Status is "running". + TaskResults []TaskResult `json:"task_results,omitempty"` + Result *Result `json:"result,omitempty"` + Error string `json:"error,omitempty"` + // MergeStatus records the outcome of merging the feature branch into the + // orchestration branch. Stays "pending" until the merge is attempted. + MergeStatus MergeStatus `json:"merge_status,omitempty"` + // MergeError holds the conflict output when MergeStatus is "conflict_failed". + MergeError string `json:"merge_error,omitempty"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +// Store is a thread-safe in-memory store for feature lead jobs. +type Store struct { + mu sync.RWMutex + jobs map[string]*Job +} + +// NewStore creates a new empty Store. +func NewStore() *Store { + return &Store{ + jobs: make(map[string]*Job), + } +} + +// Create registers a new job for the given FeatureLead with status pending and +// returns a copy of the created Job. +func (s *Store) Create(id string, lead orchestrator.FeatureLead) Job { + now := time.Now() + mergeStatus := MergeStatus("") + if lead.OrchBranchName != "" { + mergeStatus = MergeStatusPending + } + job := &Job{ + ID: id, + FeatureLead: lead, + Status: JobStatusPending, + MergeStatus: mergeStatus, + CreatedAt: now, + UpdatedAt: now, + } + s.mu.Lock() + s.jobs[id] = job + s.mu.Unlock() + return *job +} + +// SetRunning transitions a job to the running state. +func (s *Store) SetRunning(id string) { + s.mu.Lock() + if j, ok := s.jobs[id]; ok { + j.Status = JobStatusRunning + j.UpdatedAt = time.Now() + } + s.mu.Unlock() +} + +// SetDone transitions a job to the done state and attaches its result. +func (s *Store) SetDone(id string, result *Result) { + s.mu.Lock() + if j, ok := s.jobs[id]; ok { + j.Status = JobStatusDone + j.Result = result + j.UpdatedAt = time.Now() + } + s.mu.Unlock() +} + +// SetTasks stores the planned task list after the breakdown phase completes. +// It is called while the job is still running, before any TDD cycle begins. +func (s *Store) SetTasks(id string, tasks []Task) { + s.mu.Lock() + if j, ok := s.jobs[id]; ok { + j.Tasks = tasks + j.UpdatedAt = time.Now() + } + s.mu.Unlock() +} + +// AppendTaskResult appends the result of a completed TDD cycle to the job. +// It is called once per task as each cycle finishes, while the job is running. +func (s *Store) AppendTaskResult(id string, tr TaskResult) { + s.mu.Lock() + if j, ok := s.jobs[id]; ok { + j.TaskResults = append(j.TaskResults, tr) + j.UpdatedAt = time.Now() + } + s.mu.Unlock() +} + +// SetFailed transitions a job to the failed state and records the error message. +func (s *Store) SetFailed(id string, err error) { + s.mu.Lock() + if j, ok := s.jobs[id]; ok { + j.Status = JobStatusFailed + j.Error = err.Error() + j.UpdatedAt = time.Now() + } + s.mu.Unlock() +} + +// SetMerged records that the feature branch was successfully merged into the +// orchestration branch. The job status is not changed by this call — the job +// may already be Done or still running. +func (s *Store) SetMerged(id string) { + s.mu.Lock() + if j, ok := s.jobs[id]; ok { + j.MergeStatus = MergeStatusMerged + j.UpdatedAt = time.Now() + } + s.mu.Unlock() +} + +// SetMergeConflictFailed records that the feature branch could not be merged +// due to a persistent conflict and marks the job as failed. +func (s *Store) SetMergeConflictFailed(id, conflictOutput string) { + s.mu.Lock() + if j, ok := s.jobs[id]; ok { + j.Status = JobStatusFailed + j.MergeStatus = MergeStatusConflictFailed + j.MergeError = conflictOutput + j.UpdatedAt = time.Now() + } + s.mu.Unlock() +} + +// Get returns a copy of the job with the given ID, or false if not found. +func (s *Store) Get(id string) (Job, bool) { + s.mu.RLock() + j, ok := s.jobs[id] + s.mu.RUnlock() + if !ok { + return Job{}, false + } + return *j, true +} + +// List returns a snapshot of all jobs. +func (s *Store) List() []Job { + s.mu.RLock() + out := make([]Job, 0, len(s.jobs)) + for _, j := range s.jobs { + out = append(out, *j) + } + s.mu.RUnlock() + return out +} diff --git a/internal/jsonutil/jsonutil.go b/internal/jsonutil/jsonutil.go new file mode 100644 index 0000000..0080c68 --- /dev/null +++ b/internal/jsonutil/jsonutil.go @@ -0,0 +1,64 @@ +// Package jsonutil provides shared JSON extraction helpers used by multiple +// packages in OpenDev. It tolerates AI model responses that wrap JSON output +// in markdown code fences. +package jsonutil + +import ( + "fmt" + "strings" +) + +// ExtractJSON strips optional markdown code fences from s and returns the first +// valid JSON object or array found within it. +func ExtractJSON(s string) (string, error) { + s = strings.TrimSpace(s) + + // Strip a markdown code fence if present. + if inner, ok := stripMarkdownFence(s); ok { + s = strings.TrimSpace(inner) + } + + start := strings.IndexAny(s, "{[") + if start == -1 { + return "", fmt.Errorf("no JSON object or array found in response") + } + + opener := rune(s[start]) + closer := '}' + if opener == '[' { + closer = ']' + } + + end := strings.LastIndexAny(s, "}]") + if end == -1 || end < start { + return "", fmt.Errorf("no closing delimiter found in response") + } + + if rune(s[end]) != closer { + needle := string(closer) + end = strings.LastIndex(s, needle) + if end == -1 || end < start { + return "", fmt.Errorf("mismatched JSON delimiters in response") + } + } + + return s[start : end+1], nil +} + +// stripMarkdownFence attempts to unwrap a markdown ```[lang] ... ``` block. +// It returns the inner content and true when a fence was found. +func stripMarkdownFence(s string) (string, bool) { + if !strings.HasPrefix(s, "```") { + return "", false + } + rest := s[3:] + // Skip optional language tag on the opening line (e.g. "json\n"). + if idx := strings.Index(rest, "\n"); idx != -1 { + rest = rest[idx+1:] + } + // Find the closing ```. + if idx := strings.LastIndex(rest, "```"); idx != -1 { + return strings.TrimSpace(rest[:idx]), true + } + return "", false +} diff --git a/internal/jsonutil/jsonutil_test.go b/internal/jsonutil/jsonutil_test.go new file mode 100644 index 0000000..281b712 --- /dev/null +++ b/internal/jsonutil/jsonutil_test.go @@ -0,0 +1,82 @@ +package jsonutil + +import ( + "testing" +) + +func TestExtractJSON_PlainObject(t *testing.T) { + input := `{"foo":"bar"}` + got, err := ExtractJSON(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != input { + t.Errorf("expected %q, got %q", input, got) + } +} + +func TestExtractJSON_PlainArray(t *testing.T) { + input := `[{"name":"t1"},{"name":"t2"}]` + got, err := ExtractJSON(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != input { + t.Errorf("expected %q, got %q", input, got) + } +} + +func TestExtractJSON_MarkdownFenceWithLang(t *testing.T) { + input := "```json\n{\"foo\":\"bar\"}\n```" + got, err := ExtractJSON(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + want := `{"foo":"bar"}` + if got != want { + t.Errorf("expected %q, got %q", want, got) + } +} + +func TestExtractJSON_MarkdownFenceNoLang(t *testing.T) { + input := "Here is the JSON:\n```\n[1,2,3]\n```\nDone." + got, err := ExtractJSON(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + want := `[1,2,3]` + if got != want { + t.Errorf("expected %q, got %q", want, got) + } +} + +func TestExtractJSON_MarkdownFenceArray(t *testing.T) { + input := "```json\n[{\"name\":\"t1\"}]\n```" + got, err := ExtractJSON(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + want := `[{"name":"t1"}]` + if got != want { + t.Errorf("expected %q, got %q", want, got) + } +} + +func TestExtractJSON_TrailingText(t *testing.T) { + input := `Here you go: {"key":"value"} and that's it.` + got, err := ExtractJSON(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + want := `{"key":"value"}` + if got != want { + t.Errorf("expected %q, got %q", want, got) + } +} + +func TestExtractJSON_NoJSON(t *testing.T) { + _, err := ExtractJSON("no json here") + if err == nil { + t.Fatal("expected an error, got nil") + } +} diff --git a/main.go b/main.go index 1735cff..153597c 100644 --- a/main.go +++ b/main.go @@ -1,107 +1,159 @@ // OpenDev is an open agentic coding system. // -// The orchestrator drives the process of turning a user's natural-language -// change request into a set of fully-initialised Feature Leads, each equipped -// with a system spec, strict technical contracts, and a dedicated git branch. +// It exposes a unified HTTP API that accepts a natural-language change request, +// runs the orchestrator to produce a system spec and a set of Feature Leads, +// then fires each Feature Lead as a background job. A TUI or web frontend can +// poll the API to follow the status of each job without blocking the initial +// request. +// +// Each orchestration run gets its own isolated MCP endpoint at +// //mcp, keeping ask_user questions scoped to the run that +// raised them. This allows multiple concurrent users without cross-talk. +// +// The repository to use is specified per-run in the POST /api/jobs request body +// (not as a server startup flag), allowing different runs to target different repos. +// +// GitHub PR integration is handled entirely by code-mcp. Set GITHUB_TOKEN and +// GITHUB_OWNER in the code-mcp environment — OpenDev has no GitHub credentials. // // Usage: // -// opendev --request "Add user authentication to the API" \ -// --agent-url http://localhost:8080 \ +// opendev --agent-url http://localhost:8080 \ // --spec-agent spec-writer \ // --feature-agent feature-planner \ // --contract-agent contract-definer \ // --branch-agent branch-namer \ -// [--mcp-port 9090] \ -// [--repo /path/to/local/repo] +// --task-breaker-agent task-breaker \ +// --coder-agent coder \ +// --test-writer-agent test-writer \ +// --code-mcp-url http://localhost:9090 \ +// [--port 3000] \ +// [--url https://example.com] package main import ( "context" - "encoding/json" "flag" "fmt" + "log" + "net/http" "os" "os/signal" "syscall" + "time" "github.com/iamangus/OpenDev/agent" + "github.com/iamangus/OpenDev/api" + "github.com/iamangus/OpenDev/featurelead" mcpserver "github.com/iamangus/OpenDev/mcp" "github.com/iamangus/OpenDev/orchestrator" ) func main() { // ── Flags ───────────────────────────────────────────────────────────────── - request := flag.String("request", "", "Description of the change to make (required)") + port := flag.Int("port", 3000, "Port for the OpenDev API server") agentURL := flag.String("agent-url", "", "Base URL of the agentfile service (required)") - specAgent := flag.String("spec-agent", "", "Name of the spec-generation agent in agentfile (required)") - featureAgent := flag.String("feature-agent", "", "Name of the feature-breakdown agent in agentfile (required)") - contractAgent := flag.String("contract-agent", "", "Name of the contract-definition agent in agentfile (required)") - branchAgent := flag.String("branch-agent", "", "Name of the branch-naming agent in agentfile (required)") - mcpPort := flag.Int("mcp-port", 0, "Port for the ask_user MCP server (default: random free port)") - mcpURL := flag.String("mcp-url", "", "Externally reachable URL of the ask_user MCP server (e.g. http://192.168.1.50:3000); defaults to http://localhost:") - repoPath := flag.String("repo", "", "Path to the local git repository (optional; omit to skip branch creation)") + specAgent := flag.String("spec-agent", "", "Name of the spec-generation agent (required)") + featureAgent := flag.String("feature-agent", "", "Name of the feature-breakdown agent (required)") + contractAgent := flag.String("contract-agent", "", "Name of the contract-definition agent (required)") + branchAgent := flag.String("branch-agent", "", "Name of the branch-naming agent (required)") + taskBreakerAgent := flag.String("task-breaker-agent", "", "Name of the task-breakdown agent (required)") + coderAgent := flag.String("coder-agent", "", "Name of the coder agent (required)") + testWriterAgent := flag.String("test-writer-agent", "", "Name of the test-writer agent (required)") + prSummaryAgent := flag.String("pr-summary-agent", "", "Name of the PR description summary agent (optional)") + codeMCPURL := flag.String("code-mcp-url", "", "Base URL of the code-mcp service (required)") + publicURL := flag.String("url", "", "Publicly reachable base URL of this server (e.g. https://example.com). Used to construct per-run MCP endpoints. Defaults to http://localhost:") flag.Parse() // ── Validation ──────────────────────────────────────────────────────────── - missing := []string{} - if *request == "" { - missing = append(missing, "--request") - } - if *agentURL == "" { - missing = append(missing, "--agent-url") - } - if *specAgent == "" { - missing = append(missing, "--spec-agent") - } - if *featureAgent == "" { - missing = append(missing, "--feature-agent") + required := map[string]string{ + "--agent-url": *agentURL, + "--spec-agent": *specAgent, + "--feature-agent": *featureAgent, + "--contract-agent": *contractAgent, + "--branch-agent": *branchAgent, + "--task-breaker-agent": *taskBreakerAgent, + "--coder-agent": *coderAgent, + "--test-writer-agent": *testWriterAgent, + "--code-mcp-url": *codeMCPURL, } - if *contractAgent == "" { - missing = append(missing, "--contract-agent") - } - if *branchAgent == "" { - missing = append(missing, "--branch-agent") - } - if len(missing) > 0 { - for _, m := range missing { - fmt.Fprintf(os.Stderr, "error: %s is required\n", m) + ok := true + for flag, val := range required { + if val == "" { + fmt.Fprintf(os.Stderr, "error: %s is required\n", flag) + ok = false } + } + if !ok { flag.Usage() os.Exit(1) } - // ── Context (handles Ctrl-C gracefully) ─────────────────────────────────── + // ── Log file (recreated on each startup) ───────────────────────────────── + logFile, err := os.OpenFile("opendev.log", os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + if err != nil { + fmt.Fprintf(os.Stderr, "warning: could not open log file: %v — logging to stderr\n", err) + logFile = os.Stderr + } else { + defer logFile.Close() + } + logger := log.New(logFile, "", log.Ldate|log.Ltime) + + // ── Context (handles Ctrl-C / SIGTERM gracefully) ───────────────────────── ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) defer stop() - // ── Start the ask_user MCP server ───────────────────────────────────────── - askUser := mcpserver.NewAskUserServer() - if err := askUser.Start(ctx, *mcpPort, *mcpURL); err != nil { - fmt.Fprintf(os.Stderr, "failed to start MCP server: %v\n", err) - os.Exit(1) + // ── Resolve public base URL ─────────────────────────────────────────────── + baseURL := *publicURL + if baseURL == "" { + baseURL = fmt.Sprintf("http://localhost:%d", *port) } - // ── Wire up the orchestrator ────────────────────────────────────────────── + // ── Build components ────────────────────────────────────────────────────── agentClient := agent.NewHTTPClient(*agentURL) - names := orchestrator.AgentNames{ - Spec: *specAgent, - Feature: *featureAgent, - Contract: *contractAgent, - Branch: *branchAgent, - } - orch := orchestrator.New(agentClient, names, askUser.URL(), *repoPath) + codeMCPClient := featurelead.NewHTTPCodeMCPClient(*codeMCPURL) - result, err := orch.Run(ctx, *request) - if err != nil { - fmt.Fprintf(os.Stderr, "orchestrator failed: %v\n", err) - os.Exit(1) + // The orchestrator's askUserMCPURL is overridden per-run by RunWithMCPURL, + // so we pass an empty string here (it is never used directly). + // repoName is also per-run, passed via the job request body. + orch := orchestrator.New(agentClient, orchestrator.AgentNames{ + Spec: *specAgent, + Feature: *featureAgent, + Contract: *contractAgent, + Branch: *branchAgent, + PRSummary: *prSummaryAgent, + }, "", *codeMCPURL, codeMCPClient) + + flEngine := featurelead.New(agentClient, featurelead.AgentNames{ + TaskBreaker: *taskBreakerAgent, + TestWriter: *testWriterAgent, + Coder: *coderAgent, + }, codeMCPClient, *codeMCPURL) + + flStore := featurelead.NewStore() + registry := mcpserver.NewRegistry() + + apiServer := api.NewServer(orch, flEngine, flStore, registry, baseURL, *codeMCPURL, codeMCPClient, logger) + + // ── Start the HTTP server ───────────────────────────────────────────────── + addr := fmt.Sprintf(":%d", *port) + httpServer := &http.Server{ + Addr: addr, + Handler: apiServer.Handler(), } - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - if err := enc.Encode(result); err != nil { - fmt.Fprintf(os.Stderr, "encoding result: %v\n", err) + // Shut down the HTTP server gracefully when the context is cancelled. + go func() { + <-ctx.Done() + shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + _ = httpServer.Shutdown(shutdownCtx) + }() + + fmt.Printf("OpenDev API listening on %s\n", addr) + fmt.Printf("Per-run MCP endpoints: %s//mcp\n", baseURL) + if err := httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { + fmt.Fprintf(os.Stderr, "server error: %v\n", err) os.Exit(1) } } diff --git a/mcp/registry.go b/mcp/registry.go new file mode 100644 index 0000000..1e1d8c4 --- /dev/null +++ b/mcp/registry.go @@ -0,0 +1,104 @@ +package mcp + +import ( + "net/http" + "strings" + "sync" +) + +// Registry dynamically maps orchestration run IDs to their MCP http.Handlers. +// +// Each orchestration run registers its handler before calling orch.Run() and +// deregisters it immediately after, so the handler only lives for the duration +// of the spec-generation step (the only step that uses ask_user). +// +// Incoming requests must have the shape: +// +// /{orchID}/mcp +// +// The registry strips the /{orchID} prefix and delegates the remainder to the +// registered handler. If no handler is found for the orchID it returns 404. +type Registry struct { + mu sync.RWMutex + handlers map[string]http.Handler +} + +// NewRegistry returns an empty, ready-to-use Registry. +func NewRegistry() *Registry { + return &Registry{ + handlers: make(map[string]http.Handler), + } +} + +// Register associates orchID with h. A second Register for the same orchID +// replaces the previous handler. +func (r *Registry) Register(orchID string, h http.Handler) { + r.mu.Lock() + defer r.mu.Unlock() + r.handlers[orchID] = h +} + +// Deregister removes the handler for orchID. It is a no-op if orchID is not +// registered. +func (r *Registry) Deregister(orchID string) { + r.mu.Lock() + defer r.mu.Unlock() + delete(r.handlers, orchID) +} + +// ServeHTTP implements http.Handler. It expects paths of the form +// /{orchID}/mcp (with any suffix). It strips the leading /{orchID} segment, +// then delegates to the registered handler. Returns 404 if the orchID is not +// registered or the path does not contain at least two segments. +func (r *Registry) ServeHTTP(w http.ResponseWriter, req *http.Request) { + orchID, remaining, ok := splitOrchID(req.URL.Path) + if !ok { + http.NotFound(w, req) + return + } + + r.mu.RLock() + h, found := r.handlers[orchID] + r.mu.RUnlock() + + if !found { + http.NotFound(w, req) + return + } + + // Rewrite the path so the inner handler sees /mcp (or whatever comes after + // the orchID segment) rather than /{orchID}/mcp. + req2 := req.Clone(req.Context()) + req2.URL = req.URL.JoinPath() // shallow copy URL + req2.URL.Path = remaining + if req2.URL.RawPath != "" { + // Keep RawPath consistent if the original request had one. + _, rawRemaining, _ := splitOrchID(req.URL.RawPath) + req2.URL.RawPath = rawRemaining + } + + h.ServeHTTP(w, req2) +} + +// splitOrchID splits a path of the form /{orchID}/rest into (orchID, /rest, true). +// Returns ("", "", false) if the path has fewer than two non-empty segments. +func splitOrchID(path string) (orchID, remaining string, ok bool) { + // Normalise: ensure leading slash, no double slashes. + path = "/" + strings.TrimLeft(path, "/") + + // path is now guaranteed to start with "/". + // Trim the leading slash and split on the next slash. + rest := path[1:] // drop leading "/" + idx := strings.IndexByte(rest, '/') + if idx < 0 { + // Only one segment — no orchID/rest pair. + return "", "", false + } + + orchID = rest[:idx] + remaining = rest[idx:] // starts with "/" + if orchID == "" { + return "", "", false + } + return orchID, remaining, true +} diff --git a/mcp/registry_test.go b/mcp/registry_test.go new file mode 100644 index 0000000..a0625e2 --- /dev/null +++ b/mcp/registry_test.go @@ -0,0 +1,188 @@ +package mcp + +import ( + "fmt" + "net/http" + "net/http/httptest" + "testing" +) + +// echoHandler returns an http.Handler that writes statusCode and body. +func echoHandler(statusCode int, body string) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(statusCode) + _, _ = w.Write([]byte(body)) + }) +} + +// ── splitOrchID ─────────────────────────────────────────────────────────────── + +func TestSplitOrchID(t *testing.T) { + cases := []struct { + path string + orchID string + remaining string + ok bool + }{ + {"/abc123/mcp", "abc123", "/mcp", true}, + {"/abc123/mcp/extra", "abc123", "/mcp/extra", true}, + {"abc123/mcp", "abc123", "/mcp", true}, // no leading slash tolerated + {"/abc123", "", "", false}, // only one segment + {"/", "", "", false}, // just a slash + {"", "", "", false}, // empty + {"//mcp", "", "", false}, // empty orchID segment + } + + for _, tc := range cases { + orchID, remaining, ok := splitOrchID(tc.path) + if ok != tc.ok { + t.Errorf("splitOrchID(%q): ok=%v, want %v", tc.path, ok, tc.ok) + continue + } + if !ok { + continue + } + if orchID != tc.orchID { + t.Errorf("splitOrchID(%q): orchID=%q, want %q", tc.path, orchID, tc.orchID) + } + if remaining != tc.remaining { + t.Errorf("splitOrchID(%q): remaining=%q, want %q", tc.path, remaining, tc.remaining) + } + } +} + +// ── Registry.Register / Deregister ─────────────────────────────────────────── + +func TestRegistry_RegisterAndServe(t *testing.T) { + r := NewRegistry() + r.Register("run-1", echoHandler(http.StatusOK, "run-1-response")) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/run-1/mcp", nil) + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + if got := rec.Body.String(); got != "run-1-response" { + t.Errorf("body=%q, want %q", got, "run-1-response") + } +} + +func TestRegistry_UnknownOrchID_Returns404(t *testing.T) { + r := NewRegistry() + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/unknown-id/mcp", nil) + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", rec.Code) + } +} + +func TestRegistry_Deregister(t *testing.T) { + r := NewRegistry() + r.Register("run-2", echoHandler(http.StatusOK, "ok")) + r.Deregister("run-2") + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/run-2/mcp", nil) + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusNotFound { + t.Fatalf("expected 404 after deregister, got %d", rec.Code) + } +} + +func TestRegistry_DeregisterNoop(t *testing.T) { + r := NewRegistry() + // Should not panic. + r.Deregister("does-not-exist") +} + +// ── Path stripping ──────────────────────────────────────────────────────────── + +func TestRegistry_PathStripping(t *testing.T) { + // The inner handler should see /mcp, not /run-3/mcp. + r := NewRegistry() + + var seenPath string + r.Register("run-3", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + seenPath = req.URL.Path + w.WriteHeader(http.StatusOK) + })) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/run-3/mcp", nil) + r.ServeHTTP(rec, req) + + if seenPath != "/mcp" { + t.Errorf("inner handler saw path %q, want %q", seenPath, "/mcp") + } +} + +// ── Concurrent access ───────────────────────────────────────────────────────── + +func TestRegistry_ConcurrentRegisterAndServe(t *testing.T) { + r := NewRegistry() + const n = 50 + + done := make(chan struct{}, n*2) + + // Concurrently register handlers and serve requests. + for i := 0; i < n; i++ { + id := fmt.Sprintf("run-concurrent-%d", i) + go func(id string) { + r.Register(id, echoHandler(http.StatusOK, id)) + done <- struct{}{} + }(id) + + go func(id string) { + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/"+id+"/mcp", nil) + r.ServeHTTP(rec, req) // may 404 if handler not registered yet — that's fine + done <- struct{}{} + }(id) + } + + for i := 0; i < n*2; i++ { + <-done + } +} + +// ── Short path (no remaining segment) ───────────────────────────────────────── + +func TestRegistry_PathWithNoRemaining_Returns404(t *testing.T) { + r := NewRegistry() + r.Register("run-4", echoHandler(http.StatusOK, "ok")) + + // Path only has one segment — no /mcp suffix. + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/run-4", nil) + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d", rec.Code) + } +} + +// ── Multiple runs isolated ──────────────────────────────────────────────────── + +func TestRegistry_MultipleRunsIsolated(t *testing.T) { + r := NewRegistry() + r.Register("run-a", echoHandler(http.StatusOK, "response-a")) + r.Register("run-b", echoHandler(http.StatusCreated, "response-b")) + + recA := httptest.NewRecorder() + r.ServeHTTP(recA, httptest.NewRequest(http.MethodPost, "/run-a/mcp", nil)) + if recA.Code != http.StatusOK || recA.Body.String() != "response-a" { + t.Errorf("run-a: got %d %q", recA.Code, recA.Body.String()) + } + + recB := httptest.NewRecorder() + r.ServeHTTP(recB, httptest.NewRequest(http.MethodPost, "/run-b/mcp", nil)) + if recB.Code != http.StatusCreated || recB.Body.String() != "response-b" { + t.Errorf("run-b: got %d %q", recB.Code, recB.Body.String()) + } +} diff --git a/mcp/server.go b/mcp/server.go index e4cc01a..a888141 100644 --- a/mcp/server.go +++ b/mcp/server.go @@ -1,16 +1,14 @@ -// Package mcp provides the OpenDev MCP server that exposes interactive tools +// Package mcp provides the OpenDev MCP handler that exposes interactive tools // to agents running in an external agentfile service. // -// The server runs persistently for the lifetime of the OpenDev process, so -// agents can call back into OpenDev at any point during a run. +// The handler is designed to be mounted at /mcp on the main API server so that +// a single public port serves both the REST API and the MCP endpoint. package mcp import ( "bufio" "context" "fmt" - "log/slog" - "net" "net/http" "os" "strings" @@ -18,35 +16,51 @@ import ( "github.com/mark3labs/mcp-go/mcp" "github.com/mark3labs/mcp-go/server" + + "github.com/iamangus/OpenDev/questions" ) -// AskUserServer is a persistent MCP server that exposes the ask_user tool. -// When the tool is called by an agent, it prints the question to stdout and -// blocks until the user types a response on stdin. +// AskUserServer exposes the ask_user MCP tool as an http.Handler. +// +// By default (stdinFallback == false) questions are routed through a +// questions.Store so that an external TUI or HTTP client can poll for pending +// questions and post answers without direct stdin access. // -// A mutex serialises concurrent tool calls so questions are never interleaved, -// which is important when a future web UI drives multiple runs in parallel. +// When stdinFallback is true the server falls back to the original behaviour: +// it prints the question to stdout and blocks until the user types a response +// on stdin. In that mode a mutex serialises concurrent tool calls so questions +// are never interleaved. type AskUserServer struct { - mu sync.Mutex - url string - httpServer *http.Server - stdin *bufio.Reader + // stdin-fallback fields (used only when stdinFallback == true) + mu sync.Mutex + stdin *bufio.Reader + + // HTTP-queue fields (used when stdinFallback == false) + store *questions.Store + + stdinFallback bool } -// NewAskUserServer creates an AskUserServer. It does not start listening until -// Start is called. -func NewAskUserServer() *AskUserServer { +// NewAskUserServer creates an AskUserServer that routes questions through the +// supplied questions.Store (the default, TUI-friendly mode). +func NewAskUserServer(store *questions.Store) *AskUserServer { return &AskUserServer{ - stdin: bufio.NewReader(os.Stdin), + store: store, } } -// Start begins listening on the given port. If port is 0 a random free port is -// chosen. advertisedURL overrides the URL returned by URL() — use this when -// the server is reachable from a different host than localhost (e.g. a LAN or -// public address). If advertisedURL is empty, URL() returns the localhost address. -// It returns once the server is ready to accept connections. -func (s *AskUserServer) Start(ctx context.Context, port int, advertisedURL string) error { +// NewAskUserServerStdinFallback creates an AskUserServer that reads answers +// from stdin (the original behaviour). Use this when running without a TUI. +func NewAskUserServerStdinFallback() *AskUserServer { + return &AskUserServer{ + stdinFallback: true, + stdin: bufio.NewReader(os.Stdin), + } +} + +// Handler returns an http.Handler that serves the MCP streamable-HTTP +// transport. Mount it at /mcp on the main API server mux. +func (s *AskUserServer) Handler() http.Handler { mcpServer := server.NewMCPServer( "opendev", "1.0.0", @@ -63,59 +77,41 @@ func (s *AskUserServer) Start(ctx context.Context, port int, advertisedURL strin mcpServer.AddTool(askTool, s.handleAskUser) - httpSrv := server.NewStreamableHTTPServer(mcpServer) - - addr := fmt.Sprintf(":%d", port) - ln, err := net.Listen("tcp", addr) - if err != nil { - return fmt.Errorf("listen on %s: %w", addr, err) - } - - actualPort := ln.Addr().(*net.TCPAddr).Port - if advertisedURL != "" { - s.url = advertisedURL - } else { - s.url = fmt.Sprintf("http://localhost:%d", actualPort) - } - - // Wrap the StreamableHTTPServer in a plain net/http.Server so we can - // call Shutdown gracefully and use our own listener. - mux := http.NewServeMux() - mux.Handle("/", httpSrv) - s.httpServer = &http.Server{Handler: mux} - - go func() { - slog.Info("OpenDev MCP server listening", "url", s.url) - if err := s.httpServer.Serve(ln); err != nil && err != http.ErrServerClosed { - slog.Error("OpenDev MCP server error", "error", err) - } - }() - - // Stop the server when the context is cancelled. - go func() { - <-ctx.Done() - if err := s.httpServer.Shutdown(context.Background()); err != nil { - slog.Error("OpenDev MCP server shutdown error", "error", err) - } - }() - - return nil -} - -// URL returns the base URL of the running MCP server (e.g. "http://localhost:9090"). -// It is only valid after Start has been called. -func (s *AskUserServer) URL() string { - return s.url + return server.NewStreamableHTTPServer(mcpServer) } -// handleAskUser is the MCP tool handler for ask_user. It serialises access to -// stdin so concurrent calls from different agent turns don't interleave. +// handleAskUser is the MCP tool handler for ask_user. +// +// In HTTP-queue mode (default) it delegates to the questions.Store, which +// blocks until an external client posts an answer. Multiple concurrent calls +// are all queued independently. +// +// In stdin-fallback mode it serialises access to stdin so concurrent calls +// from different agent turns don't interleave. func (s *AskUserServer) handleAskUser(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) { question, ok := req.GetArguments()["question"].(string) if !ok || strings.TrimSpace(question) == "" { return mcp.NewToolResultError("question argument is required and must be a non-empty string"), nil } + if s.stdinFallback { + return s.handleAskUserStdin(ctx, question) + } + return s.handleAskUserStore(ctx, question) +} + +// handleAskUserStore delegates to the questions.Store (HTTP-queue mode). +func (s *AskUserServer) handleAskUserStore(ctx context.Context, question string) (*mcp.CallToolResult, error) { + answer, err := s.store.Ask(ctx, question) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("waiting for answer: %v", err)), nil + } + return mcp.NewToolResultText(answer), nil +} + +// handleAskUserStdin reads an answer from stdin (fallback mode). +// A mutex ensures that at most one question is active at a time. +func (s *AskUserServer) handleAskUserStdin(_ context.Context, question string) (*mcp.CallToolResult, error) { s.mu.Lock() defer s.mu.Unlock() diff --git a/orchestrator/orchestrator.go b/orchestrator/orchestrator.go index 9a9fb81..0a2da91 100644 --- a/orchestrator/orchestrator.go +++ b/orchestrator/orchestrator.go @@ -1,32 +1,34 @@ // Package orchestrator contains the core workflow for the OpenDev orchestrator. // -// The orchestrator drives the entire process of turning a user's natural-language -// change request into a set of fully-initialized Feature Leads, each equipped with -// a clear specification, strict technical contracts, and a dedicated git branch. +// The orchestrator drives the process of turning a user's natural-language +// change request into a set of fully-initialized Feature Leads, each equipped +// with a clear specification, strict technical contracts, and a dedicated git +// branch. Once the Feature Leads are produced the orchestrator's job is done — +// it hands them off and does not participate in implementation. // // Workflow: -// 1. Discovery & Spec Generation – the spec agent is invoked with the user request -// and an ask_user MCP tool so it can ask clarifying questions before producing -// a comprehensive System Specification Document. +// 1. Discovery & Spec Generation – the spec agent is invoked with the user +// request and an ask_user MCP tool so it can ask clarifying questions +// before producing a comprehensive System Specification Document. // 2. Feature Breakdown – the spec is sliced into distinct, manageable domain // features (e.g., User Auth, Product Catalog). // 3. Contract Definition – cross-feature interaction points are identified and -// expressed as strict technical contracts (Database Schemas, APIs, Protobufs). +// expressed as strict technical contracts (Database Schemas, APIs, +// Protobufs). // 4. Delegation – one FeatureLead is initialised per feature; it receives the -// feature's requirements, the contracts it must consume (RequiredContracts) and -// the contracts it must produce (ProvidedContracts), and a dedicated git branch -// is created for it. +// feature's requirements, the contracts it must consume +// (RequiredContracts) and the contracts it must produce +// (ProvidedContracts), and a dedicated git branch is created for it. package orchestrator import ( "context" "encoding/json" "fmt" - "os/exec" - "regexp" "strings" "github.com/iamangus/OpenDev/agent" + "github.com/iamangus/OpenDev/internal/jsonutil" ) // ── Domain types ────────────────────────────────────────────────────────────── @@ -83,9 +85,9 @@ type Feature struct { ProvidedContracts []string `json:"provided_contracts"` } -// FeatureLead is an initialised unit of work ready to be handed to an -// autonomous coding agent. It bundles the feature spec together with the -// fully-resolved contracts so the agent never needs to go looking for context. +// FeatureLead is an initialised unit of work ready to be handed to the feature +// lead engine. It bundles the feature spec together with the fully-resolved +// contracts so the engine never needs to go looking for context. type FeatureLead struct { // Feature is the domain feature this lead is responsible for. Feature Feature `json:"feature"` @@ -95,14 +97,20 @@ type FeatureLead struct { ProvidedContracts []Contract `json:"provided_contracts"` // BranchName is the git branch created for this feature lead to work on. BranchName string `json:"branch_name"` + // OrchBranchName is the orchestration branch this feature branch was forked + // from. After all tasks pass the feature lead engine merges BranchName into + // OrchBranchName. + OrchBranchName string `json:"orch_branch_name"` } -// Result is the complete output of a successful orchestrator run. +// Result is the complete output of a successful orchestrator run. It describes +// what was understood and what was delegated — the orchestrator's job ends here. type Result struct { - Spec SystemSpec `json:"spec"` - Features []Feature `json:"features"` - Contracts []Contract `json:"contracts"` - FeatureLeads []FeatureLead `json:"feature_leads"` + Spec SystemSpec `json:"spec"` + Features []Feature `json:"features"` + Contracts []Contract `json:"contracts"` + FeatureLeads []FeatureLead `json:"feature_leads"` + OrchBranchName string `json:"orch_branch_name,omitempty"` } // ── Agent name configuration ────────────────────────────────────────────────── @@ -120,6 +128,20 @@ type AgentNames struct { Contract string // Branch is the agent that suggests a git branch name for a feature. Branch string + // PRSummary is the agent used to generate a plain-text pull request + // description from the list of commits on the orchestration branch. + PRSummary string +} + +// ── Branch creation ─────────────────────────────────────────────────────────── + +// BranchCreator abstracts git branch/worktree creation. The orchestrator only +// needs to create branches; test execution is handled by the feature-lead engine. +// featurelead.HTTPCodeMCPClient satisfies this interface. +type BranchCreator interface { + // CreateBranch creates a new git worktree/branch. When base is non-empty it + // is used as the start point (equivalent to git worktree add -b ). + CreateBranch(repo, branch, base string) error } // ── Orchestrator ────────────────────────────────────────────────────────────── @@ -130,7 +152,8 @@ type Orchestrator struct { agentClient agent.Client agentNames AgentNames askUserMCPURL string // URL of the running ask_user MCP server; empty = no tool injection - repoPath string + codeMCPURL string // base URL of code-mcp for read-only repo access; empty = no injection + branchCreator BranchCreator } // New creates a new Orchestrator. @@ -139,34 +162,61 @@ type Orchestrator struct { // - names specifies which agentfile agent handles each step. // - askUserMCPURL is the base URL of the OpenDev ask_user MCP server. // Pass an empty string to disable interactive clarification (e.g. in tests). -// - repoPath is the path to the local git repository where feature branches -// should be created. Pass an empty string to skip git branch creation. -func New(client agent.Client, names AgentNames, askUserMCPURL string, repoPath string) *Orchestrator { +// - codeMCPURL is the base URL of code-mcp. When non-empty the spec agent +// receives the read profile for the target repo so it can inspect the +// existing codebase before producing a spec. +// - branchCreator creates git branches via code-mcp. Pass nil to skip branch +// creation (e.g. in tests). +func New(client agent.Client, names AgentNames, askUserMCPURL string, codeMCPURL string, branchCreator BranchCreator) *Orchestrator { return &Orchestrator{ agentClient: client, agentNames: names, askUserMCPURL: askUserMCPURL, - repoPath: repoPath, + codeMCPURL: codeMCPURL, + branchCreator: branchCreator, } } -// Run executes the complete four-step orchestrator workflow and returns the -// result. It is the single entry point for all orchestration logic. +// Run executes the complete orchestrator workflow and returns the result. +// It uses the askUserMCPURL set at construction time. func (o *Orchestrator) Run(ctx context.Context, userRequest string) (*Result, error) { + return o.RunWithMCPURL(ctx, userRequest, o.askUserMCPURL, "") +} + +// AgentNames returns the agent name configuration for this orchestrator. +func (o *Orchestrator) AgentNames() AgentNames { return o.agentNames } + +// AgentClient returns the agent.Client used by this orchestrator. +func (o *Orchestrator) AgentClient() agent.Client { return o.agentClient } + +// RunWithMCPURL is like Run but overrides the ask_user MCP base URL and +// specifies the repo to create branches in. This allows a single shared +// Orchestrator to serve concurrent runs, each with its own per-run MCP +// endpoint (e.g. https://example.com/{runID}) and repo. +func (o *Orchestrator) RunWithMCPURL(ctx context.Context, userRequest string, mcpBaseURL string, repo string) (*Result, error) { + // Shadow the struct with the per-run URL so all steps read the right value. + scoped := &Orchestrator{ + agentClient: o.agentClient, + agentNames: o.agentNames, + askUserMCPURL: mcpBaseURL, + codeMCPURL: o.codeMCPURL, + branchCreator: o.branchCreator, + } + // Step 1 – Discovery & Spec Generation - spec, err := o.generateSpec(ctx, userRequest) + spec, err := scoped.generateSpec(ctx, userRequest, repo) if err != nil { return nil, fmt.Errorf("step 1 (spec generation): %w", err) } // Step 2 – Feature Breakdown - features, err := o.breakdownFeatures(ctx, spec) + features, err := scoped.breakdownFeatures(ctx, spec) if err != nil { return nil, fmt.Errorf("step 2 (feature breakdown): %w", err) } // Step 3 – Contract Definition - contracts, features, err := o.defineContracts(ctx, spec, features) + contracts, features, err := scoped.defineContracts(ctx, spec, features) if err != nil { return nil, fmt.Errorf("step 3 (contract definition): %w", err) } @@ -177,24 +227,29 @@ func (o *Orchestrator) Run(ctx context.Context, userRequest string) (*Result, er contractByName[c.Name] = c } - // Step 4 – Delegation - leads, err := o.delegateFeatures(ctx, features, contractByName) + // Step 4 – Delegation (includes orch branch creation) + orchBranchName, leads, err := scoped.delegateFeatures(ctx, features, contractByName, repo) if err != nil { return nil, fmt.Errorf("step 4 (delegation): %w", err) } return &Result{ - Spec: *spec, - Features: features, - Contracts: contracts, - FeatureLeads: leads, + Spec: *spec, + Features: features, + Contracts: contracts, + FeatureLeads: leads, + OrchBranchName: orchBranchName, }, nil } // ── Step 1: Discovery & Spec Generation ────────────────────────────────────── -func (o *Orchestrator) generateSpec(ctx context.Context, userRequest string) (*SystemSpec, error) { - message := fmt.Sprintf(`Analyse the following user request and produce a comprehensive System Specification Document. +func (o *Orchestrator) generateSpec(ctx context.Context, userRequest string, repo string) (*SystemSpec, error) { + message := fmt.Sprintf(`You MUST NOT output any explanatory text before acting. Do not narrate your intentions. +If you have tools available, call them immediately without preamble. +When you have gathered sufficient information, return ONLY a valid JSON object – no markdown, no explanation. + +Analyse the following user request and produce a comprehensive System Specification Document. Use the ask_user tool to clarify any ambiguities before producing the spec. Ask one question at a time. When you have enough information, return ONLY a valid JSON object – no markdown, no explanation. @@ -214,13 +269,20 @@ User request: // Inject the ask_user MCP server if one is configured. var mcpServers []agent.MCPServer if o.askUserMCPURL != "" { - mcpServers = []agent.MCPServer{ - { - Name: "opendev", - URL: o.askUserMCPURL + "/mcp", - Transport: "streamable-http", - }, - } + mcpServers = append(mcpServers, agent.MCPServer{ + Name: "opendev", + URL: o.askUserMCPURL + "/mcp", + Transport: "streamable-http", + }) + } + // Inject the code-mcp read server so the spec agent can inspect the existing + // codebase before producing a spec. + if o.codeMCPURL != "" && repo != "" { + mcpServers = append(mcpServers, agent.MCPServer{ + Name: "dev-read", + URL: fmt.Sprintf("%s/%s/main/read/mcp", o.codeMCPURL, repo), + Transport: "streamable-http", + }) } raw, err := o.agentClient.Chat(ctx, o.agentNames.Spec, message, mcpServers) @@ -228,9 +290,9 @@ User request: return nil, fmt.Errorf("agent call: %w", err) } - jsonStr, err := extractJSON(raw) + jsonStr, err := jsonutil.ExtractJSON(raw) if err != nil { - return nil, fmt.Errorf("extracting JSON from agent response: %w", err) + return nil, fmt.Errorf("extracting JSON from agent response: %w\nraw response (first 200 chars): %s", err, truncate(raw, 200)) } var spec SystemSpec @@ -271,9 +333,9 @@ System Specification: return nil, fmt.Errorf("agent call: %w", err) } - jsonStr, err := extractJSON(raw) + jsonStr, err := jsonutil.ExtractJSON(raw) if err != nil { - return nil, fmt.Errorf("extracting JSON from agent response: %w", err) + return nil, fmt.Errorf("extracting JSON from agent response: %w\nraw response (first 200 chars): %s", err, truncate(raw, 200)) } var features []Feature @@ -342,9 +404,9 @@ Features: return nil, nil, fmt.Errorf("agent call: %w", err) } - jsonStr, err := extractJSON(raw) + jsonStr, err := jsonutil.ExtractJSON(raw) if err != nil { - return nil, nil, fmt.Errorf("extracting JSON from agent response: %w", err) + return nil, nil, fmt.Errorf("extracting JSON from agent response: %w\nraw response (first 200 chars): %s", err, truncate(raw, 200)) } var resp contractsResponse @@ -369,18 +431,30 @@ func (o *Orchestrator) delegateFeatures( ctx context.Context, features []Feature, contractByName map[string]Contract, -) ([]FeatureLead, error) { - leads := make([]FeatureLead, 0, len(features)) + repo string, +) (orchBranchName string, leads []FeatureLead, err error) { + // First, ask the Branch agent for an orchestration branch name and create it. + orchBranchName, err = o.suggestOrchBranchName(ctx, features) + if err != nil { + return "", nil, fmt.Errorf("suggesting orch branch name: %w", err) + } + if o.branchCreator != nil { + if err := o.branchCreator.CreateBranch(repo, orchBranchName, ""); err != nil { + return "", nil, fmt.Errorf("creating orch branch %q: %w", orchBranchName, err) + } + } + + leads = make([]FeatureLead, 0, len(features)) for _, f := range features { branchName, err := o.suggestBranchName(ctx, f) if err != nil { - return nil, fmt.Errorf("suggesting branch name for %q: %w", f.Name, err) + return "", nil, fmt.Errorf("suggesting branch name for %q: %w", f.Name, err) } - if o.repoPath != "" { - if err := createGitBranch(o.repoPath, branchName); err != nil { - return nil, fmt.Errorf("creating branch %q for feature %q: %w", branchName, f.Name, err) + if o.branchCreator != nil { + if err := o.branchCreator.CreateBranch(repo, branchName, orchBranchName); err != nil { + return "", nil, fmt.Errorf("creating branch %q for feature %q: %w", branchName, f.Name, err) } } @@ -389,10 +463,11 @@ func (o *Orchestrator) delegateFeatures( RequiredContracts: resolveContracts(f.RequiredContracts, contractByName), ProvidedContracts: resolveContracts(f.ProvidedContracts, contractByName), BranchName: branchName, + OrchBranchName: orchBranchName, }) } - return leads, nil + return orchBranchName, leads, nil } // suggestBranchName calls the agent to generate a concise, valid git branch @@ -402,10 +477,22 @@ func (o *Orchestrator) suggestBranchName(ctx context.Context, f Feature) (string Rules: - Use lowercase letters, numbers, and hyphens only. - No spaces, no slashes, no special characters. -- Prefix with "feature/". +- The branch name MUST start with exactly "feature-" followed immediately by a short description. +- The format is: feature- +- Do NOT add any extra words between "feature-" and the description (e.g. do NOT produce "feature-agent-...", "branch-name-feature-...", "feature-name-...", etc.). - Maximum 60 characters total. - Return ONLY the branch name, nothing else. +Good examples: + feature-user-auth + feature-product-catalog + feature-payment-flow + +Bad examples (do not produce these): + branch-name-feature-user-auth + feature-agent-user-auth + feature-name-user-auth + Feature name: %s Feature description: %s`, f.Name, f.Description) @@ -417,89 +504,88 @@ Feature description: %s`, f.Name, f.Description) return sanitiseBranchName(strings.TrimSpace(raw)), nil } -// ── Git helpers ─────────────────────────────────────────────────────────────── - -// createGitBranch creates a new git branch at repoPath. This is pure code – -// no AI is involved once the branch name has been decided. -func createGitBranch(repoPath, branchName string) error { - cmd := exec.Command("git", "checkout", "-b", branchName) - cmd.Dir = repoPath - out, err := cmd.CombinedOutput() - if err != nil { - return fmt.Errorf("git checkout -b %q: %w\noutput: %s", branchName, err, out) +// suggestOrchBranchName calls the agent to generate a concise, valid git branch +// name for the orchestration branch (prefixed with "orch-"), then sanitises it. +func (o *Orchestrator) suggestOrchBranchName(ctx context.Context, features []Feature) (string, error) { + names := make([]string, len(features)) + for i, f := range features { + names[i] = f.Name } - return nil -} -// ── Utility helpers ─────────────────────────────────────────────────────────── + message := fmt.Sprintf(`Generate a concise git branch name for an orchestration branch that groups the following features. +Rules: +- Use lowercase letters, numbers, and hyphens only. +- No spaces, no slashes, no special characters. +- The branch name MUST start with exactly "orch-" followed immediately by a short description. +- Maximum 60 characters total. +- Return ONLY the branch name, nothing else. -// jsonBlockRe matches an optional markdown ```json ... ``` fence. -var jsonBlockRe = regexp.MustCompile("(?s)```(?:json)?\\s*(.*?)\\s*```") +Features: %s`, strings.Join(names, ", ")) -// extractJSON strips markdown code fences from a string and returns the first -// valid JSON object or array found within it. This tolerates AI models that -// wrap their JSON output in markdown fences. -func extractJSON(s string) (string, error) { - // Try to unwrap a markdown code block first. - if m := jsonBlockRe.FindStringSubmatch(s); len(m) == 2 { - s = m[1] + raw, err := o.agentClient.Chat(ctx, o.agentNames.Branch, message, nil) + if err != nil { + return "", fmt.Errorf("agent call: %w", err) } - s = strings.TrimSpace(s) + return sanitiseOrchBranchName(strings.TrimSpace(raw)), nil +} - // Find the outermost JSON object or array. - start := strings.IndexAny(s, "{[") - if start == -1 { - return "", fmt.Errorf("no JSON object or array found in response") - } +// ── Utility helpers ─────────────────────────────────────────────────────────── - // Walk backwards from the end to find the matching closer. - opener := rune(s[start]) - closer := '}' - if opener == '[' { - closer = ']' +// sanitiseBranchName ensures the branch name contains only characters that are +// safe for code-mcp: lowercase alphanumerics and hyphens. Slashes are replaced +// with hyphens (code-mcp rejects branch names containing "/"). Consecutive +// hyphens are collapsed and leading/trailing hyphens are trimmed. Finally, a +// "feature-" prefix is prepended if not already present. +func sanitiseBranchName(name string) string { + var sb strings.Builder + for _, r := range strings.ToLower(name) { + if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' { + sb.WriteRune(r) + } else { + sb.WriteRune('-') + } } - end := strings.LastIndexAny(s, "}]") - if end == -1 || end < start { - return "", fmt.Errorf("no closing delimiter found in response") + // Collapse consecutive hyphens. + result := sb.String() + for strings.Contains(result, "--") { + result = strings.ReplaceAll(result, "--", "-") } + // Trim leading/trailing hyphens. + result = strings.Trim(result, "-") - // Ensure the last character matches the expected closer. - if rune(s[end]) != closer { - // Find the last occurrence of the correct closer. - needle := string(closer) - end = strings.LastIndex(s, needle) - if end == -1 || end < start { - return "", fmt.Errorf("mismatched JSON delimiters in response") - } + // Ensure the branch always starts with "feature-". + if !strings.HasPrefix(result, "feature-") { + result = "feature-" + result } - return s[start : end+1], nil + return result } -// sanitiseBranchName ensures the branch name contains only characters that are -// safe in a git ref: lowercase alphanumerics, hyphens, and forward slashes. -// Any other character is replaced with a hyphen, and consecutive hyphens are -// collapsed. -func sanitiseBranchName(name string) string { +// sanitiseOrchBranchName applies the same character sanitisation as +// sanitiseBranchName but enforces an "orch-" prefix instead of "feature-". +func sanitiseOrchBranchName(name string) string { var sb strings.Builder for _, r := range strings.ToLower(name) { - if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '/' { + if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' { sb.WriteRune(r) } else { sb.WriteRune('-') } } - // Collapse consecutive hyphens. - result := regexp.MustCompile(`-{2,}`).ReplaceAllString(sb.String(), "-") - // Trim leading/trailing hyphens from each path segment. - parts := strings.Split(result, "/") - for i, p := range parts { - parts[i] = strings.Trim(p, "-") + result := sb.String() + for strings.Contains(result, "--") { + result = strings.ReplaceAll(result, "--", "-") } - return strings.Join(parts, "/") + result = strings.Trim(result, "-") + + if !strings.HasPrefix(result, "orch-") { + result = "orch-" + result + } + + return result } // resolveContracts looks up each contract name in the provided map and returns @@ -513,3 +599,12 @@ func resolveContracts(names []string, byName map[string]Contract) []Contract { } return out } + +// truncate returns the first n characters of s, appending "…" if it was cut. +func truncate(s string, n int) string { + runes := []rune(s) + if len(runes) <= n { + return s + } + return string(runes[:n]) + "…" +} diff --git a/orchestrator/orchestrator_test.go b/orchestrator/orchestrator_test.go index 22cc6d9..7aa66a5 100644 --- a/orchestrator/orchestrator_test.go +++ b/orchestrator/orchestrator_test.go @@ -55,81 +55,54 @@ func testAgentNames() AgentNames { } } -// ── extractJSON ─────────────────────────────────────────────────────────────── - -func TestExtractJSON_PlainObject(t *testing.T) { - input := `{"foo":"bar"}` - got, err := extractJSON(input) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if got != input { - t.Errorf("expected %q, got %q", input, got) - } -} - -func TestExtractJSON_MarkdownFence(t *testing.T) { - input := "```json\n{\"foo\":\"bar\"}\n```" - got, err := extractJSON(input) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - want := `{"foo":"bar"}` - if got != want { - t.Errorf("expected %q, got %q", want, got) - } -} - -func TestExtractJSON_MarkdownFenceNoLang(t *testing.T) { - input := "Here is the JSON:\n```\n[1,2,3]\n```\nDone." - got, err := extractJSON(input) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - want := `[1,2,3]` - if got != want { - t.Errorf("expected %q, got %q", want, got) - } -} +// ── sanitiseBranchName ──────────────────────────────────────────────────────── -func TestExtractJSON_TrailingText(t *testing.T) { - input := `Here you go: {"key":"value"} and that's it.` - got, err := extractJSON(input) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - want := `{"key":"value"}` - if got != want { - t.Errorf("expected %q, got %q", want, got) +func TestSanitiseBranchName(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"feature-user-auth", "feature-user-auth"}, + {"Feature-User Auth", "feature-user-auth"}, + {"feature-user--auth", "feature-user-auth"}, + {"feature- user ", "feature-user"}, + {"FEATURE-ProductCatalog", "feature-productcatalog"}, + {"feature-user_auth!", "feature-user-auth"}, + {"feature/user-auth", "feature-user-auth"}, // slashes become hyphens + // Missing feature- prefix — sanitiser must prepend it. + {"user-auth", "feature-user-auth"}, + {"product-catalog", "feature-product-catalog"}, + // Bad prefix patterns the agent has produced in the wild. + {"branch-name-feature-user-auth", "feature-branch-name-feature-user-auth"}, } -} -func TestExtractJSON_NoJSON(t *testing.T) { - _, err := extractJSON("no json here") - if err == nil { - t.Fatal("expected an error, got nil") + for _, tc := range tests { + got := sanitiseBranchName(tc.input) + if got != tc.want { + t.Errorf("sanitiseBranchName(%q) = %q, want %q", tc.input, got, tc.want) + } } } -// ── sanitiseBranchName ──────────────────────────────────────────────────────── +// ── sanitiseOrchBranchName ──────────────────────────────────────────────────── -func TestSanitiseBranchName(t *testing.T) { +func TestSanitiseOrchBranchName(t *testing.T) { tests := []struct { input string want string }{ - {"feature/user-auth", "feature/user-auth"}, - {"Feature/User Auth", "feature/user-auth"}, - {"feature/user--auth", "feature/user-auth"}, - {"feature/ user ", "feature/user"}, - {"FEATURE/ProductCatalog", "feature/productcatalog"}, - {"feature/user_auth!", "feature/user-auth"}, + {"orch-e-commerce", "orch-e-commerce"}, + {"Orch E Commerce", "orch-e-commerce"}, + {"orch-e--commerce", "orch-e-commerce"}, + {"e-commerce", "orch-e-commerce"}, // missing prefix — must be prepended + {"ORCH-Platform", "orch-platform"}, + {"orch/e-commerce", "orch-e-commerce"}, // slashes become hyphens } for _, tc := range tests { - got := sanitiseBranchName(tc.input) + got := sanitiseOrchBranchName(tc.input) if got != tc.want { - t.Errorf("sanitiseBranchName(%q) = %q, want %q", tc.input, got, tc.want) + t.Errorf("sanitiseOrchBranchName(%q) = %q, want %q", tc.input, got, tc.want) } } } @@ -193,15 +166,16 @@ func orchestratorFixtures() []string { mustJSON(spec), // Step 1 – spec mustJSON(features), // Step 2 – features mustJSON(contractsResp), // Step 3 – contracts - "feature/user-auth", // Step 4 – branch for User Authentication - "feature/product-catalog", // Step 4 – branch for Product Catalog + "orch-e-commerce", // Step 4 – orch branch name + "feature-user-auth", // Step 4 – branch for User Authentication + "feature-product-catalog", // Step 4 – branch for Product Catalog } } func TestRun_FullWorkflow(t *testing.T) { client := &mockAgentClient{responses: orchestratorFixtures()} names := testAgentNames() - orch := New(client, names, "", "") // empty MCP URL + empty repoPath → skip both + orch := New(client, names, "", "", nil) // no MCP URLs, no branch creator result, err := orch.Run(context.Background(), "Build an e-commerce platform") if err != nil { @@ -238,9 +212,12 @@ func TestRun_FullWorkflow(t *testing.T) { if len(authLead.ProvidedContracts) != 1 || authLead.ProvidedContracts[0].Name != "UserAuthAPI" { t.Errorf("auth lead should provide UserAuthAPI; got %v", authLead.ProvidedContracts) } - if authLead.BranchName != "feature/user-auth" { + if authLead.BranchName != "feature-user-auth" { t.Errorf("unexpected branch name: %q", authLead.BranchName) } + if authLead.OrchBranchName != "orch-e-commerce" { + t.Errorf("unexpected orch branch name on auth lead: %q", authLead.OrchBranchName) + } catalogLead := result.FeatureLeads[1] if catalogLead.Feature.Name != "Product Catalog" { @@ -249,9 +226,17 @@ func TestRun_FullWorkflow(t *testing.T) { if len(catalogLead.RequiredContracts) != 1 || catalogLead.RequiredContracts[0].Name != "UserAuthAPI" { t.Errorf("catalog lead should require UserAuthAPI; got %v", catalogLead.RequiredContracts) } - if catalogLead.BranchName != "feature/product-catalog" { + if catalogLead.BranchName != "feature-product-catalog" { t.Errorf("unexpected branch name: %q", catalogLead.BranchName) } + if catalogLead.OrchBranchName != "orch-e-commerce" { + t.Errorf("unexpected orch branch name on catalog lead: %q", catalogLead.OrchBranchName) + } + + // OrchBranchName should be set on the result. + if result.OrchBranchName != "orch-e-commerce" { + t.Errorf("unexpected result.OrchBranchName: %q", result.OrchBranchName) + } // All agent responses should have been consumed. if client.callIdx != len(client.responses) { @@ -268,35 +253,47 @@ func TestRun_FullWorkflow(t *testing.T) { if client.calls[2].agentName != names.Contract { t.Errorf("step 3 used agent %q, want %q", client.calls[2].agentName, names.Contract) } + // call[3] = orch branch name, call[4] = first feature branch, call[5] = second feature branch if client.calls[3].agentName != names.Branch { - t.Errorf("step 4a used agent %q, want %q", client.calls[3].agentName, names.Branch) + t.Errorf("step 4 (orch branch) used agent %q, want %q", client.calls[3].agentName, names.Branch) + } + if client.calls[4].agentName != names.Branch { + t.Errorf("step 4a used agent %q, want %q", client.calls[4].agentName, names.Branch) } } func TestRun_SpecStepInjectsMCPServer(t *testing.T) { client := &mockAgentClient{responses: orchestratorFixtures()} names := testAgentNames() - mcpURL := "http://localhost:9090" - orch := New(client, names, mcpURL, "") + askUserURL := "http://localhost:9090" + codeMCPURL := "http://code-mcp:8080" + orch := New(client, names, askUserURL, codeMCPURL, nil) - _, err := orch.Run(context.Background(), "Build an e-commerce platform") + _, err := orch.RunWithMCPURL(context.Background(), "Build an e-commerce platform", askUserURL, "my-repo") if err != nil { t.Fatalf("Run returned error: %v", err) } - // Step 1 (spec) should have the MCP server injected. + // Step 1 (spec) should have both MCP servers injected: ask_user and dev-read. specCall := client.calls[0] - if len(specCall.mcpServers) != 1 { - t.Fatalf("expected 1 MCP server on spec call, got %d", len(specCall.mcpServers)) + if len(specCall.mcpServers) != 2 { + t.Fatalf("expected 2 MCP servers on spec call, got %d", len(specCall.mcpServers)) } if specCall.mcpServers[0].Name != "opendev" { - t.Errorf("expected MCP server name %q, got %q", "opendev", specCall.mcpServers[0].Name) + t.Errorf("expected mcpServers[0].Name %q, got %q", "opendev", specCall.mcpServers[0].Name) + } + if specCall.mcpServers[0].URL != askUserURL+"/mcp" { + t.Errorf("expected mcpServers[0].URL %q, got %q", askUserURL+"/mcp", specCall.mcpServers[0].URL) + } + if specCall.mcpServers[1].Name != "dev-read" { + t.Errorf("expected mcpServers[1].Name %q, got %q", "dev-read", specCall.mcpServers[1].Name) } - if specCall.mcpServers[0].URL != mcpURL+"/mcp" { - t.Errorf("expected MCP URL %q, got %q", mcpURL+"/mcp", specCall.mcpServers[0].URL) + wantReadURL := codeMCPURL + "/my-repo/main/read/mcp" + if specCall.mcpServers[1].URL != wantReadURL { + t.Errorf("expected mcpServers[1].URL %q, got %q", wantReadURL, specCall.mcpServers[1].URL) } - // Steps 2–4 should have no MCP servers. + // Steps 2–5 should have no MCP servers. for i, call := range client.calls[1:] { if len(call.mcpServers) != 0 { t.Errorf("step %d should have no MCP servers, got %d", i+2, len(call.mcpServers)) @@ -306,7 +303,7 @@ func TestRun_SpecStepInjectsMCPServer(t *testing.T) { func TestRun_AgentErrorPropagates(t *testing.T) { client := &mockAgentClient{responses: []string{}} // no responses → first call fails - orch := New(client, testAgentNames(), "", "") + orch := New(client, testAgentNames(), "", "", nil) _, err := orch.Run(context.Background(), "anything") if err == nil { @@ -319,7 +316,7 @@ func TestRun_EmptyFeatureList(t *testing.T) { mustJSON(SystemSpec{Title: "T", Description: "D"}), // step 1 ok "[]", // step 2 returns empty features }} - orch := New(client, testAgentNames(), "", "") + orch := New(client, testAgentNames(), "", "", nil) _, err := orch.Run(context.Background(), "anything") if err == nil { @@ -334,9 +331,9 @@ func TestGenerateSpec_StripsMarkdown(t *testing.T) { wrapped := fmt.Sprintf("```json\n%s\n```", mustJSON(spec)) client := &mockAgentClient{responses: []string{wrapped}} - orch := New(client, testAgentNames(), "", "") + orch := New(client, testAgentNames(), "", "", nil) - got, err := orch.generateSpec(context.Background(), "build T") + got, err := orch.generateSpec(context.Background(), "build T", "") if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/questions/store.go b/questions/store.go new file mode 100644 index 0000000..9c2f60d --- /dev/null +++ b/questions/store.go @@ -0,0 +1,125 @@ +// Package questions provides a thread-safe in-memory store for interactive +// questions posed by MCP tool calls. It allows an external TUI (or any HTTP +// client) to poll for pending questions and post answers asynchronously, so +// that the ask_user MCP tool does not need direct access to stdin. +package questions + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + "github.com/google/uuid" +) + +// ErrNotFound is returned when an operation targets a question ID that does +// not exist in the store. +var ErrNotFound = errors.New("question not found") + +// ErrAlreadyAnswered is returned when an answer is posted for a question that +// has already been answered. +var ErrAlreadyAnswered = errors.New("question already answered") + +// Question is a single interactive question waiting for (or having received) +// a user-supplied answer. +type Question struct { + ID string `json:"id"` + Text string `json:"text"` + CreatedAt time.Time `json:"created_at"` + Answer *string `json:"answer"` // nil until answered +} + +// entry holds the public Question and an internal channel used to deliver the +// answer to the blocked Ask call. +type entry struct { + question Question + answerCh chan string // buffered(1); closed/sent to exactly once +} + +// Store is a thread-safe in-memory queue of questions and their answers. +// The zero value is not usable — create one with NewStore. +type Store struct { + mu sync.Mutex + entries map[string]*entry +} + +// NewStore returns an initialised, empty Store. +func NewStore() *Store { + return &Store{ + entries: make(map[string]*entry), + } +} + +// Ask adds a new question with the given text to the store, then blocks until +// either an answer is posted via Answer or the context is cancelled. +// +// On success it returns the trimmed answer string. On context cancellation it +// returns an error and removes the question from the store. +func (s *Store) Ask(ctx context.Context, text string) (string, error) { + id := uuid.New().String() + e := &entry{ + question: Question{ + ID: id, + Text: text, + CreatedAt: time.Now().UTC(), + }, + answerCh: make(chan string, 1), + } + + s.mu.Lock() + s.entries[id] = e + s.mu.Unlock() + + select { + case answer := <-e.answerCh: + return answer, nil + case <-ctx.Done(): + // Clean up the unanswered entry so it no longer appears as pending. + s.mu.Lock() + delete(s.entries, id) + s.mu.Unlock() + return "", fmt.Errorf("ask_user cancelled for question %q: %w", id, ctx.Err()) + } +} + +// Pending returns all questions that have not yet been answered, in an +// unspecified order. The returned slice is a snapshot; callers should not +// mutate the Question values. +func (s *Store) Pending() []Question { + s.mu.Lock() + defer s.mu.Unlock() + + out := make([]Question, 0, len(s.entries)) + for _, e := range s.entries { + if e.question.Answer == nil { + out = append(out, e.question) + } + } + return out +} + +// Answer delivers answer to the Ask call that is blocking on question id. +// It returns ErrNotFound if id is unknown and ErrAlreadyAnswered if the +// question has already been answered. +func (s *Store) Answer(id string, answer string) error { + s.mu.Lock() + e, ok := s.entries[id] + if !ok { + s.mu.Unlock() + return ErrNotFound + } + if e.question.Answer != nil { + s.mu.Unlock() + return ErrAlreadyAnswered + } + // Record the answer on the Question so Pending() stops returning it. + e.question.Answer = &answer + s.mu.Unlock() + + // Deliver to the blocked Ask call (non-blocking because the channel is + // buffered(1) and we guarantee it is sent to at most once). + e.answerCh <- answer + return nil +} diff --git a/questions/store_test.go b/questions/store_test.go new file mode 100644 index 0000000..940fe17 --- /dev/null +++ b/questions/store_test.go @@ -0,0 +1,254 @@ +package questions_test + +import ( + "context" + "sync" + "testing" + "time" + + "github.com/iamangus/OpenDev/questions" +) + +// ── Ask / Answer ────────────────────────────────────────────────────────────── + +func TestAskAndAnswer(t *testing.T) { + s := questions.NewStore() + + const text = "What is your name?" + const want = "Alice" + + // Ask blocks, so run it in a goroutine. + var got string + var askErr error + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + got, askErr = s.Ask(context.Background(), text) + }() + + // Give Ask time to register the question before we call Pending. + time.Sleep(10 * time.Millisecond) + + pending := s.Pending() + if len(pending) != 1 { + t.Fatalf("expected 1 pending question, got %d", len(pending)) + } + if pending[0].Text != text { + t.Errorf("pending question text = %q, want %q", pending[0].Text, text) + } + if pending[0].Answer != nil { + t.Error("expected Answer to be nil before answering") + } + + id := pending[0].ID + if err := s.Answer(id, want); err != nil { + t.Fatalf("Answer returned unexpected error: %v", err) + } + + wg.Wait() + + if askErr != nil { + t.Fatalf("Ask returned unexpected error: %v", askErr) + } + if got != want { + t.Errorf("Ask returned %q, want %q", got, want) + } +} + +// ── Pending filters answered questions ──────────────────────────────────────── + +func TestPendingFiltersAnswered(t *testing.T) { + s := questions.NewStore() + + var ids []string + var wg sync.WaitGroup + + // Ask two questions concurrently. + for i := 0; i < 2; i++ { + wg.Add(1) + go func(n int) { + defer wg.Done() + _, _ = s.Ask(context.Background(), "question") + }(i) + } + + // Wait for both to register. + time.Sleep(20 * time.Millisecond) + + pending := s.Pending() + if len(pending) != 2 { + t.Fatalf("expected 2 pending questions, got %d", len(pending)) + } + for _, q := range pending { + ids = append(ids, q.ID) + } + + // Answer one of them. + if err := s.Answer(ids[0], "ans"); err != nil { + t.Fatalf("Answer: %v", err) + } + + // Give the goroutine a moment to unblock. + time.Sleep(10 * time.Millisecond) + + pending = s.Pending() + if len(pending) != 1 { + t.Errorf("expected 1 pending after answering one, got %d", len(pending)) + } + if pending[0].ID == ids[0] { + t.Error("answered question should not appear in Pending") + } + + // Answer the second so the goroutines finish cleanly. + _ = s.Answer(ids[1], "ans") + wg.Wait() +} + +// ── Concurrent Ask calls — each answered independently ──────────────────────── + +func TestConcurrentAsks(t *testing.T) { + s := questions.NewStore() + const n = 10 + + type result struct { + answer string + err error + } + results := make([]result, n) + var wg sync.WaitGroup + + for i := 0; i < n; i++ { + wg.Add(1) + go func(idx int) { + defer wg.Done() + ans, err := s.Ask(context.Background(), "question") + results[idx] = result{ans, err} + }(i) + } + + // Poll until all n questions are registered. + deadline := time.Now().Add(time.Second) + for { + if len(s.Pending()) == n { + break + } + if time.Now().After(deadline) { + t.Fatalf("timed out waiting for %d pending questions", n) + } + time.Sleep(5 * time.Millisecond) + } + + // Answer each one. + for _, q := range s.Pending() { + if err := s.Answer(q.ID, "reply-"+q.ID); err != nil { + t.Errorf("Answer(%q): %v", q.ID, err) + } + } + + wg.Wait() + + for i, r := range results { + if r.err != nil { + t.Errorf("results[%d]: unexpected error: %v", i, r.err) + } + if r.answer == "" { + t.Errorf("results[%d]: empty answer", i) + } + } +} + +// ── Context cancellation ────────────────────────────────────────────────────── + +func TestAskCancelledByContext(t *testing.T) { + s := questions.NewStore() + + ctx, cancel := context.WithCancel(context.Background()) + + done := make(chan error, 1) + go func() { + _, err := s.Ask(ctx, "Will this be answered?") + done <- err + }() + + time.Sleep(10 * time.Millisecond) + cancel() + + select { + case err := <-done: + if err == nil { + t.Fatal("expected an error from cancelled Ask, got nil") + } + case <-time.After(time.Second): + t.Fatal("Ask did not unblock after context cancellation") + } + + // The question should be removed from the store. + if len(s.Pending()) != 0 { + t.Errorf("expected 0 pending after cancellation, got %d", len(s.Pending())) + } +} + +// ── Error cases ─────────────────────────────────────────────────────────────── + +func TestAnswerNotFound(t *testing.T) { + s := questions.NewStore() + err := s.Answer("no-such-id", "whatever") + if err != questions.ErrNotFound { + t.Errorf("expected ErrNotFound, got %v", err) + } +} + +func TestAnswerAlreadyAnswered(t *testing.T) { + s := questions.NewStore() + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + _, _ = s.Ask(context.Background(), "once") + }() + + time.Sleep(10 * time.Millisecond) + + id := s.Pending()[0].ID + if err := s.Answer(id, "first"); err != nil { + t.Fatalf("first Answer: %v", err) + } + + wg.Wait() + + if err := s.Answer(id, "second"); err != questions.ErrAlreadyAnswered { + t.Errorf("expected ErrAlreadyAnswered, got %v", err) + } +} + +// ── CreatedAt is populated ──────────────────────────────────────────────────── + +func TestQuestionCreatedAt(t *testing.T) { + s := questions.NewStore() + before := time.Now().UTC() + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + _, _ = s.Ask(context.Background(), "when was I created?") + }() + + time.Sleep(10 * time.Millisecond) + + after := time.Now().UTC() + pending := s.Pending() + if len(pending) != 1 { + t.Fatalf("expected 1 pending, got %d", len(pending)) + } + + ts := pending[0].CreatedAt + if ts.Before(before) || ts.After(after) { + t.Errorf("CreatedAt %v not in range [%v, %v]", ts, before, after) + } + + _ = s.Answer(pending[0].ID, "now") + wg.Wait() +}