From 2f815ea2fbdea75a56ea7b7a00d15ef979fc0a8e Mon Sep 17 00:00:00 2001 From: whoisasx Date: Tue, 2 Jun 2026 06:07:01 +0530 Subject: [PATCH 01/20] feat: add provider-neutral scm observer --- backend/internal/adapters/scm/github/auth.go | 38 + .../internal/adapters/scm/github/client.go | 51 ++ backend/internal/adapters/scm/github/doc.go | 20 +- .../adapters/scm/github/observer_provider.go | 537 ++++++++++++ backend/internal/daemon/daemon.go | 1 + backend/internal/daemon/lifecycle_wiring.go | 8 +- backend/internal/daemon/scm_wiring.go | 40 + backend/internal/domain/pr.go | 49 ++ backend/internal/lifecycle/manager_test.go | 22 + backend/internal/lifecycle/reactions.go | 81 +- backend/internal/observe/scm/observer.go | 770 ++++++++++++++++++ backend/internal/observe/scm/observer_test.go | 388 +++++++++ backend/internal/ports/outbound.go | 7 + backend/internal/ports/scm_observations.go | 140 ++++ backend/internal/session_manager/manager.go | 2 +- backend/internal/storage/sqlite/gen/models.go | 62 +- backend/internal/storage/sqlite/gen/pr.sql.go | 200 ++++- .../storage/sqlite/gen/pr_checks.sql.go | 16 +- .../storage/sqlite/gen/pr_comment.sql.go | 15 +- .../sqlite/gen/pr_review_threads.sql.go | 94 +++ .../storage/sqlite/gen/projects.sql.go | 2 +- .../migrations/0003_scm_observer_schema.sql | 93 +++ .../internal/storage/sqlite/queries/pr.sql | 64 +- .../storage/sqlite/queries/pr_checks.sql | 10 +- .../storage/sqlite/queries/pr_comment.sql | 6 +- .../sqlite/queries/pr_review_threads.sql | 17 + .../internal/storage/sqlite/store/pr_store.go | 185 ++++- .../storage/sqlite/store/store_test.go | 47 ++ 28 files changed, 2883 insertions(+), 82 deletions(-) create mode 100644 backend/internal/adapters/scm/github/observer_provider.go create mode 100644 backend/internal/daemon/scm_wiring.go create mode 100644 backend/internal/observe/scm/observer.go create mode 100644 backend/internal/observe/scm/observer_test.go create mode 100644 backend/internal/ports/scm_observations.go create mode 100644 backend/internal/storage/sqlite/gen/pr_review_threads.sql.go create mode 100644 backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql create mode 100644 backend/internal/storage/sqlite/queries/pr_review_threads.sql diff --git a/backend/internal/adapters/scm/github/auth.go b/backend/internal/adapters/scm/github/auth.go index 3349d7c3..12993873 100644 --- a/backend/internal/adapters/scm/github/auth.go +++ b/backend/internal/adapters/scm/github/auth.go @@ -59,6 +59,44 @@ func (s EnvTokenSource) Token(context.Context) (string, error) { return "", ErrNoToken } +// FallbackTokenSource tries each source in order, returning the first token. A +// source that returns ErrNoToken is skipped; other errors are remembered and +// surfaced if no later source yields a token. +type FallbackTokenSource []TokenSource + +// Token returns the first non-empty token from the configured sources. +func (s FallbackTokenSource) Token(ctx context.Context) (string, error) { + var firstErr error + for _, src := range s { + if src == nil { + continue + } + tok, err := src.Token(ctx) + if err == nil { + return tok, nil + } + if errors.Is(err, ErrNoToken) { + continue + } + if firstErr == nil { + firstErr = err + } + } + if firstErr != nil { + return "", firstErr + } + return "", ErrNoToken +} + +// InvalidateToken forwards cache invalidation to sources that support it. +func (s FallbackTokenSource) InvalidateToken() { + for _, src := range s { + if inv, ok := src.(tokenInvalidator); ok { + inv.InvalidateToken() + } + } +} + const defaultGHTokenCacheTTL = 5 * time.Minute // GHTokenSource shells out to `gh auth token` when env vars are not diff --git a/backend/internal/adapters/scm/github/client.go b/backend/internal/adapters/scm/github/client.go index 13897672..c342aa06 100644 --- a/backend/internal/adapters/scm/github/client.go +++ b/backend/internal/adapters/scm/github/client.go @@ -127,6 +127,50 @@ type RESTResponse struct { Body []byte } +// doRESTWithETag performs one REST GET with an explicit caller-owned ETag. +// Unlike doREST, it does not replay cached bodies or mutate the client's +// internal compatibility cache; it exists for the provider-neutral SCM observer, +// whose ETag cache belongs to the observer orchestration layer. +func (c *Client) doRESTWithETag(ctx context.Context, path string, q url.Values, etag string) (RESTResponse, error) { + u, err := c.restURL(path, q) + if err != nil { + return RESTResponse{}, fmt.Errorf("github scm: build %s URL: %w", path, err) + } + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, http.NoBody) + if err != nil { + return RESTResponse{}, fmt.Errorf("github scm: build GET %s request: %w", path, err) + } + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + req.Header.Set("User-Agent", c.userAgent) + if etag != "" { + req.Header.Set("If-None-Match", etag) + } + if err := c.authorize(ctx, req); err != nil { + return RESTResponse{}, err + } + resp, err := c.http.Do(req) + if err != nil { + return RESTResponse{}, fmt.Errorf("github scm: GET %s: %w", path, err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode == http.StatusNotModified { + return RESTResponse{StatusCode: resp.StatusCode, NotModified: true, ETag: firstNonEmptyHeader(resp.Header.Get("ETag"), etag)}, nil + } + b, readErr := io.ReadAll(resp.Body) + if readErr != nil { + return RESTResponse{}, fmt.Errorf("github scm: read %s body: %w", path, readErr) + } + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + return RESTResponse{StatusCode: resp.StatusCode, ETag: resp.Header.Get("ETag"), Body: b}, nil + } + err = classifyError(resp, b) + if errors.Is(err, ErrAuthFailed) { + c.invalidateToken() + } + return RESTResponse{StatusCode: resp.StatusCode, Body: b}, err +} + // doREST performs one REST request with ETag-aware caching. The cache is // scoped to the (method, path, query) tuple so repeated PR observations // against the same endpoint replay from the cache while observations of a @@ -434,3 +478,10 @@ func githubMessage(body []byte) string { } return strings.TrimSpace(string(body)) } + +func firstNonEmptyHeader(a, b string) string { + if a != "" { + return a + } + return b +} diff --git a/backend/internal/adapters/scm/github/doc.go b/backend/internal/adapters/scm/github/doc.go index 6bc7b146..4b496379 100644 --- a/backend/internal/adapters/scm/github/doc.go +++ b/backend/internal/adapters/scm/github/doc.go @@ -1,6 +1,6 @@ -// Package github observes GitHub pull requests for the PR Manager. +// Package github observes GitHub pull requests for AO's SCM integrations. // -// The exported surface is one function: +// The compatibility exported surface is: // // (*Provider).Observe(ctx, prURL) (ports.PRObservation, error) // @@ -11,9 +11,11 @@ // GET on /repos/{o}/{r}/actions/jobs/{job_id}/logs to splice the last 20 // lines of the failed job into the observation. // -// The poller / cadence loop is intentionally NOT in this package — it is -// a follow-up PR. This adapter is the observation primitive that loop -// will call. +// The provider-neutral SCM observer uses the same Provider for lower-level +// primitives: repo/commit ETag guards, branch-to-PR detection, GraphQL PR +// batching, failed-job log tails, and review-thread pagination. The polling +// loop itself is intentionally not in this package; it lives in +// internal/observe/scm. // // # State mapping // @@ -105,16 +107,18 @@ // // # Caching // -// The Client maintains an in-memory ETag cache per (method, path, query). +// The legacy Observe path's Client maintains an in-memory ETag cache per +// (method, path, query). // On the second observation of the same PR the REST GET sends // If-None-Match and replays the cached body on a 304 — GraphQL is always // re-fetched because it doesn't expose ETag-based revalidation. // +// The provider-neutral observer owns its own ETag cache and calls explicit +// provider guard methods that do not mutate the legacy Client cache. +// // # Out of scope (intentionally — these are different PRs / lanes) // -// - The poller loop and cadence selection (issue #35). // - Webhook ingestion (this package is polling-only). -// - Persistence (PR Manager owns the row mapping; see internal/service/pr). // - Linear / GitLab providers (separate PRs). // - Issue tracking (separate lane, see internal/adapters/tracker). // - Comment-injection-into-session-context (Messenger lane, not SCM). diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go new file mode 100644 index 00000000..5ce75c11 --- /dev/null +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -0,0 +1,537 @@ +package github + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "net/url" + "sort" + "strconv" + "strings" + "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +const scmBatchCheckContextLimit = 100 + +// ParseRepository normalizes a GitHub remote/origin URL into a provider-neutral +// repository key. It accepts https://github.com/owner/repo(.git), +// git@github.com:owner/repo(.git), and path-only owner/repo inputs used by tests. +func (p *Provider) ParseRepository(remote string) (ports.SCMRepo, bool) { + repo, ok := parseGitHubRepo(remote) + return repo, ok +} + +// RepoPRListGuard checks GitHub's cheap open-PR-list ETag guard. +func (p *Provider) RepoPRListGuard(ctx context.Context, repo ports.SCMRepo, etag string) (ports.SCMGuardResult, error) { + q := url.Values{} + q.Set("state", "open") + q.Set("sort", "updated") + q.Set("direction", "desc") + q.Set("per_page", "1") + resp, err := p.client.doRESTWithETag(ctx, repoPath(repo.Owner, repo.Name, "pulls"), q, etag) + if err != nil { + return ports.SCMGuardResult{}, err + } + return ports.SCMGuardResult{ETag: firstNonEmptyHeader(resp.ETag, etag), NotModified: resp.NotModified}, nil +} + +// DetectPRByBranch finds an open PR whose head branch matches the session branch. +func (p *Provider) DetectPRByBranch(ctx context.Context, repo ports.SCMRepo, branch string) (ports.SCMPRObservation, error) { + branch = strings.TrimSpace(branch) + if branch == "" { + return ports.SCMPRObservation{}, fmt.Errorf("%w: empty branch", ErrNotFound) + } + pulls, err := p.detectPRByHead(ctx, repo, repo.Owner+":"+branch) + if err != nil { + return ports.SCMPRObservation{}, err + } + if len(pulls) == 0 { + return ports.SCMPRObservation{}, fmt.Errorf("%w: no open PR for branch %s", ErrNotFound, branch) + } + return restListPullToSCM(pulls[0]), nil +} + +func (p *Provider) detectPRByHead(ctx context.Context, repo ports.SCMRepo, head string) ([]restListPull, error) { + q := url.Values{} + q.Set("state", "open") + q.Set("head", head) + q.Set("per_page", "10") + resp, err := p.client.doREST(ctx, http.MethodGet, repoPath(repo.Owner, repo.Name, "pulls"), q, nil) + if err != nil { + return nil, err + } + var pulls []restListPull + if err := json.Unmarshal(resp.Body, &pulls); err != nil { + return nil, fmt.Errorf("github scm: decode branch PR list: %w", err) + } + return pulls, nil +} + +// CommitChecksGuard checks GitHub's per-commit check-runs ETag guard. +func (p *Provider) CommitChecksGuard(ctx context.Context, repo ports.SCMRepo, headSHA, etag string) (ports.SCMGuardResult, error) { + if strings.TrimSpace(headSHA) == "" { + return ports.SCMGuardResult{}, fmt.Errorf("%w: empty head sha", ErrNotFound) + } + q := url.Values{} + q.Set("per_page", "1") + resp, err := p.client.doRESTWithETag(ctx, repoPath(repo.Owner, repo.Name, "commits", headSHA, "check-runs"), q, etag) + if err != nil { + return ports.SCMGuardResult{}, err + } + return ports.SCMGuardResult{ETag: firstNonEmptyHeader(resp.ETag, etag), NotModified: resp.NotModified}, nil +} + +// FetchPullRequests fetches normalized PR/check metadata for up to 25 PR refs in +// one GraphQL request. The observer owns chunking; this method rejects larger +// batches so tests catch accidental over-batching. +func (p *Provider) FetchPullRequests(ctx context.Context, refs []ports.SCMPRRef) ([]ports.SCMObservation, error) { + if len(refs) == 0 { + return nil, nil + } + if len(refs) > 25 { + return nil, fmt.Errorf("github scm: batch size %d exceeds 25", len(refs)) + } + query, aliases := buildSCMBatchQuery(refs) + data, err := p.client.doGraphQL(ctx, query, nil) + if err != nil { + return nil, err + } + out := make([]ports.SCMObservation, 0, len(refs)) + for i, ref := range refs { + repoData, _ := data[aliases[i]].(map[string]any) + pr, _ := repoData["pullRequest"].(map[string]any) + if pr == nil { + continue + } + out = append(out, scmObservationFromGraphQL(ref, pr)) + } + return out, nil +} + +// FetchFailedCheckLogTail fetches and tails a failed GitHub Actions job log. +func (p *Provider) FetchFailedCheckLogTail(ctx context.Context, repo ports.SCMRepo, check ports.SCMCheckObservation) (string, error) { + if check.ProviderID == "" { + return "", nil + } + jobID, err := strconv.ParseInt(check.ProviderID, 10, 64) + if err != nil { + return "", fmt.Errorf("github scm: parse check provider id %q: %w", check.ProviderID, err) + } + if jobID <= 0 { + return "", nil + } + log, err := p.fetchJobLogTail(ctx, repo.Owner, repo.Name, jobID) + if err != nil { + return "", err + } + return tailLines(log, ciFailureLogTailLines), nil +} + +// FetchReviewThreads fetches review threads separately from the fast PR/CI path. +func (p *Provider) FetchReviewThreads(ctx context.Context, ref ports.SCMPRRef) (ports.SCMReviewObservation, error) { + var all []ports.SCMReviewThreadObservation + var decision string + var cursor string + for { + query := buildReviewThreadsQuery(ref, cursor) + data, err := p.client.doGraphQL(ctx, query, nil) + if err != nil { + return ports.SCMReviewObservation{}, err + } + repoData, _ := data["repo"].(map[string]any) + pr, _ := repoData["pullRequest"].(map[string]any) + if pr == nil { + return ports.SCMReviewObservation{}, fmt.Errorf("%w: pull request not found in review response", ErrNotFound) + } + decision = string(reviewDecisionFromGraphQL(pr)) + threads, _ := pr["reviewThreads"].(map[string]any) + for _, th := range nodes(threads["nodes"]) { + all = append(all, scmThreadFromGraphQL(th)) + } + pi, _ := threads["pageInfo"].(map[string]any) + if !boolv(pi["hasNextPage"]) { + break + } + cursor = str(pi["endCursor"]) + if cursor == "" { + break + } + } + return ports.SCMReviewObservation{Decision: decision, Threads: all}, nil +} + +type restListPull struct { + URL string `json:"url"` + HTMLURL string `json:"html_url"` + Number int `json:"number"` + State string `json:"state"` + Draft bool `json:"draft"` + Title string `json:"title"` + Head struct { + Ref string `json:"ref"` + SHA string `json:"sha"` + } `json:"head"` + Base struct { + Ref string `json:"ref"` + SHA string `json:"sha"` + } `json:"base"` + User struct { + Login string `json:"login"` + Type string `json:"type"` + } `json:"user"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` +} + +func restListPullToSCM(pull restListPull) ports.SCMPRObservation { + closed := strings.EqualFold(pull.State, "closed") + return ports.SCMPRObservation{ + URL: firstNonEmpty(pull.HTMLURL, pull.URL), + Number: pull.Number, + State: normalizePRState(pull.Draft, false, closed), + Draft: pull.Draft, + Closed: closed, + SourceBranch: pull.Head.Ref, + TargetBranch: pull.Base.Ref, + HeadSHA: pull.Head.SHA, + Title: pull.Title, + Author: pull.User.Login, + BaseSHA: pull.Base.SHA, + ProviderState: pull.State, + HTMLURL: pull.HTMLURL, + CreatedAtProvider: parseGitHubTime(pull.CreatedAt), + UpdatedAtProvider: parseGitHubTime(pull.UpdatedAt), + } +} + +func buildSCMBatchQuery(refs []ports.SCMPRRef) (string, []string) { + aliases := make([]string, len(refs)) + var b strings.Builder + b.WriteString("query{\n") + for i, ref := range refs { + alias := fmt.Sprintf("pr%d", i) + aliases[i] = alias + b.WriteString(fmt.Sprintf("%s: repository(owner:%s,name:%s){ pullRequest(number:%d){ %s } }\n", + alias, graphQLString(ref.Repo.Owner), graphQLString(ref.Repo.Name), ref.Number, scmPRFields())) + } + b.WriteString("}") + return b.String(), aliases +} + +func scmPRFields() string { + return strings.ReplaceAll(` +number url state isDraft merged closed title additions deletions changedFiles +mergeable mergeStateStatus reviewDecision headRefName headRefOid baseRefName baseRefOid +createdAt updatedAt mergedAt closedAt +author{ login __typename } +mergeCommit{ oid } +commits(last:1){ nodes{ commit{ oid statusCheckRollup{ state contexts(first:CONTEXT_LIMIT){ nodes{ + __typename + ... on CheckRun { name status conclusion detailsUrl url databaseId } + ... on StatusContext { context state targetUrl } +} pageInfo{ hasNextPage } } } } } } +`, "CONTEXT_LIMIT", strconv.Itoa(scmBatchCheckContextLimit)) +} + +func scmObservationFromGraphQL(ref ports.SCMPRRef, pr map[string]any) ports.SCMObservation { + checks := scmChecksFromGraphQL(pr) + failed := failedSCMChecks(checks) + ci := string(ciSummaryFromGraphQL(pr)) + review := string(reviewDecisionFromGraphQL(pr)) + providerMergeable := str(pr["mergeable"]) + providerMergeState := str(pr["mergeStateStatus"]) + merged := boolv(pr["merged"]) + closed := boolv(pr["closed"]) && !merged + draft := boolv(pr["isDraft"]) + obs := ports.SCMObservation{ + Fetched: true, + Provider: ref.Repo.Provider, + Host: ref.Repo.Host, + Repo: repoFullName(ref.Repo), + PR: ports.SCMPRObservation{ + URL: firstNonEmpty(str(pr["url"]), ref.URL), + Number: int(num(pr["number"])), + State: normalizePRState(draft, merged, closed), + Draft: draft, + Merged: merged, + Closed: closed, + SourceBranch: str(pr["headRefName"]), + TargetBranch: str(pr["baseRefName"]), + HeadSHA: firstNonEmpty(str(pr["headRefOid"]), latestCommitOID(pr)), + Title: str(pr["title"]), + Additions: int(num(pr["additions"])), + Deletions: int(num(pr["deletions"])), + ChangedFiles: int(num(pr["changedFiles"])), + Author: authorLogin(pr["author"]), + BaseSHA: str(pr["baseRefOid"]), + MergeCommitSHA: mergeCommitOID(pr), + ProviderState: str(pr["state"]), + ProviderMergeable: providerMergeable, + ProviderMergeStateStatus: providerMergeState, + HTMLURL: str(pr["url"]), + CreatedAtProvider: parseGitHubTime(str(pr["createdAt"])), + UpdatedAtProvider: parseGitHubTime(str(pr["updatedAt"])), + MergedAtProvider: parseGitHubTime(str(pr["mergedAt"])), + ClosedAtProvider: parseGitHubTime(str(pr["closedAt"])), + }, + CI: ports.SCMCIObservation{ + Summary: ci, + HeadSHA: firstNonEmpty(str(pr["headRefOid"]), latestCommitOID(pr)), + Checks: checks, + FailedChecks: failed, + FailedFingerprint: githubFailedFingerprint(firstNonEmpty(str(pr["headRefOid"]), latestCommitOID(pr)), failed), + }, + Review: ports.SCMReviewObservation{Decision: review}, + } + obs.Mergeability = mergeabilityObservation(providerMergeable, providerMergeState, ci, review) + return obs +} + +func scmChecksFromGraphQL(pr map[string]any) []ports.SCMCheckObservation { + roll := statusRollup(pr) + contexts, _ := roll["contexts"].(map[string]any) + rawNodes := nodes(contexts["nodes"]) + out := make([]ports.SCMCheckObservation, 0, len(rawNodes)) + for _, n := range rawNodes { + typ := str(n["__typename"]) + var ch ports.SCMCheckObservation + switch typ { + case "CheckRun": + ch.Name = str(n["name"]) + ch.Status = strings.ToLower(str(n["status"])) + ch.Conclusion = strings.ToLower(str(n["conclusion"])) + ch.URL = firstNonEmpty(str(n["detailsUrl"]), str(n["url"])) + if id := int64(num(n["databaseId"])); id > 0 { + ch.ProviderID = strconv.FormatInt(id, 10) + } + case "StatusContext": + ch.Name = str(n["context"]) + ch.Status = strings.ToLower(str(n["state"])) + ch.Conclusion = strings.ToLower(str(n["state"])) + ch.URL = str(n["targetUrl"]) + default: + continue + } + if ch.Name == "" { + continue + } + ch.Status = string(checkStatusFromGraphQL(n)) + out = append(out, ch) + } + return out +} + +func failedSCMChecks(checks []ports.SCMCheckObservation) []ports.SCMCheckObservation { + out := make([]ports.SCMCheckObservation, 0, len(checks)) + for _, ch := range checks { + status := domain.PRCheckStatus(ch.Status) + if status == domain.PRCheckFailed || status == domain.PRCheckCancelled { + out = append(out, ch) + } + } + return out +} + +func githubFailedFingerprint(head string, checks []ports.SCMCheckObservation) string { + if len(checks) == 0 { + return "" + } + parts := make([]string, 0, len(checks)) + for _, ch := range checks { + parts = append(parts, strings.Join([]string{head, ch.Name, ch.Status, ch.Conclusion, ch.URL, ch.ProviderID}, "\x00")) + } + sort.Strings(parts) + sum := sha256.Sum256([]byte(strings.Join(parts, "\x1e"))) + return hex.EncodeToString(sum[:]) +} + +func mergeabilityObservation(providerMergeable, providerMergeState, ci, review string) ports.SCMMergeabilityObservation { + state := strings.ToUpper(strings.TrimSpace(providerMergeState)) + mergeable := strings.ToUpper(strings.TrimSpace(providerMergeable)) + out := ports.SCMMergeabilityObservation{State: string(domain.MergeUnknown)} + addBlocker := func(b string) { out.Blockers = append(out.Blockers, b) } + if state == "DIRTY" || mergeable == "CONFLICTING" { + out.State = string(domain.MergeConflicting) + out.Conflict = true + addBlocker("conflicts") + return out + } + if state == "BEHIND" || state == "BEHIND_BASE" { + out.BehindBase = true + addBlocker("behind_base") + } + if state == "BLOCKED" { + out.State = string(domain.MergeBlocked) + addBlocker("blocked_by_provider") + } + if ci == string(domain.CIFailing) { + out.State = string(domain.MergeBlocked) + addBlocker("ci_failing") + } + if review == string(domain.ReviewChangesRequest) { + out.State = string(domain.MergeBlocked) + addBlocker("changes_requested") + } + if out.State == string(domain.MergeBlocked) { + return out + } + if state == "UNSTABLE" { + out.State = string(domain.MergeUnstable) + return out + } + if mergeable == "MERGEABLE" && (state == "CLEAN" || state == "HAS_HOOKS" || state == "") { + out.State = string(domain.MergeMergeable) + out.Mergeable = true + return out + } + return out +} + +func buildReviewThreadsQuery(ref ports.SCMPRRef, cursor string) string { + after := "null" + if cursor != "" { + after = graphQLString(cursor) + } + return fmt.Sprintf(`query{ +repo: repository(owner:%s,name:%s){ pullRequest(number:%d){ reviewDecision reviewThreads(first:100, after:%s){ nodes{ + id isResolved path line + comments(first:100){ nodes{ id body url author{ login __typename } } } +} pageInfo{ hasNextPage endCursor } } } } +}`, graphQLString(ref.Repo.Owner), graphQLString(ref.Repo.Name), ref.Number, after) +} + +func scmThreadFromGraphQL(th map[string]any) ports.SCMReviewThreadObservation { + out := ports.SCMReviewThreadObservation{ + ID: str(th["id"]), + Path: str(th["path"]), + Line: int(num(th["line"])), + Resolved: boolv(th["isResolved"]), + } + comments, _ := th["comments"].(map[string]any) + for _, cn := range nodes(comments["nodes"]) { + author, _ := cn["author"].(map[string]any) + isBot := isBotAuthor(author) + if isBot { + out.IsBot = true + } + out.Comments = append(out.Comments, ports.SCMReviewCommentObservation{ + ID: str(cn["id"]), + Author: str(author["login"]), + Body: str(cn["body"]), + URL: str(cn["url"]), + IsBot: isBot, + }) + } + return out +} + +func parseGitHubRepo(remote string) (ports.SCMRepo, bool) { + raw := strings.TrimSpace(remote) + if raw == "" { + return ports.SCMRepo{}, false + } + if strings.HasPrefix(raw, "git@") { + raw = strings.TrimPrefix(raw, "git@") + parts := strings.SplitN(raw, ":", 2) + if len(parts) != 2 { + return ports.SCMRepo{}, false + } + host := strings.ToLower(parts[0]) + owner, name, ok := splitOwnerRepo(parts[1]) + return makeGitHubRepo(host, owner, name), ok && isGitHubHost(host) + } + if !strings.Contains(raw, "://") && strings.Count(strings.Trim(raw, "/"), "/") == 1 { + owner, name, ok := splitOwnerRepo(raw) + return makeGitHubRepo("github.com", owner, name), ok + } + u, err := url.Parse(raw) + if err != nil { + return ports.SCMRepo{}, false + } + host := strings.ToLower(u.Host) + owner, name, ok := splitOwnerRepo(u.Path) + return makeGitHubRepo(host, owner, name), ok && isGitHubHost(host) +} + +func splitOwnerRepo(p string) (string, string, bool) { + parts := strings.Split(strings.Trim(p, "/"), "/") + if len(parts) < 2 { + return "", "", false + } + owner := parts[0] + name := strings.TrimSuffix(parts[1], ".git") + return owner, name, owner != "" && name != "" +} + +func makeGitHubRepo(host, owner, name string) ports.SCMRepo { + return ports.SCMRepo{Provider: "github", Host: host, Owner: owner, Name: name, Repo: owner + "/" + name} +} + +func isGitHubHost(host string) bool { + return host == "github.com" || host == "www.github.com" || host == "api.github.com" || strings.HasSuffix(host, ".github.com") || strings.HasSuffix(host, ".ghe.io") +} + +func graphQLString(s string) string { + b, err := json.Marshal(s) + if err != nil { + return `""` + } + return string(b) +} + +func repoFullName(repo ports.SCMRepo) string { + if repo.Repo != "" { + return repo.Repo + } + return repo.Owner + "/" + repo.Name +} + +func normalizePRState(draft, merged, closed bool) string { + switch { + case merged: + return string(domain.PRStateMerged) + case closed: + return string(domain.PRStateClosed) + case draft: + return string(domain.PRStateDraft) + default: + return string(domain.PRStateOpen) + } +} + +func parseGitHubTime(s string) time.Time { + if strings.TrimSpace(s) == "" { + return time.Time{} + } + if t, err := time.Parse(time.RFC3339, s); err == nil { + return t + } + return time.Time{} +} + +func authorLogin(v any) string { + author, _ := v.(map[string]any) + return str(author["login"]) +} + +func mergeCommitOID(pr map[string]any) string { + mc, _ := pr["mergeCommit"].(map[string]any) + return str(mc["oid"]) +} + +func latestCommitOID(pr map[string]any) string { + commits, _ := pr["commits"].(map[string]any) + for _, n := range nodes(commits["nodes"]) { + commit, _ := n["commit"].(map[string]any) + if oid := str(commit["oid"]); oid != "" { + return oid + } + } + return "" +} diff --git a/backend/internal/daemon/daemon.go b/backend/internal/daemon/daemon.go index 3b75e4fb..e514addf 100644 --- a/backend/internal/daemon/daemon.go +++ b/backend/internal/daemon/daemon.go @@ -70,6 +70,7 @@ func Run() error { // lifecycle write path live (reducer write -> store -> DB trigger -> // change_log -> poller -> broadcaster) and gives startSession the shared LCM. lcStack := startLifecycle(ctx, store, runtimeAdapter, log) + lcStack.scmDone = startSCMObserver(ctx, store, lcStack.LCM, log) // Wire the controller-facing session service over the same store + LCM, the // zellij runtime, a gitworktree workspace, and the per-session agent resolver diff --git a/backend/internal/daemon/lifecycle_wiring.go b/backend/internal/daemon/lifecycle_wiring.go index 34685670..80bbcb20 100644 --- a/backend/internal/daemon/lifecycle_wiring.go +++ b/backend/internal/daemon/lifecycle_wiring.go @@ -28,6 +28,7 @@ type lifecycleStack struct { // standing up a second store+LCM pair that would diverge under writes. LCM *lifecycle.Manager reaperDone <-chan struct{} + scmDone <-chan struct{} } // startLifecycle constructs the Lifecycle Manager over the store and starts the @@ -40,7 +41,12 @@ func startLifecycle(ctx context.Context, store *sqlite.Store, runtime ports.Runt // Stop waits for the reaper goroutine to exit. The caller must cancel the ctx // passed to startLifecycle before calling Stop. -func (l *lifecycleStack) Stop() { <-l.reaperDone } +func (l *lifecycleStack) Stop() { + <-l.reaperDone + if l.scmDone != nil { + <-l.scmDone + } +} // noopMessenger is a stub ports.AgentMessenger: durable writes and notifications // work without it; only live agent nudges are absent until the runtime/agent diff --git a/backend/internal/daemon/scm_wiring.go b/backend/internal/daemon/scm_wiring.go new file mode 100644 index 00000000..f0faead0 --- /dev/null +++ b/backend/internal/daemon/scm_wiring.go @@ -0,0 +1,40 @@ +package daemon + +import ( + "context" + "errors" + "log/slog" + + scmgithub "github.com/aoagents/agent-orchestrator/backend/internal/adapters/scm/github" + "github.com/aoagents/agent-orchestrator/backend/internal/lifecycle" + scmobserve "github.com/aoagents/agent-orchestrator/backend/internal/observe/scm" + "github.com/aoagents/agent-orchestrator/backend/internal/storage/sqlite" +) + +// startSCMObserver wires the provider-neutral SCM observer with the GitHub +// provider used by v1. Missing credentials do not fail daemon startup; the +// observer is skipped and a warning is logged so local/offline development keeps +// working. +func startSCMObserver(ctx context.Context, store *sqlite.Store, lcm *lifecycle.Manager, logger *slog.Logger) <-chan struct{} { + tokens := scmgithub.FallbackTokenSource{ + scmgithub.EnvTokenSource{EnvVars: []string{"AO_GITHUB_TOKEN"}}, + &scmgithub.GHTokenSource{}, + } + provider, err := scmgithub.NewProvider(scmgithub.ProviderOptions{Token: tokens}) + if err != nil { + if errors.Is(err, scmgithub.ErrNoToken) || errors.Is(err, scmgithub.ErrAuthFailed) { + logger.Warn("scm observer disabled: no usable GitHub token", "err", err) + } else { + logger.Warn("scm observer disabled: GitHub provider setup failed", "err", err) + } + return closedDone() + } + observer := scmobserve.New(provider, store, lcm, scmobserve.Config{Logger: logger}) + return observer.Start(ctx) +} + +func closedDone() <-chan struct{} { + done := make(chan struct{}) + close(done) + return done +} diff --git a/backend/internal/domain/pr.go b/backend/internal/domain/pr.go index 8d1c2451..6195dc3c 100644 --- a/backend/internal/domain/pr.go +++ b/backend/internal/domain/pr.go @@ -32,6 +32,39 @@ type PullRequest struct { Review ReviewDecision Mergeability Mergeability UpdatedAt time.Time + + Provider string + Host string + Repo string + + SourceBranch string + TargetBranch string + HeadSHA string + Title string + Additions int + Deletions int + ChangedFiles int + Author string + BaseSHA string + MergeCommitSHA string + + ProviderState string + ProviderMergeable string + ProviderMergeStateStatus string + HTMLURL string + + CreatedAtProvider time.Time + UpdatedAtProvider time.Time + MergedAtProvider time.Time + ClosedAtProvider time.Time + + MetadataHash string + CIHash string + ReviewHash string + + ObservedAt time.Time + CIObservedAt time.Time + ReviewObservedAt time.Time } // PullRequestCheck is one normalized CI check run for a pull request. @@ -39,22 +72,38 @@ type PullRequestCheck struct { Name string CommitHash string Status PRCheckStatus + Conclusion string URL string + Details string LogTail string CreatedAt time.Time } // PullRequestComment is one normalized review comment for a pull request. type PullRequestComment struct { + ThreadID string ID string Author string File string Line int Body string + URL string Resolved bool + IsBot bool CreatedAt time.Time } +// PullRequestReviewThread is one normalized review thread for a pull request. +type PullRequestReviewThread struct { + ThreadID string + Path string + Line int + Resolved bool + IsBot bool + SemanticHash string + UpdatedAt time.Time +} + // CIState is the aggregate CI status of a PR. type CIState string diff --git a/backend/internal/lifecycle/manager_test.go b/backend/internal/lifecycle/manager_test.go index 3a075085..cb9f17e3 100644 --- a/backend/internal/lifecycle/manager_test.go +++ b/backend/internal/lifecycle/manager_test.go @@ -142,6 +142,28 @@ func TestPRObservation_ReviewCommentsNudgeAgent(t *testing.T) { } } +func TestSCMObservationProjectsToExistingPRReactions(t *testing.T) { + m, st, msg := newManager() + st.sessions["mer-1"] = working("mer-1") + o := ports.SCMObservation{ + Fetched: true, + PR: ports.SCMPRObservation{URL: "pr1", Number: 1}, + CI: ports.SCMCIObservation{ + Summary: string(domain.CIFailing), + HeadSHA: "c1", + FailedChecks: []ports.SCMCheckObservation{{ + Name: "build", Status: string(domain.PRCheckFailed), LogTail: "boom", + }}, + }, + } + if err := m.ApplySCMObservation(ctx, "mer-1", o); err != nil { + t.Fatal(err) + } + if len(msg.msgs) != 1 || !strings.Contains(msg.msgs[0], "boom") { + t.Fatalf("want SCM CI nudge with log tail, got %v", msg.msgs) + } +} + func TestPRObservation_MergeConflictNudgesAgent(t *testing.T) { m, st, msg := newManager() st.sessions["mer-1"] = working("mer-1") diff --git a/backend/internal/lifecycle/reactions.go b/backend/internal/lifecycle/reactions.go index 024bda21..ebf737b5 100644 --- a/backend/internal/lifecycle/reactions.go +++ b/backend/internal/lifecycle/reactions.go @@ -70,6 +70,83 @@ func (m *Manager) ApplyPRObservation(ctx context.Context, id domain.SessionID, o return nil } +// ApplySCMObservation is the provider-neutral lifecycle entrypoint used by the +// SCM observer. The existing reaction logic still operates on PRObservation, so +// lifecycle performs the compatibility projection internally instead of leaking +// the old PR DTO back into the observer/provider boundary. +func (m *Manager) ApplySCMObservation(ctx context.Context, id domain.SessionID, o ports.SCMObservation) error { + if !o.Fetched { + return nil + } + return m.ApplyPRObservation(ctx, id, scmToPRObservation(o)) +} + +func scmToPRObservation(o ports.SCMObservation) ports.PRObservation { + pr := ports.PRObservation{ + Fetched: o.Fetched, + URL: firstSCMNonEmpty(o.PR.URL, o.PR.HTMLURL), + Number: o.PR.Number, + Draft: o.PR.Draft, + Merged: o.PR.Merged, + Closed: o.PR.Closed, + CI: domain.CIState(o.CI.Summary), + Review: domain.ReviewDecision(o.Review.Decision), + Mergeability: domain.Mergeability(o.Mergeability.State), + } + if pr.CI == "" { + pr.CI = domain.CIUnknown + } + if pr.Review == "" { + pr.Review = domain.ReviewNone + } + if pr.Mergeability == "" { + pr.Mergeability = domain.MergeUnknown + } + for _, ch := range o.CI.FailedChecks { + status := domain.PRCheckStatus(ch.Status) + if status == "" { + status = domain.PRCheckFailed + } + logTail := ch.LogTail + if logTail == "" { + logTail = o.CI.FailureLogTail + } + pr.Checks = append(pr.Checks, ports.PRCheckObservation{ + Name: ch.Name, + CommitHash: o.CI.HeadSHA, + Status: status, + URL: ch.URL, + LogTail: logTail, + }) + } + for _, th := range o.Review.Threads { + if th.Resolved || th.IsBot { + continue + } + for _, c := range th.Comments { + if c.IsBot { + continue + } + pr.Comments = append(pr.Comments, ports.PRCommentObservation{ + ID: c.ID, + Author: c.Author, + File: th.Path, + Line: th.Line, + Body: c.Body, + Resolved: th.Resolved, + }) + } + } + return pr +} + +func firstSCMNonEmpty(a, b string) string { + if strings.TrimSpace(a) != "" { + return a + } + return b +} + func hasUnresolvedComments(comments []ports.PRCommentObservation) bool { for _, c := range comments { if !c.Resolved { @@ -80,8 +157,8 @@ func hasUnresolvedComments(comments []ports.PRCommentObservation) bool { } func reviewContent(comments []ports.PRCommentObservation) (string, string) { - var bodies []string - var ids []string + bodies := make([]string, 0, len(comments)) + ids := make([]string, 0, len(comments)) for _, c := range comments { if c.Resolved { continue diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go new file mode 100644 index 00000000..3d3d6b3c --- /dev/null +++ b/backend/internal/observe/scm/observer.go @@ -0,0 +1,770 @@ +// Package scm implements the provider-neutral SCM polling observer. It owns the +// polling loop, ETag/cache checks, semantic diffing, DB persistence, and +// lifecycle notification; provider adapters only normalize provider-specific +// APIs into ports.SCMObservation values. +package scm + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "log/slog" + "strings" + "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +const ( + // DefaultTickInterval is the SCM observer's PR/CI polling cadence. + DefaultTickInterval = 30 * time.Second + // DefaultReviewInterval is the minimum interval between review-thread polls + // for a PR whose review state warrants thread refresh. + DefaultReviewInterval = 2 * time.Minute + // DefaultCacheMax bounds each in-memory ETag/review cache map. + DefaultCacheMax = 512 + // BatchSize is the maximum number of PRs in one provider batch fetch. + BatchSize = 25 +) + +// Provider is the normalized SCM provider contract used by the observer. +type Provider interface { + ParseRepository(remote string) (ports.SCMRepo, bool) + RepoPRListGuard(ctx context.Context, repo ports.SCMRepo, etag string) (ports.SCMGuardResult, error) + DetectPRByBranch(ctx context.Context, repo ports.SCMRepo, branch string) (ports.SCMPRObservation, error) + CommitChecksGuard(ctx context.Context, repo ports.SCMRepo, headSHA, etag string) (ports.SCMGuardResult, error) + FetchPullRequests(ctx context.Context, refs []ports.SCMPRRef) ([]ports.SCMObservation, error) + FetchFailedCheckLogTail(ctx context.Context, repo ports.SCMRepo, check ports.SCMCheckObservation) (string, error) + FetchReviewThreads(ctx context.Context, ref ports.SCMPRRef) (ports.SCMReviewObservation, error) +} + +// Store is the persistence contract the observer needs for discovery, local +// hash reads, and transactional SCM writes. +type Store interface { + ListAllSessions(ctx context.Context) ([]domain.SessionRecord, error) + GetProject(ctx context.Context, id string) (domain.ProjectRecord, bool, error) + ListPRsBySession(ctx context.Context, sessionID domain.SessionID) ([]domain.PullRequest, error) + ListChecks(ctx context.Context, prURL string) ([]domain.PullRequestCheck, error) + WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error +} + +// Lifecycle is the provider-neutral lifecycle notification sink. +type Lifecycle interface { + ApplySCMObservation(ctx context.Context, sessionID domain.SessionID, obs ports.SCMObservation) error +} + +// Config holds optional observer knobs. Zero values use production defaults. +type Config struct { + Tick time.Duration + ReviewInterval time.Duration + Clock func() time.Time + Logger *slog.Logger + CacheMax int +} + +// ObserverCache stores provider ETags and review polling timestamps in memory. +// It is intentionally non-persistent for v1; cold restarts simply revalidate. +type ObserverCache struct { + RepoPRListETag map[string]string + CommitChecksETag map[string]string + ReviewETag map[string]string + LastReviewPollAt map[string]time.Time + repoOrder []string + commitOrder []string + lastReviewPollOrder []string + max int +} + +func newCache(maxEntries int) ObserverCache { + if maxEntries <= 0 { + maxEntries = DefaultCacheMax + } + return ObserverCache{ + RepoPRListETag: map[string]string{}, + CommitChecksETag: map[string]string{}, + ReviewETag: map[string]string{}, + LastReviewPollAt: map[string]time.Time{}, + max: maxEntries, + } +} + +// Observer coordinates provider polling, semantic diffing, persistence, and +// lifecycle notifications for SCM observations. +type Observer struct { + provider Provider + store Store + lifecycle Lifecycle + tick time.Duration + reviewInterval time.Duration + clock func() time.Time + logger *slog.Logger + Cache ObserverCache +} + +// New constructs an Observer with default cadence/cache settings for zero +// values in cfg. +func New(provider Provider, store Store, lifecycle Lifecycle, cfg Config) *Observer { + o := &Observer{provider: provider, store: store, lifecycle: lifecycle, tick: cfg.Tick, reviewInterval: cfg.ReviewInterval, clock: cfg.Clock, logger: cfg.Logger, Cache: newCache(cfg.CacheMax)} + if o.tick <= 0 { + o.tick = DefaultTickInterval + } + if o.reviewInterval <= 0 { + o.reviewInterval = DefaultReviewInterval + } + if o.clock == nil { + o.clock = time.Now + } + if o.logger == nil { + o.logger = slog.Default() + } + return o +} + +// Start launches the observer loop. The first Poll runs immediately inside the +// goroutine so daemon startup is not blocked; subsequent polls run on the tick. +func (o *Observer) Start(ctx context.Context) <-chan struct{} { + done := make(chan struct{}) + go o.loop(ctx, done) + return done +} + +func (o *Observer) loop(ctx context.Context, done chan<- struct{}) { + defer close(done) + if err := o.Poll(ctx); err != nil && !errors.Is(err, context.Canceled) { + o.logger.Error("scm observer: initial poll failed", "err", err) + } + t := time.NewTicker(o.tick) + defer t.Stop() + for { + select { + case <-ctx.Done(): + return + case <-t.C: + if err := o.Poll(ctx); err != nil && !errors.Is(err, context.Canceled) { + o.logger.Error("scm observer: poll failed", "err", err) + } + } + } +} + +type subject struct { + session domain.SessionRecord + project domain.ProjectRecord + repo ports.SCMRepo + branch string + known domain.PullRequest + hasPR bool + newlyDetected bool +} + +type repoGuardState struct { + result ports.SCMGuardResult + hadETag bool + err error +} + +// Poll runs one synchronous SCM observation cycle. +func (o *Observer) Poll(ctx context.Context) error { + now := o.clock().UTC() + if err := ctx.Err(); err != nil { + return err + } + subjects, err := o.discoverSubjects(ctx) + if err != nil { + return err + } + if len(subjects) == 0 { + return nil + } + + repoGuards := o.guardRepos(ctx, subjects) + if err := ctx.Err(); err != nil { + return err + } + o.detectMissingPRs(ctx, subjects, repoGuards, now) + if err := ctx.Err(); err != nil { + return err + } + + refs, subjectsByPR := o.selectRefreshCandidates(ctx, subjects, repoGuards) + observations := map[string]ports.SCMObservation{} + for _, chunk := range chunks(refs, BatchSize) { + if err := ctx.Err(); err != nil { + return err + } + batch, err := o.provider.FetchPullRequests(ctx, chunk) + if err != nil { + o.logger.Error("scm observer: GraphQL PR batch failed", "err", err) + continue + } + for _, obs := range batch { + obs.ObservedAt = now + key := prKeyFromObs(obs) + if key == "" { + continue + } + observations[key] = obs + } + } + + for key, subj := range subjectsByPR { + if err := ctx.Err(); err != nil { + return err + } + obs, ok := observations[key] + if !ok { + continue + } + local := subj.known + o.enrichFailureLogs(ctx, &obs, local) + observations[key] = obs + } + + reviewRefreshed := map[string]bool{} + o.refreshReviews(ctx, subjects, observations, subjectsByPR, reviewRefreshed, now) + if err := ctx.Err(); err != nil { + return err + } + + for key, obs := range observations { + if err := ctx.Err(); err != nil { + return err + } + subj, ok := subjectsByPR[key] + if !ok { + continue + } + local := subj.known + prepared := o.prepareForPersistence(obs, local, reviewRefreshed[key], now) + if !prepared.Changed.Metadata && !prepared.Changed.CI && !prepared.Changed.Review { + continue + } + pr, checks, threads, comments := domainFromObservation(subj.session.ID, prepared, local, reviewRefreshed[key], now) + if err := o.store.WriteSCMObservation(ctx, pr, checks, threads, comments, reviewRefreshed[key]); err != nil { + o.logger.Error("scm observer: DB write failed", "session", subj.session.ID, "pr", pr.URL, "err", err) + continue + } + if o.lifecycle != nil { + if err := o.lifecycle.ApplySCMObservation(ctx, subj.session.ID, prepared); err != nil { + o.logger.Error("scm observer: lifecycle notification failed", "session", subj.session.ID, "pr", pr.URL, "err", err) + } + } + } + return nil +} + +func (o *Observer) discoverSubjects(ctx context.Context) (map[string]*subject, error) { + sessions, err := o.store.ListAllSessions(ctx) + if err != nil { + return nil, err + } + projects := map[domain.ProjectID]domain.ProjectRecord{} + out := map[string]*subject{} + for _, sess := range sessions { + if sess.IsTerminated { + continue + } + branch := strings.TrimSpace(sess.Metadata.Branch) + if branch == "" { + continue + } + proj, ok := projects[sess.ProjectID] + if !ok { + p, found, err := o.store.GetProject(ctx, string(sess.ProjectID)) + if err != nil { + return nil, err + } + if !found || !p.ArchivedAt.IsZero() { + continue + } + projects[sess.ProjectID] = p + proj = p + } + repo, ok := o.provider.ParseRepository(proj.RepoOriginURL) + if !ok { + o.logger.Debug("scm observer: project has no supported SCM origin", "project", proj.ID, "origin", proj.RepoOriginURL) + continue + } + prs, err := o.store.ListPRsBySession(ctx, sess.ID) + if err != nil { + return nil, err + } + known, hasPR := chooseKnownPR(prs) + s := &subject{session: sess, project: proj, repo: repo, branch: branch, known: known, hasPR: hasPR} + if hasPR && known.Number > 0 { + out[prKey(repo, known.Number)] = s + } else { + out["session:"+string(sess.ID)] = s + } + } + return out, nil +} + +func chooseKnownPR(prs []domain.PullRequest) (domain.PullRequest, bool) { + if len(prs) == 0 { + return domain.PullRequest{}, false + } + for _, pr := range prs { + if pr.Number > 0 && !pr.Merged && !pr.Closed { + return pr, true + } + } + for _, pr := range prs { + if pr.Number > 0 { + return pr, true + } + } + return domain.PullRequest{}, false +} + +func (o *Observer) guardRepos(ctx context.Context, subjects map[string]*subject) map[string]repoGuardState { + repos := map[string]ports.SCMRepo{} + for _, s := range subjects { + repos[prKey(s.repo, 0)] = s.repo + } + out := map[string]repoGuardState{} + for key, repo := range repos { + prev, had := o.Cache.RepoPRListETag[key] + res, err := o.provider.RepoPRListGuard(ctx, repo, prev) + if err != nil { + o.logger.Error("scm observer: repo PR-list guard failed", "repo", repoFullName(repo), "err", err) + out[key] = repoGuardState{hadETag: had, err: err} + continue + } + if res.ETag != "" { + o.cacheSetString(o.Cache.RepoPRListETag, &o.Cache.repoOrder, key, res.ETag) + } + out[key] = repoGuardState{result: res, hadETag: had} + } + return out +} + +func (o *Observer) detectMissingPRs(ctx context.Context, subjects map[string]*subject, guards map[string]repoGuardState, now time.Time) { + for oldKey, s := range subjects { + if s.hasPR { + continue + } + g := guards[prKey(s.repo, 0)] + if g.err != nil { + continue + } + if g.result.NotModified && g.hadETag { + continue + } + pr, err := o.provider.DetectPRByBranch(ctx, s.repo, s.branch) + if err != nil { + o.logger.Debug("scm observer: no PR detected for branch", "session", s.session.ID, "branch", s.branch, "err", err) + continue + } + if pr.Number <= 0 { + continue + } + s.known = domain.PullRequest{URL: pr.URL, SessionID: s.session.ID, Number: pr.Number, SourceBranch: pr.SourceBranch, TargetBranch: pr.TargetBranch, HeadSHA: pr.HeadSHA, Provider: s.repo.Provider, Host: s.repo.Host, Repo: repoFullName(s.repo), UpdatedAt: now} + s.hasPR = true + s.newlyDetected = true + delete(subjects, oldKey) + subjects[prKey(s.repo, pr.Number)] = s + } +} + +func (o *Observer) selectRefreshCandidates(ctx context.Context, subjects map[string]*subject, guards map[string]repoGuardState) ([]ports.SCMPRRef, map[string]*subject) { + var refs []ports.SCMPRRef + subjectsByPR := map[string]*subject{} + seen := map[string]bool{} + for _, s := range subjects { + if !s.hasPR || s.known.Number <= 0 { + continue + } + key := prKey(s.repo, s.known.Number) + subjectsByPR[key] = s + candidate := s.newlyDetected || missingLocalState(s.known) + g := guards[prKey(s.repo, 0)] + if g.err == nil && !g.result.NotModified { + candidate = true + } + if s.known.HeadSHA != "" { + commitKey := commitKey(s.repo, s.known.HeadSHA) + prev := o.Cache.CommitChecksETag[commitKey] + res, err := o.provider.CommitChecksGuard(ctx, s.repo, s.known.HeadSHA, prev) + if err != nil { + o.logger.Error("scm observer: commit check-runs guard failed", "pr", s.known.URL, "sha", s.known.HeadSHA, "err", err) + } else { + if res.ETag != "" { + o.cacheSetString(o.Cache.CommitChecksETag, &o.Cache.commitOrder, commitKey, res.ETag) + } + if !res.NotModified { + candidate = true + } + } + } + if candidate && !seen[key] { + refs = append(refs, ports.SCMPRRef{Repo: s.repo, Number: s.known.Number, URL: s.known.URL}) + seen[key] = true + } + } + return refs, subjectsByPR +} + +func missingLocalState(pr domain.PullRequest) bool { + return pr.URL == "" || pr.HeadSHA == "" || pr.MetadataHash == "" || pr.CIHash == "" +} + +func (o *Observer) enrichFailureLogs(ctx context.Context, obs *ports.SCMObservation, local domain.PullRequest) { + if obs.CI.Summary != string(domain.CIFailing) || obs.CI.FailedFingerprint == "" { + return + } + if strings.HasPrefix(local.CIHash, obs.CI.FailedFingerprint+":") { + checks, err := o.store.ListChecks(ctx, local.URL) + if err == nil && applyStoredFailedLogTails(obs, checks) { + return + } + } + tails := make([]string, 0, len(obs.CI.FailedChecks)) + for i := range obs.CI.FailedChecks { + tail, err := o.provider.FetchFailedCheckLogTail(ctx, ports.SCMRepo{Provider: obs.Provider, Host: obs.Host, Repo: obs.Repo, Owner: ownerOf(obs.Repo), Name: nameOf(obs.Repo)}, obs.CI.FailedChecks[i]) + if err != nil { + tail = "" + } + obs.CI.FailedChecks[i].LogTail = tail + if tail != "" { + tails = append(tails, tail) + } + for j := range obs.CI.Checks { + if obs.CI.Checks[j].Name == obs.CI.FailedChecks[i].Name && obs.CI.Checks[j].ProviderID == obs.CI.FailedChecks[i].ProviderID { + obs.CI.Checks[j].LogTail = tail + } + } + } + obs.CI.FailureLogTail = strings.Join(tails, "\n---\n") +} + +func applyStoredFailedLogTails(obs *ports.SCMObservation, checks []domain.PullRequestCheck) bool { + tailsByName := map[string]string{} + for _, ch := range checks { + if ch.LogTail != "" && (ch.Status == domain.PRCheckFailed || ch.Status == domain.PRCheckCancelled) { + tailsByName[ch.Name] = ch.LogTail + } + } + if len(tailsByName) == 0 { + return false + } + tails := make([]string, 0, len(obs.CI.FailedChecks)) + for i := range obs.CI.FailedChecks { + tail := tailsByName[obs.CI.FailedChecks[i].Name] + if tail == "" { + return false + } + obs.CI.FailedChecks[i].LogTail = tail + tails = append(tails, tail) + } + for i := range obs.CI.Checks { + if tail := tailsByName[obs.CI.Checks[i].Name]; tail != "" { + obs.CI.Checks[i].LogTail = tail + } + } + obs.CI.FailureLogTail = strings.Join(tails, "\n---\n") + return true +} + +func (o *Observer) refreshReviews(ctx context.Context, subjects map[string]*subject, observations map[string]ports.SCMObservation, subjectsByPR map[string]*subject, reviewRefreshed map[string]bool, now time.Time) { + for _, s := range subjects { + if !s.hasPR || s.known.Number <= 0 { + continue + } + pkey := prKey(s.repo, s.known.Number) + obs, hasObs := observations[pkey] + decision := string(s.known.Review) + if hasObs && obs.Review.Decision != "" { + decision = obs.Review.Decision + } + if !o.needsReviewRefresh(pkey, s.known, decision, hasObs, now) { + continue + } + review, err := o.provider.FetchReviewThreads(ctx, ports.SCMPRRef{Repo: s.repo, Number: s.known.Number, URL: s.known.URL}) + if err != nil { + o.logger.Error("scm observer: review refresh failed", "pr", s.known.URL, "err", err) + continue + } + if !hasObs { + obs = observationFromLocal(s.repo, s.known) + } + if review.Decision != "" { + obs.Review.Decision = review.Decision + } + obs.Review.Threads = review.Threads + obs.ObservedAt = now + observations[pkey] = obs + subjectsByPR[pkey] = s + reviewRefreshed[pkey] = true + o.cacheSetTime(o.Cache.LastReviewPollAt, &o.Cache.lastReviewPollOrder, pkey, now) + } +} + +func (o *Observer) needsReviewRefresh(key string, local domain.PullRequest, decision string, hasObs bool, now time.Time) bool { + if local.ReviewHash == "" { + return true + } + if decision == string(domain.ReviewChangesRequest) { + last := o.Cache.LastReviewPollAt[key] + return last.IsZero() || now.Sub(last) >= o.reviewInterval + } + if hasObs && decision != string(local.Review) { + return true + } + if local.ReviewHash != "" && string(local.Review) == string(domain.ReviewChangesRequest) && decision != string(domain.ReviewChangesRequest) { + return true + } + return false +} + +func (o *Observer) prepareForPersistence(obs ports.SCMObservation, local domain.PullRequest, reviewFetched bool, now time.Time) ports.SCMObservation { + metadataHash := metadataSemanticHash(obs) + ciHash := ciSemanticHash(obs.CI) + reviewHash := local.ReviewHash + if reviewFetched || local.ReviewHash == "" || obs.Review.Decision != string(local.Review) { + reviewHash = reviewSemanticHash(obs.Review) + } + obs.Changed = ports.SCMChanged{ + Metadata: metadataHash != local.MetadataHash, + CI: ciHash != local.CIHash, + Review: reviewHash != local.ReviewHash, + } + obs.PR.State = firstNonEmpty(obs.PR.State, normalizePRState(obs.PR.Draft, obs.PR.Merged, obs.PR.Closed)) + obs.ObservedAt = firstTime(obs.ObservedAt, now) + return obs +} + +func domainFromObservation(sessionID domain.SessionID, obs ports.SCMObservation, local domain.PullRequest, reviewFetched bool, now time.Time) (domain.PullRequest, []domain.PullRequestCheck, []domain.PullRequestReviewThread, []domain.PullRequestComment) { + metadataHash := metadataSemanticHash(obs) + ciHash := ciSemanticHash(obs.CI) + reviewHash := reviewSemanticHash(obs.Review) + reviewDecision := domain.ReviewDecision(firstNonEmpty(obs.Review.Decision, string(domain.ReviewNone))) + if !reviewFetched && local.ReviewHash != "" && reviewDecision == local.Review { + reviewHash = local.ReviewHash + } + reviewObservedAt := local.ReviewObservedAt + if reviewFetched || reviewObservedAt.IsZero() { + reviewObservedAt = obs.ObservedAt + } + pr := domain.PullRequest{ + URL: firstNonEmpty(obs.PR.URL, obs.PR.HTMLURL), + SessionID: sessionID, + Number: obs.PR.Number, + Draft: obs.PR.Draft, + Merged: obs.PR.Merged, + Closed: obs.PR.Closed, + CI: domain.CIState(firstNonEmpty(obs.CI.Summary, string(domain.CIUnknown))), + Review: reviewDecision, + Mergeability: domain.Mergeability(firstNonEmpty(obs.Mergeability.State, string(domain.MergeUnknown))), + UpdatedAt: now, + Provider: obs.Provider, + Host: obs.Host, + Repo: obs.Repo, + SourceBranch: obs.PR.SourceBranch, + TargetBranch: obs.PR.TargetBranch, + HeadSHA: obs.PR.HeadSHA, + Title: obs.PR.Title, + Additions: obs.PR.Additions, + Deletions: obs.PR.Deletions, + ChangedFiles: obs.PR.ChangedFiles, + Author: obs.PR.Author, + BaseSHA: obs.PR.BaseSHA, + MergeCommitSHA: obs.PR.MergeCommitSHA, + ProviderState: obs.PR.ProviderState, + ProviderMergeable: obs.PR.ProviderMergeable, + ProviderMergeStateStatus: obs.PR.ProviderMergeStateStatus, + HTMLURL: obs.PR.HTMLURL, + CreatedAtProvider: obs.PR.CreatedAtProvider, + UpdatedAtProvider: obs.PR.UpdatedAtProvider, + MergedAtProvider: obs.PR.MergedAtProvider, + ClosedAtProvider: obs.PR.ClosedAtProvider, + MetadataHash: metadataHash, + CIHash: ciHash, + ReviewHash: reviewHash, + ObservedAt: obs.ObservedAt, + CIObservedAt: obs.ObservedAt, + ReviewObservedAt: reviewObservedAt, + } + checks := make([]domain.PullRequestCheck, 0, len(obs.CI.Checks)) + for _, ch := range obs.CI.Checks { + checks = append(checks, domain.PullRequestCheck{Name: ch.Name, CommitHash: obs.CI.HeadSHA, Status: domain.PRCheckStatus(ch.Status), Conclusion: ch.Conclusion, URL: ch.URL, Details: ch.ProviderID, LogTail: ch.LogTail, CreatedAt: now}) + } + threads := make([]domain.PullRequestReviewThread, 0, len(obs.Review.Threads)) + commentCount := 0 + for _, th := range obs.Review.Threads { + commentCount += len(th.Comments) + } + comments := make([]domain.PullRequestComment, 0, commentCount) + for _, th := range obs.Review.Threads { + threads = append(threads, domain.PullRequestReviewThread{ThreadID: th.ID, Path: th.Path, Line: th.Line, Resolved: th.Resolved, IsBot: th.IsBot, SemanticHash: threadSemanticHash(th), UpdatedAt: now}) + for _, c := range th.Comments { + comments = append(comments, domain.PullRequestComment{ThreadID: th.ID, ID: c.ID, Author: c.Author, File: th.Path, Line: th.Line, Body: c.Body, URL: c.URL, Resolved: th.Resolved, IsBot: c.IsBot || th.IsBot, CreatedAt: now}) + } + } + return pr, checks, threads, comments +} + +func observationFromLocal(repo ports.SCMRepo, pr domain.PullRequest) ports.SCMObservation { + return ports.SCMObservation{ + Fetched: true, + Provider: firstNonEmpty(pr.Provider, repo.Provider), + Host: firstNonEmpty(pr.Host, repo.Host), + Repo: firstNonEmpty(pr.Repo, repoFullName(repo)), + PR: ports.SCMPRObservation{URL: pr.URL, Number: pr.Number, State: normalizePRState(pr.Draft, pr.Merged, pr.Closed), Draft: pr.Draft, Merged: pr.Merged, Closed: pr.Closed, SourceBranch: pr.SourceBranch, TargetBranch: pr.TargetBranch, HeadSHA: pr.HeadSHA, Title: pr.Title, Additions: pr.Additions, Deletions: pr.Deletions, ChangedFiles: pr.ChangedFiles, Author: pr.Author, BaseSHA: pr.BaseSHA, MergeCommitSHA: pr.MergeCommitSHA, ProviderState: pr.ProviderState, ProviderMergeable: pr.ProviderMergeable, ProviderMergeStateStatus: pr.ProviderMergeStateStatus, HTMLURL: pr.HTMLURL, CreatedAtProvider: pr.CreatedAtProvider, UpdatedAtProvider: pr.UpdatedAtProvider, MergedAtProvider: pr.MergedAtProvider, ClosedAtProvider: pr.ClosedAtProvider}, + CI: ports.SCMCIObservation{Summary: string(pr.CI), HeadSHA: pr.HeadSHA}, + Review: ports.SCMReviewObservation{Decision: string(pr.Review)}, + Mergeability: ports.SCMMergeabilityObservation{State: string(pr.Mergeability)}, + } +} + +func chunks[T any](in []T, n int) [][]T { + if n <= 0 || len(in) == 0 { + return nil + } + out := make([][]T, 0, (len(in)+n-1)/n) + for len(in) > 0 { + end := n + if len(in) < end { + end = len(in) + } + out = append(out, in[:end]) + in = in[end:] + } + return out +} + +func metadataSemanticHash(obs ports.SCMObservation) string { + return stableHash(map[string]any{"provider": obs.Provider, "host": obs.Host, "repo": obs.Repo, "pr": obs.PR, "mergeability": obs.Mergeability}) +} + +func ciSemanticHash(ci ports.SCMCIObservation) string { + h := stableHash(map[string]any{"summary": ci.Summary, "head": ci.HeadSHA, "checks": ci.Checks, "failed": ci.FailedChecks, "tail": ci.FailureLogTail}) + if ci.FailedFingerprint != "" { + return ci.FailedFingerprint + ":" + h + } + return h +} + +func reviewSemanticHash(review ports.SCMReviewObservation) string { + return stableHash(review) +} + +func threadSemanticHash(th ports.SCMReviewThreadObservation) string { + return stableHash(th) +} + +func stableHash(v any) string { + b, err := json.Marshal(v) + if err != nil { + b = []byte(fmt.Sprintf("%#v", v)) + } + sum := sha256.Sum256(b) + return hex.EncodeToString(sum[:]) +} + +func prKeyFromObs(obs ports.SCMObservation) string { + if obs.Repo == "" || obs.PR.Number <= 0 { + return "" + } + return obs.Provider + ":" + obs.Host + ":" + obs.Repo + "#" + fmt.Sprint(obs.PR.Number) +} + +func prKey(repo ports.SCMRepo, number int) string { + base := repo.Provider + ":" + repo.Host + ":" + repoFullName(repo) + if number <= 0 { + return base + } + return base + "#" + fmt.Sprint(number) +} + +func commitKey(repo ports.SCMRepo, sha string) string { return prKey(repo, 0) + "@" + sha } + +func repoFullName(repo ports.SCMRepo) string { + if repo.Repo != "" { + return repo.Repo + } + return repo.Owner + "/" + repo.Name +} + +func ownerOf(full string) string { + parts := strings.SplitN(full, "/", 2) + if len(parts) == 2 { + return parts[0] + } + return "" +} + +func nameOf(full string) string { + parts := strings.SplitN(full, "/", 2) + if len(parts) == 2 { + return parts[1] + } + return full +} + +func firstNonEmpty(vals ...string) string { + for _, v := range vals { + if strings.TrimSpace(v) != "" { + return v + } + } + return "" +} + +func firstTime(a, b time.Time) time.Time { + if !a.IsZero() { + return a + } + return b +} + +func normalizePRState(draft, merged, closed bool) string { + switch { + case merged: + return string(domain.PRStateMerged) + case closed: + return string(domain.PRStateClosed) + case draft: + return string(domain.PRStateDraft) + default: + return string(domain.PRStateOpen) + } +} + +func scrubLine(s string) string { + s = strings.ReplaceAll(s, "\n", " ") + s = strings.ReplaceAll(s, "\r", " ") + return strings.TrimSpace(s) +} + +func (o *Observer) cacheSetString(m map[string]string, order *[]string, key, value string) { + if _, ok := m[key]; !ok { + *order = append(*order, key) + } + m[key] = value + o.evictStrings(m, order) +} + +func (o *Observer) cacheSetTime(m map[string]time.Time, order *[]string, key string, value time.Time) { + if _, ok := m[key]; !ok { + *order = append(*order, key) + } + m[key] = value + for len(*order) > o.Cache.max { + evict := (*order)[0] + *order = (*order)[1:] + delete(m, evict) + } +} + +func (o *Observer) evictStrings(m map[string]string, order *[]string) { + for len(*order) > o.Cache.max { + evict := (*order)[0] + *order = (*order)[1:] + delete(m, evict) + } +} diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go new file mode 100644 index 00000000..db498beb --- /dev/null +++ b/backend/internal/observe/scm/observer_test.go @@ -0,0 +1,388 @@ +package scm + +import ( + "context" + "errors" + "fmt" + "io" + "log/slog" + "sync" + "testing" + "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var testRepo = ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "o", Name: "r", Repo: "o/r"} + +type fakeStore struct { + mu sync.Mutex + + sessions []domain.SessionRecord + projects map[string]domain.ProjectRecord + prs map[domain.SessionID][]domain.PullRequest + checks map[string][]domain.PullRequestCheck + + writes []fakeWrite + + listEntered chan struct{} + listRelease chan struct{} +} + +type fakeWrite struct { + pr domain.PullRequest + checks []domain.PullRequestCheck + threads []domain.PullRequestReviewThread + comments []domain.PullRequestComment + replaceReview bool +} + +func (s *fakeStore) ListAllSessions(ctx context.Context) ([]domain.SessionRecord, error) { + if s.listEntered != nil { + select { + case <-s.listEntered: + default: + close(s.listEntered) + } + } + if s.listRelease != nil { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-s.listRelease: + } + } + s.mu.Lock() + defer s.mu.Unlock() + return append([]domain.SessionRecord(nil), s.sessions...), nil +} + +func (s *fakeStore) GetProject(_ context.Context, id string) (domain.ProjectRecord, bool, error) { + s.mu.Lock() + defer s.mu.Unlock() + p, ok := s.projects[id] + return p, ok, nil +} + +func (s *fakeStore) ListPRsBySession(_ context.Context, id domain.SessionID) ([]domain.PullRequest, error) { + s.mu.Lock() + defer s.mu.Unlock() + return append([]domain.PullRequest(nil), s.prs[id]...), nil +} + +func (s *fakeStore) ListChecks(_ context.Context, prURL string) ([]domain.PullRequestCheck, error) { + s.mu.Lock() + defer s.mu.Unlock() + return append([]domain.PullRequestCheck(nil), s.checks[prURL]...), nil +} + +func (s *fakeStore) WriteSCMObservation(_ context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error { + s.mu.Lock() + defer s.mu.Unlock() + s.writes = append(s.writes, fakeWrite{pr: pr, checks: append([]domain.PullRequestCheck(nil), checks...), threads: append([]domain.PullRequestReviewThread(nil), threads...), comments: append([]domain.PullRequestComment(nil), comments...), replaceReview: replaceReview}) + return nil +} + +type fakeProvider struct { + mu sync.Mutex + repoGuards map[string]ports.SCMGuardResult + checkGuards map[string]ports.SCMGuardResult + detected map[string]ports.SCMPRObservation + observations map[string]ports.SCMObservation + reviews map[string]ports.SCMReviewObservation + logTails map[string]string + + detectCalls int + fetchBatches [][]ports.SCMPRRef + logCalls int + reviewCalls int +} + +func (p *fakeProvider) ParseRepository(remote string) (ports.SCMRepo, bool) { + return testRepo, remote != "" +} +func (p *fakeProvider) RepoPRListGuard(_ context.Context, repo ports.SCMRepo, _ string) (ports.SCMGuardResult, error) { + return p.repoGuards[prKey(repo, 0)], nil +} +func (p *fakeProvider) DetectPRByBranch(_ context.Context, _ ports.SCMRepo, branch string) (ports.SCMPRObservation, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.detectCalls++ + pr, ok := p.detected[branch] + if !ok { + return ports.SCMPRObservation{}, errors.New("not found") + } + return pr, nil +} +func (p *fakeProvider) CommitChecksGuard(_ context.Context, repo ports.SCMRepo, sha, _ string) (ports.SCMGuardResult, error) { + return p.checkGuards[commitKey(repo, sha)], nil +} +func (p *fakeProvider) FetchPullRequests(_ context.Context, refs []ports.SCMPRRef) ([]ports.SCMObservation, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.fetchBatches = append(p.fetchBatches, append([]ports.SCMPRRef(nil), refs...)) + out := make([]ports.SCMObservation, 0, len(refs)) + for _, ref := range refs { + if obs, ok := p.observations[prKey(ref.Repo, ref.Number)]; ok { + out = append(out, obs) + } + } + return out, nil +} +func (p *fakeProvider) FetchFailedCheckLogTail(_ context.Context, _ ports.SCMRepo, check ports.SCMCheckObservation) (string, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.logCalls++ + return p.logTails[check.Name], nil +} +func (p *fakeProvider) FetchReviewThreads(_ context.Context, ref ports.SCMPRRef) (ports.SCMReviewObservation, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.reviewCalls++ + return p.reviews[prKey(ref.Repo, ref.Number)], nil +} + +type fakeLifecycle struct{ observed []ports.SCMObservation } + +func (l *fakeLifecycle) ApplySCMObservation(_ context.Context, _ domain.SessionID, obs ports.SCMObservation) error { + l.observed = append(l.observed, obs) + return nil +} + +func newTestObserver(store *fakeStore, provider *fakeProvider, lc *fakeLifecycle, now time.Time) *Observer { + return New(provider, store, lc, Config{Clock: func() time.Time { return now }, Tick: time.Hour, Logger: quietSlog(), CacheMax: 128}) +} + +func quietSlog() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) } + +func testStoreWithSession() *fakeStore { + return &fakeStore{ + sessions: []domain.SessionRecord{{ID: "p-1", ProjectID: "p", Metadata: domain.SessionMetadata{Branch: "feat"}}}, + projects: map[string]domain.ProjectRecord{"p": {ID: "p", RepoOriginURL: "https://github.com/o/r.git"}}, + prs: map[domain.SessionID][]domain.PullRequest{}, + checks: map[string][]domain.PullRequestCheck{}, + } +} + +func testObs(num int) ports.SCMObservation { + return ports.SCMObservation{ + Fetched: true, Provider: "github", Host: "github.com", Repo: "o/r", + PR: ports.SCMPRObservation{URL: "https://github.com/o/r/pull/" + fmt.Sprint(num), Number: num, State: "open", SourceBranch: "feat", TargetBranch: "main", HeadSHA: "sha" + fmt.Sprint(num), Title: "PR"}, + CI: ports.SCMCIObservation{Summary: string(domain.CIPassing), HeadSHA: "sha" + fmt.Sprint(num), Checks: []ports.SCMCheckObservation{{Name: "build", Status: string(domain.PRCheckPassed), Conclusion: "success", URL: "ci"}}}, + Review: ports.SCMReviewObservation{Decision: string(domain.ReviewNone)}, + Mergeability: ports.SCMMergeabilityObservation{State: string(domain.MergeMergeable), Mergeable: true}, + } +} + +func knownPR(num int) domain.PullRequest { + obs := testObs(num) + pr, _, _, _ := domainFromObservation("p-1", obs, domain.PullRequest{}, false, time.Unix(1, 0).UTC()) + return pr +} + +func TestStartAsyncPerformsImmediatePollAndStopsOnCancel(t *testing.T) { + store := testStoreWithSession() + store.listEntered = make(chan struct{}) + store.listRelease = make(chan struct{}) + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{}, observations: map[string]ports.SCMObservation{}} + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + ctx, cancel := context.WithCancel(context.Background()) + done := obs.Start(ctx) + select { + case <-store.listEntered: + case <-time.After(time.Second): + t.Fatal("initial poll did not start asynchronously") + } + cancel() + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("observer did not exit after context cancellation") + } +} + +func TestPoll_RepoETag304SkipsDetectPR(t *testing.T) { + store := testStoreWithSession() + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v1", NotModified: true}}, observations: map[string]ports.SCMObservation{}} + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "v1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.detectCalls != 0 { + t.Fatalf("detectPR called on 304: %d", provider.detectCalls) + } +} + +func TestPoll_RepoETag200DetectsPRAndRefreshesSamePoll(t *testing.T) { + store := testStoreWithSession() + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v2"}}, + detected: map[string]ports.SCMPRObservation{"feat": {URL: "https://github.com/o/r/pull/1", Number: 1, SourceBranch: "feat", TargetBranch: "main", HeadSHA: "sha1"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): testObs(1)}, + } + lc := &fakeLifecycle{} + obs := newTestObserver(store, provider, lc, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.detectCalls != 1 { + t.Fatalf("detectPR calls = %d, want 1", provider.detectCalls) + } + if len(provider.fetchBatches) != 1 || len(provider.fetchBatches[0]) != 1 || provider.fetchBatches[0][0].Number != 1 { + t.Fatalf("new PR not refreshed in same poll: %#v", provider.fetchBatches) + } + if len(store.writes) != 1 || len(lc.observed) != 1 { + t.Fatalf("write/lifecycle missing: writes=%d lifecycle=%d", len(store.writes), len(lc.observed)) + } +} + +func TestPoll_CIETagChangeRefreshesWhenRepoUnchanged(t *testing.T) { + store := testStoreWithSession() + store.prs["p-1"] = []domain.PullRequest{knownPR(1)} + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, + checkGuards: map[string]ports.SCMGuardResult{commitKey(testRepo, "sha1"): {ETag: "ci2"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): testObs(1)}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(2, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")] = "ci1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(provider.fetchBatches) != 1 { + t.Fatalf("CI ETag 200 should trigger batch fetch, got %d", len(provider.fetchBatches)) + } +} + +func TestPoll_GraphQLBatchChunksAt25(t *testing.T) { + store := &fakeStore{projects: map[string]domain.ProjectRecord{"p": {ID: "p", RepoOriginURL: "https://github.com/o/r.git"}}, prs: map[domain.SessionID][]domain.PullRequest{}, checks: map[string][]domain.PullRequestCheck{}} + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo"}}, observations: map[string]ports.SCMObservation{}} + for i := 1; i <= 26; i++ { + id := domain.SessionID("p-" + fmt.Sprint(i)) + store.sessions = append(store.sessions, domain.SessionRecord{ID: id, ProjectID: "p", Metadata: domain.SessionMetadata{Branch: "b" + fmt.Sprint(i)}}) + pr := knownPR(i) + pr.SessionID = id + pr.MetadataHash = "" // force candidate + store.prs[id] = []domain.PullRequest{pr} + provider.observations[prKey(testRepo, i)] = testObs(i) + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(provider.fetchBatches) != 2 || len(provider.fetchBatches[0]) != 25 || len(provider.fetchBatches[1]) != 1 { + t.Fatalf("batch sizes = %#v", provider.fetchBatches) + } +} + +func TestPoll_FailingCIFetchesLogTailOnlyWhenFingerprintChanged(t *testing.T) { + failing := testObs(1) + failing.CI.Summary = string(domain.CIFailing) + failing.CI.Checks = []ports.SCMCheckObservation{{Name: "build", Status: string(domain.PRCheckFailed), Conclusion: "failure", ProviderID: "99"}} + failing.CI.FailedChecks = failing.CI.Checks + failing.CI.FailedFingerprint = "fp" + + store := testStoreWithSession() + local := knownPR(1) + local.CIHash = "old" + store.prs["p-1"] = []domain.PullRequest{local} + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo"}}, checkGuards: map[string]ports.SCMGuardResult{commitKey(testRepo, "sha1"): {ETag: "ci2"}}, observations: map[string]ports.SCMObservation{prKey(testRepo, 1): failing}, logTails: map[string]string{"build": "tail"}} + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.logCalls != 1 { + t.Fatalf("log calls = %d, want 1", provider.logCalls) + } + + provider.logCalls = 0 + store.writes = nil + withTail := failing + withTail.CI.Checks[0].LogTail = "tail" + withTail.CI.FailedChecks[0].LogTail = "tail" + withTail.CI.FailureLogTail = "tail" + local.CIHash = ciSemanticHash(withTail.CI) + store.prs["p-1"] = []domain.PullRequest{local} + store.checks[local.URL] = []domain.PullRequestCheck{{Name: "build", Status: domain.PRCheckFailed, LogTail: "tail"}} + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.logCalls != 0 { + t.Fatalf("unchanged fingerprint fetched logs again: %d", provider.logCalls) + } + if len(store.writes) != 0 { + t.Fatalf("unchanged failed fingerprint with stored log tail should not write, got %d writes", len(store.writes)) + } +} + +func TestPoll_ReviewPollingRespectsInterval(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.Review = domain.ReviewChangesRequest + local.ReviewHash = "old-review" + store.prs["p-1"] = []domain.PullRequest{local} + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, observations: map[string]ports.SCMObservation{}, reviews: map[string]ports.SCMReviewObservation{prKey(testRepo, 1): {Decision: string(domain.ReviewChangesRequest), Threads: []ports.SCMReviewThreadObservation{{ID: "t1", Path: "f.go", Line: 1, Comments: []ports.SCMReviewCommentObservation{{ID: "c1", Body: "fix"}}}}}}} + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(120, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + obs.Cache.LastReviewPollAt[prKey(testRepo, 1)] = time.Unix(90, 0).UTC() + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.reviewCalls != 0 { + t.Fatalf("review fetched before interval: %d", provider.reviewCalls) + } + obs.Cache.LastReviewPollAt[prKey(testRepo, 1)] = time.Unix(0, 0).UTC() + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.reviewCalls != 1 { + t.Fatalf("review not fetched after interval: %d", provider.reviewCalls) + } + if len(store.writes) != 1 || !store.writes[0].replaceReview { + t.Fatalf("review refresh not persisted with replaceReview: %#v", store.writes) + } +} + +func TestPoll_UnchangedHashesDoNotWriteOrNotify(t *testing.T) { + store := testStoreWithSession() + obsValue := testObs(1) + local := knownPR(1) + local.MetadataHash = metadataSemanticHash(obsValue) + local.CIHash = ciSemanticHash(obsValue.CI) + local.ReviewHash = reviewSemanticHash(obsValue.Review) + store.prs["p-1"] = []domain.PullRequest{local} + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo"}}, observations: map[string]ports.SCMObservation{prKey(testRepo, 1): obsValue}} + lc := &fakeLifecycle{} + obs := newTestObserver(store, provider, lc, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) != 0 || len(lc.observed) != 0 { + t.Fatalf("unchanged hashes wrote/notified: writes=%d observed=%d", len(store.writes), len(lc.observed)) + } +} + +func TestPoll_ReviewHashDrivesPersistenceAndLifecycle(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.ReviewHash = "old" + local.Review = domain.ReviewChangesRequest + store.prs["p-1"] = []domain.PullRequest{local} + review := ports.SCMReviewObservation{Decision: string(domain.ReviewChangesRequest), Threads: []ports.SCMReviewThreadObservation{{ID: "t1", Path: "f.go", Line: 2, Comments: []ports.SCMReviewCommentObservation{{ID: "c1", Author: "ann", Body: "fix this"}}}}} + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, observations: map[string]ports.SCMObservation{}, reviews: map[string]ports.SCMReviewObservation{prKey(testRepo, 1): review}} + lc := &fakeLifecycle{} + obs := newTestObserver(store, provider, lc, time.Unix(200, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) != 1 || len(store.writes[0].comments) != 1 { + t.Fatalf("review change not persisted: %#v", store.writes) + } + if len(lc.observed) != 1 || !lc.observed[0].Changed.Review { + t.Fatalf("review change not notified: %#v", lc.observed) + } +} diff --git a/backend/internal/ports/outbound.go b/backend/internal/ports/outbound.go index 2614f5bb..414a0218 100644 --- a/backend/internal/ports/outbound.go +++ b/backend/internal/ports/outbound.go @@ -15,6 +15,13 @@ type PRWriter interface { WritePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, comments []domain.PullRequestComment) error } +// SCMWriter records provider-neutral SCM observations. Review facts are only +// replaced when replaceReview is true so metadata/CI-only refreshes preserve the +// last known review threads/comments. +type SCMWriter interface { + WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error +} + // AgentMessenger injects a message into a running agent. type AgentMessenger interface { Send(ctx context.Context, id domain.SessionID, message string) error diff --git a/backend/internal/ports/scm_observations.go b/backend/internal/ports/scm_observations.go new file mode 100644 index 00000000..07d9e549 --- /dev/null +++ b/backend/internal/ports/scm_observations.go @@ -0,0 +1,140 @@ +package ports + +import "time" + +// SCMRepo identifies a repository without assuming a provider-specific URL +// shape. Repo is conventionally "owner/name" for providers that expose an +// owner namespace, while Owner/Name are kept split for provider calls. +type SCMRepo struct { + Provider string + Host string + Owner string + Name string + Repo string +} + +// SCMPRRef identifies a pull request within a provider-neutral repository. +type SCMPRRef struct { + Repo SCMRepo + Number int + URL string +} + +// SCMGuardResult is an ETag-style cache guard result. NotModified maps to HTTP +// 304 for providers that support it. +type SCMGuardResult struct { + ETag string + NotModified bool +} + +// SCMObservation is the provider-neutral pull-request observation emitted by +// the SCM observer and consumed by lifecycle. Provider adapters normalize their +// SCM-specific payloads into this DTO before the observer persists/notifies. +type SCMObservation struct { + Fetched bool + ObservedAt time.Time + + Provider string + Host string + Repo string + + PR SCMPRObservation + CI SCMCIObservation + Review SCMReviewObservation + Mergeability SCMMergeabilityObservation + + Changed SCMChanged +} + +// SCMChanged marks which semantic state buckets changed in the successful poll. +type SCMChanged struct { + Metadata bool + CI bool + Review bool +} + +// SCMPRObservation carries provider-neutral PR metadata. +type SCMPRObservation struct { + URL string + Number int + State string + Draft bool + Merged bool + Closed bool + SourceBranch string + TargetBranch string + HeadSHA string + Title string + Additions int + Deletions int + ChangedFiles int + Author string + BaseSHA string + MergeCommitSHA string + + ProviderState string + ProviderMergeable string + ProviderMergeStateStatus string + HTMLURL string + + CreatedAtProvider time.Time + UpdatedAtProvider time.Time + MergedAtProvider time.Time + ClosedAtProvider time.Time +} + +// SCMCIObservation carries aggregate CI state plus failing-check details. +type SCMCIObservation struct { + Summary string + HeadSHA string + FailedFingerprint string + Checks []SCMCheckObservation + FailedChecks []SCMCheckObservation + FailureLogTail string +} + +// SCMCheckObservation is one normalized check/status context. ProviderID is an +// optional provider-owned identifier (for GitHub, Actions job/check-run id) used +// by the provider to fetch logs; consumers should not attach meaning to it. +type SCMCheckObservation struct { + Name string + Status string + Conclusion string + URL string + LogTail string + ProviderID string +} + +// SCMReviewObservation carries normalized review-decision and review-thread facts. +type SCMReviewObservation struct { + Decision string + Threads []SCMReviewThreadObservation +} + +// SCMReviewThreadObservation is a normalized review thread with comments. +type SCMReviewThreadObservation struct { + ID string + Path string + Line int + Resolved bool + IsBot bool + Comments []SCMReviewCommentObservation +} + +// SCMReviewCommentObservation is one normalized review comment. +type SCMReviewCommentObservation struct { + ID string + Author string + Body string + URL string + IsBot bool +} + +// SCMMergeabilityObservation is the normalized mergeability verdict. +type SCMMergeabilityObservation struct { + State string + Mergeable bool + Conflict bool + BehindBase bool + Blockers []string +} diff --git a/backend/internal/session_manager/manager.go b/backend/internal/session_manager/manager.go index d1cc1007..8648e7a4 100644 --- a/backend/internal/session_manager/manager.go +++ b/backend/internal/session_manager/manager.go @@ -263,7 +263,7 @@ func (m *Manager) Cleanup(ctx context.Context, project domain.ProjectID) ([]doma if err != nil { return nil, fmt.Errorf("cleanup %s: %w", project, err) } - var cleaned []domain.SessionID + cleaned := make([]domain.SessionID, 0, len(recs)) for _, rec := range recs { if !rec.IsTerminated { continue diff --git a/backend/internal/storage/sqlite/gen/models.go b/backend/internal/storage/sqlite/gen/models.go index e65add74..b76bd2b7 100644 --- a/backend/internal/storage/sqlite/gen/models.go +++ b/backend/internal/storage/sqlite/gen/models.go @@ -22,14 +22,44 @@ type ChangeLog struct { } type PR struct { - URL string - SessionID domain.SessionID - Number int64 - PRState domain.PRState - ReviewDecision domain.ReviewDecision - CIState domain.CIState - Mergeability domain.Mergeability - UpdatedAt time.Time + URL string + SessionID domain.SessionID + Number int64 + PRState domain.PRState + ReviewDecision domain.ReviewDecision + CIState domain.CIState + Mergeability domain.Mergeability + UpdatedAt time.Time + Provider string + Host string + Repo string + SourceBranch string + TargetBranch string + HeadSha string + Title string + Additions int64 + Deletions int64 + ChangedFiles int64 + Author string + BaseSha string + MergeCommitSha string + IsDraft int64 + IsMerged int64 + IsClosed int64 + ProviderState string + ProviderMergeable string + ProviderMergeStateStatus string + HtmlURL string + CreatedAtProvider sql.NullTime + UpdatedAtProvider sql.NullTime + MergedAtProvider sql.NullTime + ClosedAtProvider sql.NullTime + MetadataHash string + CIHash string + ReviewHash string + ObservedAt sql.NullTime + CIObservedAt sql.NullTime + ReviewObservedAt sql.NullTime } type PRCheck struct { @@ -40,6 +70,8 @@ type PRCheck struct { URL string LogTail string CreatedAt time.Time + Conclusion string + Details string } type PRComment struct { @@ -51,6 +83,20 @@ type PRComment struct { Body string Resolved bool CreatedAt time.Time + ThreadID string + URL string + IsBot int64 +} + +type PRReviewThread struct { + PRURL string + ThreadID string + Path string + Line int64 + Resolved int64 + IsBot int64 + SemanticHash string + UpdatedAt time.Time } type Project struct { diff --git a/backend/internal/storage/sqlite/gen/pr.sql.go b/backend/internal/storage/sqlite/gen/pr.sql.go index 154885cf..63b1dbfa 100644 --- a/backend/internal/storage/sqlite/gen/pr.sql.go +++ b/backend/internal/storage/sqlite/gen/pr.sql.go @@ -7,6 +7,7 @@ package gen import ( "context" + "database/sql" "time" "github.com/aoagents/agent-orchestrator/backend/internal/domain" @@ -25,6 +26,7 @@ SELECT FROM pr_comment WHERE pr_comment.pr_url = pr.url AND pr_comment.resolved = 0 + AND pr_comment.is_bot = 0 ) AS review_comments FROM pr WHERE pr.session_id = ? @@ -60,7 +62,14 @@ func (q *Queries) GetDisplayPRFactsBySession(ctx context.Context, sessionID doma } const getPR = `-- name: GetPR :one -SELECT url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at +SELECT + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at FROM pr WHERE url = ? ` @@ -77,12 +86,49 @@ func (q *Queries) GetPR(ctx context.Context, url string) (PR, error) { &i.CIState, &i.Mergeability, &i.UpdatedAt, + &i.Provider, + &i.Host, + &i.Repo, + &i.SourceBranch, + &i.TargetBranch, + &i.HeadSha, + &i.Title, + &i.Additions, + &i.Deletions, + &i.ChangedFiles, + &i.Author, + &i.BaseSha, + &i.MergeCommitSha, + &i.IsDraft, + &i.IsMerged, + &i.IsClosed, + &i.ProviderState, + &i.ProviderMergeable, + &i.ProviderMergeStateStatus, + &i.HtmlURL, + &i.CreatedAtProvider, + &i.UpdatedAtProvider, + &i.MergedAtProvider, + &i.ClosedAtProvider, + &i.MetadataHash, + &i.CIHash, + &i.ReviewHash, + &i.ObservedAt, + &i.CIObservedAt, + &i.ReviewObservedAt, ) return i, err } const listPRsBySession = `-- name: ListPRsBySession :many -SELECT url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at +SELECT + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at FROM pr WHERE session_id = ? ORDER BY updated_at DESC @@ -106,6 +152,36 @@ func (q *Queries) ListPRsBySession(ctx context.Context, sessionID domain.Session &i.CIState, &i.Mergeability, &i.UpdatedAt, + &i.Provider, + &i.Host, + &i.Repo, + &i.SourceBranch, + &i.TargetBranch, + &i.HeadSha, + &i.Title, + &i.Additions, + &i.Deletions, + &i.ChangedFiles, + &i.Author, + &i.BaseSha, + &i.MergeCommitSha, + &i.IsDraft, + &i.IsMerged, + &i.IsClosed, + &i.ProviderState, + &i.ProviderMergeable, + &i.ProviderMergeStateStatus, + &i.HtmlURL, + &i.CreatedAtProvider, + &i.UpdatedAtProvider, + &i.MergedAtProvider, + &i.ClosedAtProvider, + &i.MetadataHash, + &i.CIHash, + &i.ReviewHash, + &i.ObservedAt, + &i.CIObservedAt, + &i.ReviewObservedAt, ); err != nil { return nil, err } @@ -121,26 +197,94 @@ func (q *Queries) ListPRsBySession(ctx context.Context, sessionID domain.Session } const upsertPR = `-- name: UpsertPR :exec -INSERT INTO pr (url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at) -VALUES (?, ?, ?, ?, ?, ?, ?, ?) +INSERT INTO pr ( + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT (url) DO UPDATE SET number = excluded.number, pr_state = excluded.pr_state, review_decision = excluded.review_decision, ci_state = excluded.ci_state, mergeability = excluded.mergeability, - updated_at = excluded.updated_at + updated_at = excluded.updated_at, + provider = excluded.provider, + host = excluded.host, + repo = excluded.repo, + source_branch = excluded.source_branch, + target_branch = excluded.target_branch, + head_sha = excluded.head_sha, + title = excluded.title, + additions = excluded.additions, + deletions = excluded.deletions, + changed_files = excluded.changed_files, + author = excluded.author, + base_sha = excluded.base_sha, + merge_commit_sha = excluded.merge_commit_sha, + is_draft = excluded.is_draft, + is_merged = excluded.is_merged, + is_closed = excluded.is_closed, + provider_state = excluded.provider_state, + provider_mergeable = excluded.provider_mergeable, + provider_merge_state_status = excluded.provider_merge_state_status, + html_url = excluded.html_url, + created_at_provider = excluded.created_at_provider, + updated_at_provider = excluded.updated_at_provider, + merged_at_provider = excluded.merged_at_provider, + closed_at_provider = excluded.closed_at_provider, + metadata_hash = excluded.metadata_hash, + ci_hash = excluded.ci_hash, + review_hash = excluded.review_hash, + observed_at = excluded.observed_at, + ci_observed_at = excluded.ci_observed_at, + review_observed_at = excluded.review_observed_at ` type UpsertPRParams struct { - URL string - SessionID domain.SessionID - Number int64 - PRState domain.PRState - ReviewDecision domain.ReviewDecision - CIState domain.CIState - Mergeability domain.Mergeability - UpdatedAt time.Time + URL string + SessionID domain.SessionID + Number int64 + PRState domain.PRState + ReviewDecision domain.ReviewDecision + CIState domain.CIState + Mergeability domain.Mergeability + UpdatedAt time.Time + Provider string + Host string + Repo string + SourceBranch string + TargetBranch string + HeadSha string + Title string + Additions int64 + Deletions int64 + ChangedFiles int64 + Author string + BaseSha string + MergeCommitSha string + IsDraft int64 + IsMerged int64 + IsClosed int64 + ProviderState string + ProviderMergeable string + ProviderMergeStateStatus string + HtmlURL string + CreatedAtProvider sql.NullTime + UpdatedAtProvider sql.NullTime + MergedAtProvider sql.NullTime + ClosedAtProvider sql.NullTime + MetadataHash string + CIHash string + ReviewHash string + ObservedAt sql.NullTime + CIObservedAt sql.NullTime + ReviewObservedAt sql.NullTime } func (q *Queries) UpsertPR(ctx context.Context, arg UpsertPRParams) error { @@ -153,6 +297,36 @@ func (q *Queries) UpsertPR(ctx context.Context, arg UpsertPRParams) error { arg.CIState, arg.Mergeability, arg.UpdatedAt, + arg.Provider, + arg.Host, + arg.Repo, + arg.SourceBranch, + arg.TargetBranch, + arg.HeadSha, + arg.Title, + arg.Additions, + arg.Deletions, + arg.ChangedFiles, + arg.Author, + arg.BaseSha, + arg.MergeCommitSha, + arg.IsDraft, + arg.IsMerged, + arg.IsClosed, + arg.ProviderState, + arg.ProviderMergeable, + arg.ProviderMergeStateStatus, + arg.HtmlURL, + arg.CreatedAtProvider, + arg.UpdatedAtProvider, + arg.MergedAtProvider, + arg.ClosedAtProvider, + arg.MetadataHash, + arg.CIHash, + arg.ReviewHash, + arg.ObservedAt, + arg.CIObservedAt, + arg.ReviewObservedAt, ) return err } diff --git a/backend/internal/storage/sqlite/gen/pr_checks.sql.go b/backend/internal/storage/sqlite/gen/pr_checks.sql.go index fde21e67..1ee0df2c 100644 --- a/backend/internal/storage/sqlite/gen/pr_checks.sql.go +++ b/backend/internal/storage/sqlite/gen/pr_checks.sql.go @@ -13,7 +13,7 @@ import ( ) const listChecksByPR = `-- name: ListChecksByPR :many -SELECT pr_url, name, commit_hash, status, url, log_tail, created_at +SELECT pr_url, name, commit_hash, status, url, log_tail, created_at, conclusion, details FROM pr_checks WHERE pr_url = ? ORDER BY name, created_at ` @@ -34,6 +34,8 @@ func (q *Queries) ListChecksByPR(ctx context.Context, prUrl string) ([]PRCheck, &i.URL, &i.LogTail, &i.CreatedAt, + &i.Conclusion, + &i.Details, ); err != nil { return nil, err } @@ -49,12 +51,14 @@ func (q *Queries) ListChecksByPR(ctx context.Context, prUrl string) ([]PRCheck, } const upsertPRCheck = `-- name: UpsertPRCheck :exec -INSERT INTO pr_checks (pr_url, name, commit_hash, status, url, log_tail, created_at) -VALUES (?, ?, ?, ?, ?, ?, ?) +INSERT INTO pr_checks (pr_url, name, commit_hash, status, url, log_tail, created_at, conclusion, details) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT (pr_url, name, commit_hash) DO UPDATE SET status = excluded.status, url = excluded.url, - log_tail = excluded.log_tail + log_tail = excluded.log_tail, + conclusion = excluded.conclusion, + details = excluded.details ` type UpsertPRCheckParams struct { @@ -65,6 +69,8 @@ type UpsertPRCheckParams struct { URL string LogTail string CreatedAt time.Time + Conclusion string + Details string } func (q *Queries) UpsertPRCheck(ctx context.Context, arg UpsertPRCheckParams) error { @@ -76,6 +82,8 @@ func (q *Queries) UpsertPRCheck(ctx context.Context, arg UpsertPRCheckParams) er arg.URL, arg.LogTail, arg.CreatedAt, + arg.Conclusion, + arg.Details, ) return err } diff --git a/backend/internal/storage/sqlite/gen/pr_comment.sql.go b/backend/internal/storage/sqlite/gen/pr_comment.sql.go index f69cc17b..b87bf173 100644 --- a/backend/internal/storage/sqlite/gen/pr_comment.sql.go +++ b/backend/internal/storage/sqlite/gen/pr_comment.sql.go @@ -20,8 +20,8 @@ func (q *Queries) DeletePRComments(ctx context.Context, prUrl string) error { } const insertPRComment = `-- name: InsertPRComment :exec -INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at) -VALUES (?, ?, ?, ?, ?, ?, ?, ?) +INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ` type InsertPRCommentParams struct { @@ -33,6 +33,9 @@ type InsertPRCommentParams struct { Body string Resolved bool CreatedAt time.Time + ThreadID string + URL string + IsBot int64 } func (q *Queries) InsertPRComment(ctx context.Context, arg InsertPRCommentParams) error { @@ -45,12 +48,15 @@ func (q *Queries) InsertPRComment(ctx context.Context, arg InsertPRCommentParams arg.Body, arg.Resolved, arg.CreatedAt, + arg.ThreadID, + arg.URL, + arg.IsBot, ) return err } const listPRComments = `-- name: ListPRComments :many -SELECT pr_url, comment_id, author, file, line, body, resolved, created_at +SELECT pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot FROM pr_comment WHERE pr_url = ? ORDER BY created_at, comment_id ` @@ -72,6 +78,9 @@ func (q *Queries) ListPRComments(ctx context.Context, prUrl string) ([]PRComment &i.Body, &i.Resolved, &i.CreatedAt, + &i.ThreadID, + &i.URL, + &i.IsBot, ); err != nil { return nil, err } diff --git a/backend/internal/storage/sqlite/gen/pr_review_threads.sql.go b/backend/internal/storage/sqlite/gen/pr_review_threads.sql.go new file mode 100644 index 00000000..a9b67273 --- /dev/null +++ b/backend/internal/storage/sqlite/gen/pr_review_threads.sql.go @@ -0,0 +1,94 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.31.1 +// source: pr_review_threads.sql + +package gen + +import ( + "context" + "time" +) + +const deletePRReviewThreads = `-- name: DeletePRReviewThreads :exec +DELETE FROM pr_review_threads WHERE pr_url = ? +` + +func (q *Queries) DeletePRReviewThreads(ctx context.Context, prUrl string) error { + _, err := q.db.ExecContext(ctx, deletePRReviewThreads, prUrl) + return err +} + +const listPRReviewThreads = `-- name: ListPRReviewThreads :many +SELECT pr_url, thread_id, path, line, resolved, is_bot, semantic_hash, updated_at +FROM pr_review_threads WHERE pr_url = ? ORDER BY updated_at, thread_id +` + +func (q *Queries) ListPRReviewThreads(ctx context.Context, prUrl string) ([]PRReviewThread, error) { + rows, err := q.db.QueryContext(ctx, listPRReviewThreads, prUrl) + if err != nil { + return nil, err + } + defer rows.Close() + items := []PRReviewThread{} + for rows.Next() { + var i PRReviewThread + if err := rows.Scan( + &i.PRURL, + &i.ThreadID, + &i.Path, + &i.Line, + &i.Resolved, + &i.IsBot, + &i.SemanticHash, + &i.UpdatedAt, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const upsertPRReviewThread = `-- name: UpsertPRReviewThread :exec +INSERT INTO pr_review_threads (pr_url, thread_id, path, line, resolved, is_bot, semantic_hash, updated_at) +VALUES (?, ?, ?, ?, ?, ?, ?, ?) +ON CONFLICT (pr_url, thread_id) DO UPDATE SET + path = excluded.path, + line = excluded.line, + resolved = excluded.resolved, + is_bot = excluded.is_bot, + semantic_hash = excluded.semantic_hash, + updated_at = excluded.updated_at +` + +type UpsertPRReviewThreadParams struct { + PRURL string + ThreadID string + Path string + Line int64 + Resolved int64 + IsBot int64 + SemanticHash string + UpdatedAt time.Time +} + +func (q *Queries) UpsertPRReviewThread(ctx context.Context, arg UpsertPRReviewThreadParams) error { + _, err := q.db.ExecContext(ctx, upsertPRReviewThread, + arg.PRURL, + arg.ThreadID, + arg.Path, + arg.Line, + arg.Resolved, + arg.IsBot, + arg.SemanticHash, + arg.UpdatedAt, + ) + return err +} diff --git a/backend/internal/storage/sqlite/gen/projects.sql.go b/backend/internal/storage/sqlite/gen/projects.sql.go index 89c99d1e..dea720c6 100644 --- a/backend/internal/storage/sqlite/gen/projects.sql.go +++ b/backend/internal/storage/sqlite/gen/projects.sql.go @@ -14,7 +14,7 @@ import ( ) const archiveProject = `-- name: ArchiveProject :execrows -UPDATE projects SET archived_at = ? WHERE id = ? +UPDATE projects SET archived_at = ? WHERE id = ? AND archived_at IS NULL ` type ArchiveProjectParams struct { diff --git a/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql b/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql new file mode 100644 index 00000000..6aa72f08 --- /dev/null +++ b/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql @@ -0,0 +1,93 @@ +-- +goose Up +-- +goose StatementBegin +ALTER TABLE pr ADD COLUMN provider TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN host TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN repo TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN source_branch TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN target_branch TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN head_sha TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN title TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN additions INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN deletions INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN changed_files INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN author TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN base_sha TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN merge_commit_sha TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN is_draft INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN is_merged INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN is_closed INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN provider_state TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN provider_mergeable TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN provider_merge_state_status TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN html_url TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN created_at_provider TIMESTAMP; +ALTER TABLE pr ADD COLUMN updated_at_provider TIMESTAMP; +ALTER TABLE pr ADD COLUMN merged_at_provider TIMESTAMP; +ALTER TABLE pr ADD COLUMN closed_at_provider TIMESTAMP; +ALTER TABLE pr ADD COLUMN metadata_hash TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN ci_hash TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN review_hash TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN observed_at TIMESTAMP; +ALTER TABLE pr ADD COLUMN ci_observed_at TIMESTAMP; +ALTER TABLE pr ADD COLUMN review_observed_at TIMESTAMP; + +ALTER TABLE pr_checks ADD COLUMN conclusion TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr_checks ADD COLUMN details TEXT NOT NULL DEFAULT ''; + +ALTER TABLE pr_comment ADD COLUMN thread_id TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr_comment ADD COLUMN url TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr_comment ADD COLUMN is_bot INTEGER NOT NULL DEFAULT 0; + +CREATE TABLE pr_review_threads ( + pr_url TEXT NOT NULL REFERENCES pr (url) ON DELETE CASCADE, + thread_id TEXT NOT NULL, + path TEXT NOT NULL DEFAULT '', + line INTEGER NOT NULL DEFAULT 0, + resolved INTEGER NOT NULL DEFAULT 0, + is_bot INTEGER NOT NULL DEFAULT 0, + semantic_hash TEXT NOT NULL DEFAULT '', + updated_at TIMESTAMP NOT NULL, + PRIMARY KEY (pr_url, thread_id) +); +CREATE INDEX idx_pr_review_threads_lookup ON pr_review_threads (pr_url, updated_at); +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +DROP TABLE pr_review_threads; +ALTER TABLE pr_comment DROP COLUMN is_bot; +ALTER TABLE pr_comment DROP COLUMN url; +ALTER TABLE pr_comment DROP COLUMN thread_id; +ALTER TABLE pr_checks DROP COLUMN details; +ALTER TABLE pr_checks DROP COLUMN conclusion; +ALTER TABLE pr DROP COLUMN review_observed_at; +ALTER TABLE pr DROP COLUMN ci_observed_at; +ALTER TABLE pr DROP COLUMN observed_at; +ALTER TABLE pr DROP COLUMN review_hash; +ALTER TABLE pr DROP COLUMN ci_hash; +ALTER TABLE pr DROP COLUMN metadata_hash; +ALTER TABLE pr DROP COLUMN closed_at_provider; +ALTER TABLE pr DROP COLUMN merged_at_provider; +ALTER TABLE pr DROP COLUMN updated_at_provider; +ALTER TABLE pr DROP COLUMN created_at_provider; +ALTER TABLE pr DROP COLUMN html_url; +ALTER TABLE pr DROP COLUMN provider_merge_state_status; +ALTER TABLE pr DROP COLUMN provider_mergeable; +ALTER TABLE pr DROP COLUMN provider_state; +ALTER TABLE pr DROP COLUMN is_closed; +ALTER TABLE pr DROP COLUMN is_merged; +ALTER TABLE pr DROP COLUMN is_draft; +ALTER TABLE pr DROP COLUMN merge_commit_sha; +ALTER TABLE pr DROP COLUMN base_sha; +ALTER TABLE pr DROP COLUMN author; +ALTER TABLE pr DROP COLUMN changed_files; +ALTER TABLE pr DROP COLUMN deletions; +ALTER TABLE pr DROP COLUMN additions; +ALTER TABLE pr DROP COLUMN title; +ALTER TABLE pr DROP COLUMN head_sha; +ALTER TABLE pr DROP COLUMN target_branch; +ALTER TABLE pr DROP COLUMN source_branch; +ALTER TABLE pr DROP COLUMN repo; +ALTER TABLE pr DROP COLUMN host; +ALTER TABLE pr DROP COLUMN provider; +-- +goose StatementEnd diff --git a/backend/internal/storage/sqlite/queries/pr.sql b/backend/internal/storage/sqlite/queries/pr.sql index 508eddd4..149c27d0 100644 --- a/backend/internal/storage/sqlite/queries/pr.sql +++ b/backend/internal/storage/sqlite/queries/pr.sql @@ -1,26 +1,77 @@ -- name: UpsertPR :exec -INSERT INTO pr (url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at) -VALUES (?, ?, ?, ?, ?, ?, ?, ?) +INSERT INTO pr ( + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT (url) DO UPDATE SET number = excluded.number, pr_state = excluded.pr_state, review_decision = excluded.review_decision, ci_state = excluded.ci_state, mergeability = excluded.mergeability, - updated_at = excluded.updated_at; + updated_at = excluded.updated_at, + provider = excluded.provider, + host = excluded.host, + repo = excluded.repo, + source_branch = excluded.source_branch, + target_branch = excluded.target_branch, + head_sha = excluded.head_sha, + title = excluded.title, + additions = excluded.additions, + deletions = excluded.deletions, + changed_files = excluded.changed_files, + author = excluded.author, + base_sha = excluded.base_sha, + merge_commit_sha = excluded.merge_commit_sha, + is_draft = excluded.is_draft, + is_merged = excluded.is_merged, + is_closed = excluded.is_closed, + provider_state = excluded.provider_state, + provider_mergeable = excluded.provider_mergeable, + provider_merge_state_status = excluded.provider_merge_state_status, + html_url = excluded.html_url, + created_at_provider = excluded.created_at_provider, + updated_at_provider = excluded.updated_at_provider, + merged_at_provider = excluded.merged_at_provider, + closed_at_provider = excluded.closed_at_provider, + metadata_hash = excluded.metadata_hash, + ci_hash = excluded.ci_hash, + review_hash = excluded.review_hash, + observed_at = excluded.observed_at, + ci_observed_at = excluded.ci_observed_at, + review_observed_at = excluded.review_observed_at; -- name: GetPR :one -SELECT url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at +SELECT + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at FROM pr WHERE url = ?; -- name: ListPRsBySession :many -SELECT url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at +SELECT + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at FROM pr WHERE session_id = ? ORDER BY updated_at DESC; - -- name: GetDisplayPRFactsBySession :one SELECT pr.url, @@ -34,6 +85,7 @@ SELECT FROM pr_comment WHERE pr_comment.pr_url = pr.url AND pr_comment.resolved = 0 + AND pr_comment.is_bot = 0 ) AS review_comments FROM pr WHERE pr.session_id = ? diff --git a/backend/internal/storage/sqlite/queries/pr_checks.sql b/backend/internal/storage/sqlite/queries/pr_checks.sql index 2e223729..e0d42946 100644 --- a/backend/internal/storage/sqlite/queries/pr_checks.sql +++ b/backend/internal/storage/sqlite/queries/pr_checks.sql @@ -1,11 +1,13 @@ -- name: UpsertPRCheck :exec -INSERT INTO pr_checks (pr_url, name, commit_hash, status, url, log_tail, created_at) -VALUES (?, ?, ?, ?, ?, ?, ?) +INSERT INTO pr_checks (pr_url, name, commit_hash, status, url, log_tail, created_at, conclusion, details) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT (pr_url, name, commit_hash) DO UPDATE SET status = excluded.status, url = excluded.url, - log_tail = excluded.log_tail; + log_tail = excluded.log_tail, + conclusion = excluded.conclusion, + details = excluded.details; -- name: ListChecksByPR :many -SELECT pr_url, name, commit_hash, status, url, log_tail, created_at +SELECT pr_url, name, commit_hash, status, url, log_tail, created_at, conclusion, details FROM pr_checks WHERE pr_url = ? ORDER BY name, created_at; diff --git a/backend/internal/storage/sqlite/queries/pr_comment.sql b/backend/internal/storage/sqlite/queries/pr_comment.sql index 870a87d7..c180c97a 100644 --- a/backend/internal/storage/sqlite/queries/pr_comment.sql +++ b/backend/internal/storage/sqlite/queries/pr_comment.sql @@ -1,10 +1,10 @@ -- name: InsertPRComment :exec -INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at) -VALUES (?, ?, ?, ?, ?, ?, ?, ?); +INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); -- name: DeletePRComments :exec DELETE FROM pr_comment WHERE pr_url = ?; -- name: ListPRComments :many -SELECT pr_url, comment_id, author, file, line, body, resolved, created_at +SELECT pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot FROM pr_comment WHERE pr_url = ? ORDER BY created_at, comment_id; diff --git a/backend/internal/storage/sqlite/queries/pr_review_threads.sql b/backend/internal/storage/sqlite/queries/pr_review_threads.sql new file mode 100644 index 00000000..c00ea491 --- /dev/null +++ b/backend/internal/storage/sqlite/queries/pr_review_threads.sql @@ -0,0 +1,17 @@ +-- name: UpsertPRReviewThread :exec +INSERT INTO pr_review_threads (pr_url, thread_id, path, line, resolved, is_bot, semantic_hash, updated_at) +VALUES (?, ?, ?, ?, ?, ?, ?, ?) +ON CONFLICT (pr_url, thread_id) DO UPDATE SET + path = excluded.path, + line = excluded.line, + resolved = excluded.resolved, + is_bot = excluded.is_bot, + semantic_hash = excluded.semantic_hash, + updated_at = excluded.updated_at; + +-- name: DeletePRReviewThreads :exec +DELETE FROM pr_review_threads WHERE pr_url = ?; + +-- name: ListPRReviewThreads :many +SELECT pr_url, thread_id, path, line, resolved, is_bot, semantic_hash, updated_at +FROM pr_review_threads WHERE pr_url = ? ORDER BY updated_at, thread_id; diff --git a/backend/internal/storage/sqlite/store/pr_store.go b/backend/internal/storage/sqlite/store/pr_store.go index 0f609f7b..c4fc572f 100644 --- a/backend/internal/storage/sqlite/store/pr_store.go +++ b/backend/internal/storage/sqlite/store/pr_store.go @@ -5,6 +5,7 @@ import ( "database/sql" "errors" "fmt" + "time" "github.com/aoagents/agent-orchestrator/backend/internal/domain" "github.com/aoagents/agent-orchestrator/backend/internal/ports" @@ -22,7 +23,8 @@ import ( // drift between either interface and this implementation fails here at the point // of definition rather than later at the call sites in lifecycle_wiring / tests. var ( - _ ports.PRWriter = (*Store)(nil) + _ ports.PRWriter = (*Store)(nil) + _ ports.SCMWriter = (*Store)(nil) ) // WritePR persists a full PR observation — scalar facts, check runs, and the @@ -31,6 +33,18 @@ var ( // PR upsert runs first so the checks'/comments' CDC triggers can resolve the // session id from the pr row within the same transaction. func (s *Store) WritePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, comments []domain.PullRequestComment) error { + return s.writePR(ctx, pr, checks, nil, comments, true) +} + +// WriteSCMObservation persists a provider-neutral SCM observation in one write +// transaction. It upserts the full PR metadata row and CI checks. Review threads +// and comments are replaced only when replaceReview is true, because review +// polling runs at a slower cadence than metadata/CI polling. +func (s *Store) WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error { + return s.writePR(ctx, pr, checks, threads, comments, replaceReview) +} + +func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error { s.writeMu.Lock() defer s.writeMu.Unlock() return s.inTx(ctx, "write pr observation", func(q *gen.Queries) error { @@ -49,12 +63,22 @@ func (s *Store) WritePR(ctx context.Context, pr domain.PullRequest, checks []dom return err } } - if err := q.DeletePRComments(ctx, pr.URL); err != nil { - return err - } - for _, c := range comments { - if err := q.InsertPRComment(ctx, genCommentParams(pr.URL, c)); err != nil { - return fmt.Errorf("comment %q: %w", c.ID, err) + if replaceReview { + if err := q.DeletePRReviewThreads(ctx, pr.URL); err != nil { + return err + } + if err := q.DeletePRComments(ctx, pr.URL); err != nil { + return err + } + for _, th := range threads { + if err := q.UpsertPRReviewThread(ctx, genReviewThreadParams(pr.URL, th)); err != nil { + return fmt.Errorf("review thread %q: %w", th.ThreadID, err) + } + } + for _, c := range comments { + if err := q.InsertPRComment(ctx, genCommentParams(pr.URL, c)); err != nil { + return fmt.Errorf("comment %q: %w", c.ID, err) + } } } return nil @@ -112,6 +136,19 @@ func (s *Store) ListPRComments(ctx context.Context, prURL string) ([]domain.Pull return out, nil } +// ListPRReviewThreads returns a PR's review threads, oldest first. +func (s *Store) ListPRReviewThreads(ctx context.Context, prURL string) ([]domain.PullRequestReviewThread, error) { + rows, err := s.qr.ListPRReviewThreads(ctx, prURL) + if err != nil { + return nil, fmt.Errorf("list pr review threads %s: %w", prURL, err) + } + out := make([]domain.PullRequestReviewThread, 0, len(rows)) + for _, th := range rows { + out = append(out, reviewThreadFromGen(th)) + } + return out, nil +} + // ---- domain <-> gen mapping ---- // prState collapses the PR's bools into the single pr.state column value. @@ -130,14 +167,44 @@ func prState(r domain.PullRequest) domain.PRState { func genPRParams(r domain.PullRequest) gen.UpsertPRParams { return gen.UpsertPRParams{ - URL: r.URL, - SessionID: r.SessionID, - Number: int64(r.Number), - PRState: prState(r), - ReviewDecision: reviewOrDefault(r.Review), - CIState: ciOrDefault(r.CI), - Mergeability: mergeabilityOrDefault(r.Mergeability), - UpdatedAt: r.UpdatedAt, + URL: r.URL, + SessionID: r.SessionID, + Number: int64(r.Number), + PRState: prState(r), + ReviewDecision: reviewOrDefault(r.Review), + CIState: ciOrDefault(r.CI), + Mergeability: mergeabilityOrDefault(r.Mergeability), + UpdatedAt: r.UpdatedAt, + Provider: r.Provider, + Host: r.Host, + Repo: r.Repo, + SourceBranch: r.SourceBranch, + TargetBranch: r.TargetBranch, + HeadSha: r.HeadSHA, + Title: r.Title, + Additions: int64(r.Additions), + Deletions: int64(r.Deletions), + ChangedFiles: int64(r.ChangedFiles), + Author: r.Author, + BaseSha: r.BaseSHA, + MergeCommitSha: r.MergeCommitSHA, + IsDraft: boolInt(r.Draft), + IsMerged: boolInt(r.Merged), + IsClosed: boolInt(r.Closed), + ProviderState: r.ProviderState, + ProviderMergeable: r.ProviderMergeable, + ProviderMergeStateStatus: r.ProviderMergeStateStatus, + HtmlURL: r.HTMLURL, + CreatedAtProvider: nullTime(r.CreatedAtProvider), + UpdatedAtProvider: nullTime(r.UpdatedAtProvider), + MergedAtProvider: nullTime(r.MergedAtProvider), + ClosedAtProvider: nullTime(r.ClosedAtProvider), + MetadataHash: r.MetadataHash, + CIHash: r.CIHash, + ReviewHash: r.ReviewHash, + ObservedAt: nullTime(r.ObservedAt), + CIObservedAt: nullTime(r.CIObservedAt), + ReviewObservedAt: nullTime(r.ReviewObservedAt), } } @@ -164,16 +231,43 @@ func mergeabilityOrDefault(v domain.Mergeability) domain.Mergeability { func prRowFromGen(p gen.PR) domain.PullRequest { return domain.PullRequest{ - URL: p.URL, - SessionID: p.SessionID, - Number: int(p.Number), - Draft: p.PRState == domain.PRStateDraft, - Merged: p.PRState == domain.PRStateMerged, - Closed: p.PRState == domain.PRStateClosed, - CI: p.CIState, - Review: p.ReviewDecision, - Mergeability: p.Mergeability, - UpdatedAt: p.UpdatedAt, + URL: p.URL, + SessionID: p.SessionID, + Number: int(p.Number), + Draft: p.PRState == domain.PRStateDraft || p.IsDraft != 0, + Merged: p.PRState == domain.PRStateMerged || p.IsMerged != 0, + Closed: p.PRState == domain.PRStateClosed || p.IsClosed != 0, + CI: p.CIState, + Review: p.ReviewDecision, + Mergeability: p.Mergeability, + UpdatedAt: p.UpdatedAt, + Provider: p.Provider, + Host: p.Host, + Repo: p.Repo, + SourceBranch: p.SourceBranch, + TargetBranch: p.TargetBranch, + HeadSHA: p.HeadSha, + Title: p.Title, + Additions: int(p.Additions), + Deletions: int(p.Deletions), + ChangedFiles: int(p.ChangedFiles), + Author: p.Author, + BaseSHA: p.BaseSha, + MergeCommitSHA: p.MergeCommitSha, + ProviderState: p.ProviderState, + ProviderMergeable: p.ProviderMergeable, + ProviderMergeStateStatus: p.ProviderMergeStateStatus, + HTMLURL: p.HtmlURL, + CreatedAtProvider: timeFromNull(p.CreatedAtProvider), + UpdatedAtProvider: timeFromNull(p.UpdatedAtProvider), + MergedAtProvider: timeFromNull(p.MergedAtProvider), + ClosedAtProvider: timeFromNull(p.ClosedAtProvider), + MetadataHash: p.MetadataHash, + CIHash: p.CIHash, + ReviewHash: p.ReviewHash, + ObservedAt: timeFromNull(p.ObservedAt), + CIObservedAt: timeFromNull(p.CIObservedAt), + ReviewObservedAt: timeFromNull(p.ReviewObservedAt), } } @@ -185,13 +279,15 @@ func genCheckParams(prURL string, c domain.PullRequestCheck) gen.UpsertPRCheckPa return gen.UpsertPRCheckParams{ PRURL: prURL, Name: c.Name, CommitHash: c.CommitHash, Status: status, URL: c.URL, LogTail: c.LogTail, CreatedAt: c.CreatedAt, + Conclusion: c.Conclusion, Details: c.Details, } } func checkRowFromGen(c gen.PRCheck) domain.PullRequestCheck { return domain.PullRequestCheck{ Name: c.Name, CommitHash: c.CommitHash, Status: c.Status, - URL: c.URL, LogTail: c.LogTail, CreatedAt: c.CreatedAt, + Conclusion: c.Conclusion, URL: c.URL, Details: c.Details, + LogTail: c.LogTail, CreatedAt: c.CreatedAt, } } @@ -199,12 +295,45 @@ func genCommentParams(prURL string, c domain.PullRequestComment) gen.InsertPRCom return gen.InsertPRCommentParams{ PRURL: prURL, CommentID: c.ID, Author: c.Author, File: c.File, Line: int64(c.Line), Body: c.Body, Resolved: c.Resolved, CreatedAt: c.CreatedAt, + ThreadID: c.ThreadID, URL: c.URL, IsBot: boolInt(c.IsBot), } } func commentFromGen(c gen.PRComment) domain.PullRequestComment { return domain.PullRequestComment{ - ID: c.CommentID, Author: c.Author, File: c.File, Line: int(c.Line), - Body: c.Body, Resolved: c.Resolved, CreatedAt: c.CreatedAt, + ThreadID: c.ThreadID, ID: c.CommentID, Author: c.Author, + File: c.File, Line: int(c.Line), Body: c.Body, URL: c.URL, + Resolved: c.Resolved, IsBot: c.IsBot != 0, CreatedAt: c.CreatedAt, + } +} + +func genReviewThreadParams(prURL string, th domain.PullRequestReviewThread) gen.UpsertPRReviewThreadParams { + return gen.UpsertPRReviewThreadParams{ + PRURL: prURL, ThreadID: th.ThreadID, Path: th.Path, + Line: int64(th.Line), Resolved: boolInt(th.Resolved), + IsBot: boolInt(th.IsBot), SemanticHash: th.SemanticHash, + UpdatedAt: th.UpdatedAt, + } +} + +func reviewThreadFromGen(th gen.PRReviewThread) domain.PullRequestReviewThread { + return domain.PullRequestReviewThread{ + ThreadID: th.ThreadID, Path: th.Path, Line: int(th.Line), + Resolved: th.Resolved != 0, IsBot: th.IsBot != 0, + SemanticHash: th.SemanticHash, UpdatedAt: th.UpdatedAt, + } +} + +func boolInt(v bool) int64 { + if v { + return 1 + } + return 0 +} + +func timeFromNull(t sql.NullTime) time.Time { + if !t.Valid { + return time.Time{} } + return t.Time } diff --git a/backend/internal/storage/sqlite/store/store_test.go b/backend/internal/storage/sqlite/store/store_test.go index 7731e5ca..64491e40 100644 --- a/backend/internal/storage/sqlite/store/store_test.go +++ b/backend/internal/storage/sqlite/store/store_test.go @@ -216,6 +216,53 @@ func TestPRCommentsReplace(t *testing.T) { } } +func TestWriteSCMObservationPersistsMetadataChecksReviewsAndComments(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + r, _ := s.CreateSession(ctx, sampleRecord("mer")) + now := time.Now().UTC().Truncate(time.Second) + + pr := domain.PullRequest{ + URL: "https://github.com/o/r/pull/1", SessionID: r.ID, Number: 1, + Provider: "github", Host: "github.com", Repo: "o/r", + SourceBranch: "feat/75", TargetBranch: "main", HeadSHA: "h1", + Title: "SCM observer", Additions: 10, Deletions: 2, ChangedFiles: 3, + Author: "dev", BaseSHA: "b1", MergeCommitSHA: "m1", + ProviderState: "OPEN", ProviderMergeable: "MERGEABLE", ProviderMergeStateStatus: "CLEAN", + HTMLURL: "https://github.com/o/r/pull/1", + CI: domain.CIFailing, Review: domain.ReviewChangesRequest, Mergeability: domain.MergeBlocked, + MetadataHash: "mh", CIHash: "ch", ReviewHash: "rh", + UpdatedAt: now, ObservedAt: now, CIObservedAt: now, ReviewObservedAt: now, + } + checks := []domain.PullRequestCheck{{Name: "build", CommitHash: "h1", Status: domain.PRCheckFailed, Conclusion: "failure", URL: "ci", Details: "99", LogTail: "boom", CreatedAt: now}} + threads := []domain.PullRequestReviewThread{{ThreadID: "t1", Path: "main.go", Line: 7, SemanticHash: "th", UpdatedAt: now}} + comments := []domain.PullRequestComment{{ThreadID: "t1", ID: "c1", Author: "reviewer", File: "main.go", Line: 7, Body: "fix", URL: "comment", CreatedAt: now}} + + if err := s.WriteSCMObservation(ctx, pr, checks, threads, comments, true); err != nil { + t.Fatal(err) + } + got, ok, err := s.GetPR(ctx, pr.URL) + if err != nil || !ok { + t.Fatalf("get pr: ok=%v err=%v", ok, err) + } + if got.Provider != "github" || got.HeadSHA != "h1" || got.MetadataHash != "mh" || got.CIHash != "ch" || got.ReviewHash != "rh" { + t.Fatalf("SCM metadata not persisted: %+v", got) + } + gotChecks, _ := s.ListChecks(ctx, pr.URL) + if len(gotChecks) != 1 || gotChecks[0].Conclusion != "failure" || gotChecks[0].Details != "99" || gotChecks[0].LogTail != "boom" { + t.Fatalf("checks not persisted: %+v", gotChecks) + } + gotThreads, _ := s.ListPRReviewThreads(ctx, pr.URL) + if len(gotThreads) != 1 || gotThreads[0].ThreadID != "t1" || gotThreads[0].SemanticHash != "th" { + t.Fatalf("threads not persisted: %+v", gotThreads) + } + gotComments, _ := s.ListPRComments(ctx, pr.URL) + if len(gotComments) != 1 || gotComments[0].ThreadID != "t1" || gotComments[0].URL != "comment" { + t.Fatalf("comments not persisted: %+v", gotComments) + } +} + func TestCDCTriggersPopulateChangeLog(t *testing.T) { s := newTestStore(t) ctx := context.Background() From c76caebb844a77f4363bd6a9c142dff5416f382a Mon Sep 17 00:00:00 2001 From: whoisasx Date: Tue, 2 Jun 2026 06:11:25 +0530 Subject: [PATCH 02/20] fix: satisfy scm batch query lint --- backend/internal/adapters/scm/github/observer_provider.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go index 5ce75c11..64caa9fc 100644 --- a/backend/internal/adapters/scm/github/observer_provider.go +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -217,8 +217,8 @@ func buildSCMBatchQuery(refs []ports.SCMPRRef) (string, []string) { for i, ref := range refs { alias := fmt.Sprintf("pr%d", i) aliases[i] = alias - b.WriteString(fmt.Sprintf("%s: repository(owner:%s,name:%s){ pullRequest(number:%d){ %s } }\n", - alias, graphQLString(ref.Repo.Owner), graphQLString(ref.Repo.Name), ref.Number, scmPRFields())) + _, _ = fmt.Fprintf(&b, "%s: repository(owner:%s,name:%s){ pullRequest(number:%d){ %s } }\n", + alias, graphQLString(ref.Repo.Owner), graphQLString(ref.Repo.Name), ref.Number, scmPRFields()) } b.WriteString("}") return b.String(), aliases From 6391542d26d98b1bb283d80ad3146d77fa5abe21 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Tue, 2 Jun 2026 06:12:56 +0530 Subject: [PATCH 03/20] fix: avoid scm token preflight on startup --- backend/internal/daemon/scm_wiring.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/internal/daemon/scm_wiring.go b/backend/internal/daemon/scm_wiring.go index f0faead0..6da2c75b 100644 --- a/backend/internal/daemon/scm_wiring.go +++ b/backend/internal/daemon/scm_wiring.go @@ -20,7 +20,12 @@ func startSCMObserver(ctx context.Context, store *sqlite.Store, lcm *lifecycle.M scmgithub.EnvTokenSource{EnvVars: []string{"AO_GITHUB_TOKEN"}}, &scmgithub.GHTokenSource{}, } - provider, err := scmgithub.NewProvider(scmgithub.ProviderOptions{Token: tokens}) + // Avoid token preflight on daemon startup. GHTokenSource may shell out to `gh`, + // which is too slow/flaky for the startup readiness path (especially on + // Windows CI). The provider will resolve credentials lazily in its background + // observer goroutine when it actually needs to call GitHub. + client := scmgithub.NewClient(scmgithub.ClientOptions{Token: tokens}) + provider, err := scmgithub.NewProvider(scmgithub.ProviderOptions{Client: client}) if err != nil { if errors.Is(err, scmgithub.ErrNoToken) || errors.Is(err, scmgithub.ErrAuthFailed) { logger.Warn("scm observer disabled: no usable GitHub token", "err", err) From 19e4db923ef54fa5d4dc67ed48fd804903369823 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Tue, 2 Jun 2026 12:32:21 +0530 Subject: [PATCH 04/20] fix: bound github scm review refresh --- .../adapters/scm/github/observer_provider.go | 18 +++++-- .../internal/adapters/scm/github/provider.go | 9 +++- .../adapters/scm/github/provider_test.go | 51 +++++++++++++++++++ 3 files changed, 73 insertions(+), 5 deletions(-) diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go index 64caa9fc..ac7b225a 100644 --- a/backend/internal/adapters/scm/github/observer_provider.go +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -19,6 +19,11 @@ import ( const scmBatchCheckContextLimit = 100 +// githubReviewThreadMaxPages bounds a single review-thread refresh. GitHub +// returns 100 threads per page, so this permits up to 5000 threads while keeping +// one poll from allocating unbounded memory or issuing unlimited GraphQL calls. +const githubReviewThreadMaxPages = 50 + // ParseRepository normalizes a GitHub remote/origin URL into a provider-neutral // repository key. It accepts https://github.com/owner/repo(.git), // git@github.com:owner/repo(.git), and path-only owner/repo inputs used by tests. @@ -138,7 +143,7 @@ func (p *Provider) FetchReviewThreads(ctx context.Context, ref ports.SCMPRRef) ( var all []ports.SCMReviewThreadObservation var decision string var cursor string - for { + for page := 0; page < githubReviewThreadMaxPages; page++ { query := buildReviewThreadsQuery(ref, cursor) data, err := p.client.doGraphQL(ctx, query, nil) if err != nil { @@ -162,6 +167,12 @@ func (p *Provider) FetchReviewThreads(ctx context.Context, ref ports.SCMPRRef) ( if cursor == "" { break } + if page == githubReviewThreadMaxPages-1 { + p.logger.Warn("github scm: review thread page limit reached", + "repo", repoFullName(ref.Repo), "pr", ref.Number, + "max_pages", githubReviewThreadMaxPages) + break + } } return ports.SCMReviewObservation{Decision: decision, Threads: all}, nil } @@ -304,7 +315,7 @@ func scmChecksFromGraphQL(pr map[string]any) []ports.SCMCheckObservation { switch typ { case "CheckRun": ch.Name = str(n["name"]) - ch.Status = strings.ToLower(str(n["status"])) + ch.Status = string(checkStatusFromGraphQL(n)) ch.Conclusion = strings.ToLower(str(n["conclusion"])) ch.URL = firstNonEmpty(str(n["detailsUrl"]), str(n["url"])) if id := int64(num(n["databaseId"])); id > 0 { @@ -312,7 +323,7 @@ func scmChecksFromGraphQL(pr map[string]any) []ports.SCMCheckObservation { } case "StatusContext": ch.Name = str(n["context"]) - ch.Status = strings.ToLower(str(n["state"])) + ch.Status = string(checkStatusFromGraphQL(n)) ch.Conclusion = strings.ToLower(str(n["state"])) ch.URL = str(n["targetUrl"]) default: @@ -321,7 +332,6 @@ func scmChecksFromGraphQL(pr map[string]any) []ports.SCMCheckObservation { if ch.Name == "" { continue } - ch.Status = string(checkStatusFromGraphQL(n)) out = append(out, ch) } return out diff --git a/backend/internal/adapters/scm/github/provider.go b/backend/internal/adapters/scm/github/provider.go index b57babdb..764199e9 100644 --- a/backend/internal/adapters/scm/github/provider.go +++ b/backend/internal/adapters/scm/github/provider.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "net/http" "net/url" "path" @@ -29,6 +30,7 @@ type ProviderOptions struct { RESTBase string GraphQLURL string UserAgent string + Logger *slog.Logger } // Provider observes one GitHub pull request and returns a normalized @@ -37,6 +39,7 @@ type ProviderOptions struct { // observation primitive that loop will call. type Provider struct { client *Client + logger *slog.Logger } // NewProvider returns a Provider. If opts.Client is supplied it is used @@ -60,7 +63,11 @@ func NewProvider(opts ProviderOptions) (*Provider, error) { UserAgent: opts.UserAgent, }) } - return &Provider{client: c}, nil + logger := opts.Logger + if logger == nil { + logger = slog.Default() + } + return &Provider{client: c, logger: logger}, nil } // Observe fetches the current state of one PR by its github.com URL and diff --git a/backend/internal/adapters/scm/github/provider_test.go b/backend/internal/adapters/scm/github/provider_test.go index eb407bdd..9e5f221e 100644 --- a/backend/internal/adapters/scm/github/provider_test.go +++ b/backend/internal/adapters/scm/github/provider_test.go @@ -1120,3 +1120,54 @@ func TestObserve_AssertsPRObservationShape(t *testing.T) { o.Comments = nil _ = o } + +func TestSCMChecksFromGraphQL_StatusContextUsesState(t *testing.T) { + pr := map[string]any{ + "commits": map[string]any{"nodes": []any{ + map[string]any{"commit": map[string]any{"statusCheckRollup": map[string]any{ + "contexts": map[string]any{"nodes": []any{ + map[string]any{"__typename": "StatusContext", "context": "legacy", "state": "FAILURE", "targetUrl": "https://ci/legacy"}, + map[string]any{"__typename": "CheckRun", "name": "actions", "status": "COMPLETED", "conclusion": "SUCCESS", "detailsUrl": "https://ci/actions"}, + }}, + }}}, + }}, + } + checks := scmChecksFromGraphQL(pr) + if len(checks) != 2 { + t.Fatalf("checks = %d, want 2: %+v", len(checks), checks) + } + if checks[0].Name != "legacy" || checks[0].Status != string(domain.PRCheckFailed) || checks[0].Conclusion != "failure" { + t.Fatalf("legacy StatusContext not normalized from state: %+v", checks[0]) + } + if checks[1].Name != "actions" || checks[1].Status != string(domain.PRCheckPassed) || checks[1].Conclusion != "success" { + t.Fatalf("CheckRun not normalized from conclusion: %+v", checks[1]) + } +} + +func TestFetchReviewThreadsStopsAtPageLimit(t *testing.T) { + fake := newFakeGH(t) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + page := fake.callsTo(http.MethodPost, "/graphql") + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"repo": map[string]any{"pullRequest": map[string]any{ + "reviewDecision": "CHANGES_REQUESTED", + "reviewThreads": map[string]any{ + "nodes": []any{map[string]any{"id": "thread-" + strconv.Itoa(page), "path": "main.go", "line": page, "isResolved": false, "comments": map[string]any{"nodes": []any{}}}}, + "pageInfo": map[string]any{"hasNextPage": true, "endCursor": "cursor-" + strconv.Itoa(page)}, + }, + }}}, + }) + }) + p := newProviderForTest(t, fake) + review, err := p.FetchReviewThreads(ctx(), ports.SCMPRRef{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "o", Name: "r", Repo: "o/r"}, Number: 1}) + if err != nil { + t.Fatalf("FetchReviewThreads: %v", err) + } + if got := fake.callsTo(http.MethodPost, "/graphql"); got != githubReviewThreadMaxPages { + t.Fatalf("graphql calls = %d, want capped at %d", got, githubReviewThreadMaxPages) + } + if len(review.Threads) != githubReviewThreadMaxPages { + t.Fatalf("threads = %d, want %d", len(review.Threads), githubReviewThreadMaxPages) + } +} From 452fb95e84a9c9ed66406e298ff12e4468479e5e Mon Sep 17 00:00:00 2001 From: whoisasx Date: Tue, 2 Jun 2026 12:39:24 +0530 Subject: [PATCH 05/20] docs: clarify scm observer fields --- .../adapters/scm/github/observer_provider.go | 4 + backend/internal/daemon/scm_wiring.go | 4 + backend/internal/observe/scm/observer.go | 57 +++-- backend/internal/observe/scm/observer_test.go | 4 + backend/internal/ports/scm_observations.go | 198 +++++++++++++----- .../migrations/0003_scm_observer_schema.sql | 2 + .../sqlite/queries/pr_review_threads.sql | 1 + 7 files changed, 194 insertions(+), 76 deletions(-) diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go index ac7b225a..e95631be 100644 --- a/backend/internal/adapters/scm/github/observer_provider.go +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -1,5 +1,9 @@ package github +// This file contains the GitHub implementation of the provider-neutral SCM observer contract. +// It handles repository parsing, REST ETag guards, branch PR discovery, GraphQL +// batch PR reads, failed-check log tails, and review-thread pagination. + import ( "context" "crypto/sha256" diff --git a/backend/internal/daemon/scm_wiring.go b/backend/internal/daemon/scm_wiring.go index 6da2c75b..3b2b18df 100644 --- a/backend/internal/daemon/scm_wiring.go +++ b/backend/internal/daemon/scm_wiring.go @@ -1,5 +1,9 @@ package daemon +// This file wires the provider-neutral SCM observer into daemon startup using +// the GitHub provider for v1. It keeps provider setup non-blocking for readiness +// by resolving tokens lazily inside the background observer path. + import ( "context" "errors" diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index 3d3d6b3c..353f7842 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -59,24 +59,37 @@ type Lifecycle interface { // Config holds optional observer knobs. Zero values use production defaults. type Config struct { - Tick time.Duration + // Tick is the fast PR/CI polling interval. Zero uses DefaultTickInterval. + Tick time.Duration + // ReviewInterval is the slower review-thread refresh interval. ReviewInterval time.Duration - Clock func() time.Time - Logger *slog.Logger - CacheMax int + // Clock supplies timestamps for observations and tests. Nil uses time.Now. + Clock func() time.Time + // Logger receives operational diagnostics for provider/store/lifecycle failures. + Logger *slog.Logger + // CacheMax bounds each in-memory ETag/review cache. Zero uses DefaultCacheMax. + CacheMax int } // ObserverCache stores provider ETags and review polling timestamps in memory. // It is intentionally non-persistent for v1; cold restarts simply revalidate. type ObserverCache struct { - RepoPRListETag map[string]string - CommitChecksETag map[string]string - ReviewETag map[string]string - LastReviewPollAt map[string]time.Time - repoOrder []string - commitOrder []string + // RepoPRListETag maps repository keys to the last open-PR-list ETag. + RepoPRListETag map[string]string + // CommitChecksETag maps repo+commit keys to the last check-runs ETag. + CommitChecksETag map[string]string + // ReviewETag is reserved for provider review-thread ETags when supported. + ReviewETag map[string]string + // LastReviewPollAt maps PR keys to the last review-thread fetch timestamp. + LastReviewPollAt map[string]time.Time + // repoOrder tracks FIFO eviction order for RepoPRListETag. + repoOrder []string + // commitOrder tracks FIFO eviction order for CommitChecksETag. + commitOrder []string + // lastReviewPollOrder tracks FIFO eviction order for LastReviewPollAt. lastReviewPollOrder []string - max int + // max is the maximum number of entries each cache map retains. + max int } func newCache(maxEntries int) ObserverCache { @@ -95,14 +108,22 @@ func newCache(maxEntries int) ObserverCache { // Observer coordinates provider polling, semantic diffing, persistence, and // lifecycle notifications for SCM observations. type Observer struct { - provider Provider - store Store - lifecycle Lifecycle - tick time.Duration + // provider is the SCM adapter used for all provider/network operations. + provider Provider + // store supplies sessions/projects/local PR state and receives transactional writes. + store Store + // lifecycle is notified after successful persistence of meaningful changes. + lifecycle Lifecycle + // tick is the active PR/CI polling cadence. + tick time.Duration + // reviewInterval is the minimum duration between review-thread fetches per PR. reviewInterval time.Duration - clock func() time.Time - logger *slog.Logger - Cache ObserverCache + // clock supplies observation timestamps. + clock func() time.Time + // logger receives non-fatal operational failures. + logger *slog.Logger + // Cache holds bounded in-memory provider ETags and review poll timestamps. + Cache ObserverCache } // New constructs an Observer with default cadence/cache settings for zero diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index db498beb..c9806d82 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -1,5 +1,9 @@ package scm +// This file tests the SCM observer orchestration contract with fake provider, +// store, and lifecycle collaborators so ETag decisions, batching, log fetching, +// review cadence, semantic hashes, and notification behavior stay provider-neutral. + import ( "context" "errors" diff --git a/backend/internal/ports/scm_observations.go b/backend/internal/ports/scm_observations.go index 07d9e549..03ba0dc3 100644 --- a/backend/internal/ports/scm_observations.go +++ b/backend/internal/ports/scm_observations.go @@ -1,3 +1,7 @@ +// This file defines provider-neutral SCM DTOs used at the boundary between the +// SCM observer, persistence layer, and lifecycle manager. Provider adapters fill +// these structs with normalized facts so downstream code does not depend on raw +// GitHub payloads or GitHub-specific enum names. package ports import "time" @@ -6,24 +10,34 @@ import "time" // shape. Repo is conventionally "owner/name" for providers that expose an // owner namespace, while Owner/Name are kept split for provider calls. type SCMRepo struct { + // Provider is the normalized SCM provider name, e.g. "github". Provider string - Host string - Owner string - Name string - Repo string + // Host is the SCM host, e.g. "github.com" or a GitHub Enterprise host. + Host string + // Owner is the provider-specific namespace/organization/user. + Owner string + // Name is the repository name without the owner namespace. + Name string + // Repo is the display/stable full repository name, usually "owner/name". + Repo string } // SCMPRRef identifies a pull request within a provider-neutral repository. type SCMPRRef struct { - Repo SCMRepo + // Repo is the normalized repository that owns the pull request. + Repo SCMRepo + // Number is the provider's pull request number within the repository. Number int - URL string + // URL is the canonical browser URL when already known locally. + URL string } // SCMGuardResult is an ETag-style cache guard result. NotModified maps to HTTP // 304 for providers that support it. type SCMGuardResult struct { - ETag string + // ETag is the latest provider cache validator for this guard endpoint. + ETag string + // NotModified is true when the provider reported no change since the ETag. NotModified bool } @@ -31,110 +45,178 @@ type SCMGuardResult struct { // the SCM observer and consumed by lifecycle. Provider adapters normalize their // SCM-specific payloads into this DTO before the observer persists/notifies. type SCMObservation struct { - Fetched bool + // Fetched is true only when the provider refresh succeeded and the nested + // facts are authoritative for this poll. + Fetched bool + // ObservedAt is the observer timestamp for this normalized snapshot. ObservedAt time.Time + // Provider is the normalized SCM provider name, e.g. "github". Provider string - Host string - Repo string - - PR SCMPRObservation - CI SCMCIObservation - Review SCMReviewObservation + // Host is the SCM host that served this observation. + Host string + // Repo is the full repository name shown to AO users, usually "owner/name". + Repo string + + // PR contains pull-request metadata such as branches, title, state, and diff stats. + PR SCMPRObservation + // CI contains the rolled-up CI state, checks, failing fingerprint, and log tail. + CI SCMCIObservation + // Review contains review decision plus normalized review threads/comments. + Review SCMReviewObservation + // Mergeability contains AO's mergeability verdict and blockers. Mergeability SCMMergeabilityObservation + // Changed marks which semantic buckets changed compared with the DB snapshot. Changed SCMChanged } // SCMChanged marks which semantic state buckets changed in the successful poll. type SCMChanged struct { + // Metadata is true when PR metadata or mergeability facts changed. Metadata bool - CI bool - Review bool + // CI is true when check/CI facts or failure logs changed. + CI bool + // Review is true when review decision, threads, or comments changed. + Review bool } // SCMPRObservation carries provider-neutral PR metadata. type SCMPRObservation struct { - URL string - Number int - State string - Draft bool - Merged bool - Closed bool - SourceBranch string - TargetBranch string - HeadSHA string - Title string - Additions int - Deletions int - ChangedFiles int - Author string - BaseSHA string + // URL is the canonical PR URL used as the persistence key. + URL string + // Number is the provider's PR number in the repository. + Number int + // State is AO's normalized PR state: draft, open, merged, or closed. + State string + // Draft is true when the PR is marked draft/work-in-progress. + Draft bool + // Merged is true when the PR has been merged. + Merged bool + // Closed is true when the PR is closed without being merged. + Closed bool + // SourceBranch is the PR head/source branch name. + SourceBranch string + // TargetBranch is the PR base/target branch name. + TargetBranch string + // HeadSHA is the current head commit SHA for the PR. + HeadSHA string + // Title is the provider PR title. + Title string + // Additions is the provider-reported added line count. + Additions int + // Deletions is the provider-reported deleted line count. + Deletions int + // ChangedFiles is the provider-reported changed file count. + ChangedFiles int + // Author is the provider login/name of the PR author. + Author string + // BaseSHA is the current base branch SHA when the provider supplies it. + BaseSHA string + // MergeCommitSHA is the merge commit SHA when the PR has one. MergeCommitSHA string - ProviderState string - ProviderMergeable string + // ProviderState preserves the provider's raw PR state enum/string. + ProviderState string + // ProviderMergeable preserves the provider's raw mergeable enum/string. + ProviderMergeable string + // ProviderMergeStateStatus preserves provider-specific merge status detail. ProviderMergeStateStatus string - HTMLURL string + // HTMLURL is the provider browser URL; it usually matches URL. + HTMLURL string + // CreatedAtProvider is the provider's PR creation timestamp. CreatedAtProvider time.Time + // UpdatedAtProvider is the provider's last PR update timestamp. UpdatedAtProvider time.Time - MergedAtProvider time.Time - ClosedAtProvider time.Time + // MergedAtProvider is the provider's merge timestamp when merged. + MergedAtProvider time.Time + // ClosedAtProvider is the provider's close timestamp when closed. + ClosedAtProvider time.Time } // SCMCIObservation carries aggregate CI state plus failing-check details. type SCMCIObservation struct { - Summary string - HeadSHA string + // Summary is AO's normalized aggregate CI state: unknown, pending, passing, or failing. + Summary string + // HeadSHA is the commit SHA that the check data applies to. + HeadSHA string + // FailedFingerprint is a stable semantic signature of current failing checks. FailedFingerprint string - Checks []SCMCheckObservation - FailedChecks []SCMCheckObservation - FailureLogTail string + // Checks contains all normalized visible check/status contexts. + Checks []SCMCheckObservation + // FailedChecks contains only failing/cancelled checks that may need agent action. + FailedChecks []SCMCheckObservation + // FailureLogTail is the combined tail of newly fetched failed-check logs. + FailureLogTail string } // SCMCheckObservation is one normalized check/status context. ProviderID is an // optional provider-owned identifier (for GitHub, Actions job/check-run id) used // by the provider to fetch logs; consumers should not attach meaning to it. type SCMCheckObservation struct { - Name string - Status string + // Name is the check run name or commit status context name. + Name string + // Status is AO's normalized check status. + Status string + // Conclusion is the provider conclusion/state string preserved for detail. Conclusion string - URL string - LogTail string + // URL is a provider link to the check/status details. + URL string + // LogTail is the last 20 lines of a failed job log when fetched. + LogTail string + // ProviderID is an opaque provider id used for follow-up provider calls. ProviderID string } // SCMReviewObservation carries normalized review-decision and review-thread facts. type SCMReviewObservation struct { + // Decision is AO's normalized review decision. Decision string - Threads []SCMReviewThreadObservation + // Threads contains normalized review threads fetched on the slower review cadence. + Threads []SCMReviewThreadObservation } // SCMReviewThreadObservation is a normalized review thread with comments. type SCMReviewThreadObservation struct { - ID string - Path string - Line int + // ID is the provider's stable review thread identifier. + ID string + // Path is the file path the thread is anchored to. + Path string + // Line is the line number the thread is anchored to when supplied. + Line int + // Resolved is true when the provider marks the thread resolved. Resolved bool - IsBot bool + // IsBot is true when the thread's comments are all/primarily bot-authored. + IsBot bool + // Comments contains normalized comments in this review thread. Comments []SCMReviewCommentObservation } // SCMReviewCommentObservation is one normalized review comment. type SCMReviewCommentObservation struct { - ID string + // ID is the provider's stable review comment identifier. + ID string + // Author is the provider login/name of the commenter. Author string - Body string - URL string - IsBot bool + // Body is the review comment text. + Body string + // URL is a provider link to the comment. + URL string + // IsBot is true when the provider identifies the author as a bot. + IsBot bool } // SCMMergeabilityObservation is the normalized mergeability verdict. type SCMMergeabilityObservation struct { - State string - Mergeable bool - Conflict bool + // State is AO's normalized mergeability state. + State string + // Mergeable is true when the PR is currently mergeable. + Mergeable bool + // Conflict is true when merge conflicts block merging. + Conflict bool + // BehindBase is true when the head branch is behind the base branch. BehindBase bool - Blockers []string + // Blockers lists normalized reasons preventing merge. + Blockers []string } diff --git a/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql b/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql index 6aa72f08..ee57e8ba 100644 --- a/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql +++ b/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql @@ -1,3 +1,5 @@ +-- Summary: extend PR persistence for provider-neutral SCM observations, CI/check detail, +-- review-thread storage, and semantic hashes used by the SCM observer. -- +goose Up -- +goose StatementBegin ALTER TABLE pr ADD COLUMN provider TEXT NOT NULL DEFAULT ''; diff --git a/backend/internal/storage/sqlite/queries/pr_review_threads.sql b/backend/internal/storage/sqlite/queries/pr_review_threads.sql index c00ea491..19f54481 100644 --- a/backend/internal/storage/sqlite/queries/pr_review_threads.sql +++ b/backend/internal/storage/sqlite/queries/pr_review_threads.sql @@ -1,3 +1,4 @@ +-- Summary: SQLC queries for replacing and reading normalized PR review threads. -- name: UpsertPRReviewThread :exec INSERT INTO pr_review_threads (pr_url, thread_id, path, line, resolved, is_bot, semantic_hash, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?) From eee8aec56f68ff89135098f13eece8192e6afb32 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Tue, 2 Jun 2026 12:53:44 +0530 Subject: [PATCH 06/20] fix: gate scm observer credentials lazily --- .../internal/adapters/scm/github/provider.go | 35 ++++++++++--- backend/internal/daemon/scm_wiring.go | 9 ++-- backend/internal/observe/scm/observer.go | 42 ++++++++++++++++ backend/internal/observe/scm/observer_test.go | 50 +++++++++++++++++-- 4 files changed, 120 insertions(+), 16 deletions(-) diff --git a/backend/internal/adapters/scm/github/provider.go b/backend/internal/adapters/scm/github/provider.go index 764199e9..86926aca 100644 --- a/backend/internal/adapters/scm/github/provider.go +++ b/backend/internal/adapters/scm/github/provider.go @@ -27,10 +27,13 @@ type ProviderOptions struct { Client *Client HTTPClient *http.Client Token TokenSource - RESTBase string - GraphQLURL string - UserAgent string - Logger *slog.Logger + // SkipTokenPreflight defers token validation until the first provider call. + // Daemon wiring uses this so gh-token shell-out never blocks readiness. + SkipTokenPreflight bool + RESTBase string + GraphQLURL string + UserAgent string + Logger *slog.Logger } // Provider observes one GitHub pull request and returns a normalized @@ -45,10 +48,11 @@ type Provider struct { // NewProvider returns a Provider. If opts.Client is supplied it is used // verbatim; otherwise a Client is built from the other options. When a // Token source is supplied it is exercised once so missing credentials -// surface at daemon startup rather than at first observation. Tests that -// want an unauthenticated fake pass opts.Client directly. +// surface at daemon startup rather than at first observation, unless +// SkipTokenPreflight is set. Tests that want an unauthenticated fake pass +// opts.Client directly. func NewProvider(opts ProviderOptions) (*Provider, error) { - if opts.Client == nil && opts.Token != nil { + if opts.Client == nil && opts.Token != nil && !opts.SkipTokenPreflight { if _, err := opts.Token.Token(context.Background()); err != nil { return nil, err } @@ -70,6 +74,23 @@ func NewProvider(opts ProviderOptions) (*Provider, error) { return &Provider{client: c, logger: logger}, nil } +// SCMCredentialsAvailable checks whether this provider can obtain a token. The +// SCM observer calls it lazily during the first poll that has SCM subjects, so +// daemon readiness is not blocked by shelling out to gh auth token and idle +// daemons do not warn about missing credentials. +func (p *Provider) SCMCredentialsAvailable(ctx context.Context) (bool, error) { + if p.client == nil || p.client.tokens == nil { + return true, nil + } + if _, err := p.client.tokens.Token(ctx); err != nil { + if errors.Is(err, ErrNoToken) { + return false, nil + } + return false, err + } + return true, nil +} + // Observe fetches the current state of one PR by its github.com URL and // returns a normalized ports.PRObservation. Any required network call // failing yields Fetched=false (caller must not infer "PR closed" from a diff --git a/backend/internal/daemon/scm_wiring.go b/backend/internal/daemon/scm_wiring.go index 3b2b18df..772bbd52 100644 --- a/backend/internal/daemon/scm_wiring.go +++ b/backend/internal/daemon/scm_wiring.go @@ -17,8 +17,8 @@ import ( // startSCMObserver wires the provider-neutral SCM observer with the GitHub // provider used by v1. Missing credentials do not fail daemon startup; the -// observer is skipped and a warning is logged so local/offline development keeps -// working. +// observer performs a lazy credential check in its background goroutine, logs +// one warning, and disables itself before any provider API calls. func startSCMObserver(ctx context.Context, store *sqlite.Store, lcm *lifecycle.Manager, logger *slog.Logger) <-chan struct{} { tokens := scmgithub.FallbackTokenSource{ scmgithub.EnvTokenSource{EnvVars: []string{"AO_GITHUB_TOKEN"}}, @@ -27,9 +27,8 @@ func startSCMObserver(ctx context.Context, store *sqlite.Store, lcm *lifecycle.M // Avoid token preflight on daemon startup. GHTokenSource may shell out to `gh`, // which is too slow/flaky for the startup readiness path (especially on // Windows CI). The provider will resolve credentials lazily in its background - // observer goroutine when it actually needs to call GitHub. - client := scmgithub.NewClient(scmgithub.ClientOptions{Token: tokens}) - provider, err := scmgithub.NewProvider(scmgithub.ProviderOptions{Client: client}) + // observer goroutine before it makes any GitHub API call. + provider, err := scmgithub.NewProvider(scmgithub.ProviderOptions{Token: tokens, SkipTokenPreflight: true}) if err != nil { if errors.Is(err, scmgithub.ErrNoToken) || errors.Is(err, scmgithub.ErrAuthFailed) { logger.Warn("scm observer disabled: no usable GitHub token", "err", err) diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index 353f7842..1eba9fdf 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -57,6 +57,10 @@ type Lifecycle interface { ApplySCMObservation(ctx context.Context, sessionID domain.SessionID, obs ports.SCMObservation) error } +type credentialChecker interface { + SCMCredentialsAvailable(ctx context.Context) (bool, error) +} + // Config holds optional observer knobs. Zero values use production defaults. type Config struct { // Tick is the fast PR/CI polling interval. Zero uses DefaultTickInterval. @@ -122,6 +126,10 @@ type Observer struct { clock func() time.Time // logger receives non-fatal operational failures. logger *slog.Logger + // credentialsChecked records whether an optional provider credential gate ran. + credentialsChecked bool + // disabled is set after the credential gate reports unavailable credentials. + disabled bool // Cache holds bounded in-memory provider ETags and review poll timestamps. Cache ObserverCache } @@ -194,6 +202,9 @@ func (o *Observer) Poll(ctx context.Context) error { if err := ctx.Err(); err != nil { return err } + if o.disabled { + return nil + } subjects, err := o.discoverSubjects(ctx) if err != nil { return err @@ -201,6 +212,12 @@ func (o *Observer) Poll(ctx context.Context) error { if len(subjects) == 0 { return nil } + if err := o.checkCredentials(ctx); err != nil { + return err + } + if o.disabled { + return nil + } repoGuards := o.guardRepos(ctx, subjects) if err := ctx.Err(); err != nil { @@ -278,6 +295,31 @@ func (o *Observer) Poll(ctx context.Context) error { return nil } +func (o *Observer) checkCredentials(ctx context.Context) error { + if o.credentialsChecked { + return nil + } + if err := ctx.Err(); err != nil { + return err + } + o.credentialsChecked = true + checker, ok := o.provider.(credentialChecker) + if !ok { + return nil + } + available, err := checker.SCMCredentialsAvailable(ctx) + if err != nil { + o.disabled = true + o.logger.Warn("scm observer disabled: provider credentials unavailable", "err", err) + return nil + } + if !available { + o.disabled = true + o.logger.Warn("scm observer disabled: provider credentials unavailable") + } + return nil +} + func (o *Observer) discoverSubjects(ctx context.Context) (map[string]*subject, error) { sessions, err := o.store.ListAllSessions(ctx) if err != nil { diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index c9806d82..4e17c6a4 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -97,16 +97,34 @@ type fakeProvider struct { reviews map[string]ports.SCMReviewObservation logTails map[string]string - detectCalls int - fetchBatches [][]ports.SCMPRRef - logCalls int - reviewCalls int + credentialGate bool + credentialOK bool + credentialErr error + credentialChecks int + repoGuardCalls int + detectCalls int + fetchBatches [][]ports.SCMPRRef + logCalls int + reviewCalls int +} + +func (p *fakeProvider) SCMCredentialsAvailable(context.Context) (bool, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.credentialChecks++ + if !p.credentialGate { + return true, nil + } + return p.credentialOK, p.credentialErr } func (p *fakeProvider) ParseRepository(remote string) (ports.SCMRepo, bool) { return testRepo, remote != "" } func (p *fakeProvider) RepoPRListGuard(_ context.Context, repo ports.SCMRepo, _ string) (ports.SCMGuardResult, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.repoGuardCalls++ return p.repoGuards[prKey(repo, 0)], nil } func (p *fakeProvider) DetectPRByBranch(_ context.Context, _ ports.SCMRepo, branch string) (ports.SCMPRObservation, error) { @@ -206,6 +224,30 @@ func TestStartAsyncPerformsImmediatePollAndStopsOnCancel(t *testing.T) { } } +func TestPoll_DisablesOnceWhenCredentialsUnavailable(t *testing.T) { + store := testStoreWithSession() + provider := &fakeProvider{ + credentialGate: true, + credentialOK: false, + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v1"}}, + observations: map[string]ports.SCMObservation{}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.credentialChecks != 1 { + t.Fatalf("credential checks = %d, want one lazy check", provider.credentialChecks) + } + if provider.repoGuardCalls != 0 || provider.detectCalls != 0 || len(provider.fetchBatches) != 0 { + t.Fatalf("provider API calls should be skipped without credentials: guards=%d detects=%d batches=%d", + provider.repoGuardCalls, provider.detectCalls, len(provider.fetchBatches)) + } +} + func TestPoll_RepoETag304SkipsDetectPR(t *testing.T) { store := testStoreWithSession() provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v1", NotModified: true}}, observations: map[string]ports.SCMObservation{}} From 40fdb9b8208877fb06eb204b92a2d4964ff1efc4 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Tue, 2 Jun 2026 13:04:31 +0530 Subject: [PATCH 07/20] fix: preserve scm review threads in legacy pr writes --- .../internal/storage/sqlite/store/pr_store.go | 20 ++++++++++----- .../storage/sqlite/store/store_test.go | 25 +++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/backend/internal/storage/sqlite/store/pr_store.go b/backend/internal/storage/sqlite/store/pr_store.go index c4fc572f..1519cb82 100644 --- a/backend/internal/storage/sqlite/store/pr_store.go +++ b/backend/internal/storage/sqlite/store/pr_store.go @@ -27,13 +27,15 @@ var ( _ ports.SCMWriter = (*Store)(nil) ) -// WritePR persists a full PR observation — scalar facts, check runs, and the +// WritePR persists a legacy PR observation — scalar facts, check runs, and the // replacement comment set — in one write transaction, so the rows and the // change_log events their triggers emit are committed all-or-nothing. The scalar // PR upsert runs first so the checks'/comments' CDC triggers can resolve the -// session id from the pr row within the same transaction. +// session id from the pr row within the same transaction. It intentionally does +// not touch pr_review_threads: those rows are owned by WriteSCMObservation's +// slower review-thread refresh path. func (s *Store) WritePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, comments []domain.PullRequestComment) error { - return s.writePR(ctx, pr, checks, nil, comments, true) + return s.writePR(ctx, pr, checks, nil, comments, false, true) } // WriteSCMObservation persists a provider-neutral SCM observation in one write @@ -41,10 +43,10 @@ func (s *Store) WritePR(ctx context.Context, pr domain.PullRequest, checks []dom // and comments are replaced only when replaceReview is true, because review // polling runs at a slower cadence than metadata/CI polling. func (s *Store) WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error { - return s.writePR(ctx, pr, checks, threads, comments, replaceReview) + return s.writePR(ctx, pr, checks, threads, comments, replaceReview, replaceReview) } -func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error { +func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceThreads, replaceComments bool) error { s.writeMu.Lock() defer s.writeMu.Unlock() return s.inTx(ctx, "write pr observation", func(q *gen.Queries) error { @@ -63,18 +65,24 @@ func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []dom return err } } - if replaceReview { + if replaceThreads { if err := q.DeletePRReviewThreads(ctx, pr.URL); err != nil { return err } + } + if replaceComments { if err := q.DeletePRComments(ctx, pr.URL); err != nil { return err } + } + if replaceThreads { for _, th := range threads { if err := q.UpsertPRReviewThread(ctx, genReviewThreadParams(pr.URL, th)); err != nil { return fmt.Errorf("review thread %q: %w", th.ThreadID, err) } } + } + if replaceComments { for _, c := range comments { if err := q.InsertPRComment(ctx, genCommentParams(pr.URL, c)); err != nil { return fmt.Errorf("comment %q: %w", c.ID, err) diff --git a/backend/internal/storage/sqlite/store/store_test.go b/backend/internal/storage/sqlite/store/store_test.go index 64491e40..b94dab02 100644 --- a/backend/internal/storage/sqlite/store/store_test.go +++ b/backend/internal/storage/sqlite/store/store_test.go @@ -263,6 +263,31 @@ func TestWriteSCMObservationPersistsMetadataChecksReviewsAndComments(t *testing. } } +func TestWritePRPreservesSCMReviewThreads(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + r, _ := s.CreateSession(ctx, sampleRecord("mer")) + now := time.Now().UTC().Truncate(time.Second) + pr := domain.PullRequest{URL: "https://github.com/o/r/pull/1", SessionID: r.ID, Number: 1, UpdatedAt: now} + threads := []domain.PullRequestReviewThread{{ThreadID: "t1", Path: "main.go", Line: 7, SemanticHash: "thread-v1", UpdatedAt: now}} + + if err := s.WriteSCMObservation(ctx, pr, nil, threads, nil, true); err != nil { + t.Fatal(err) + } + if err := s.WritePR(ctx, domain.PullRequest{URL: pr.URL, SessionID: r.ID, Number: 1, CI: domain.CIPassing, UpdatedAt: now.Add(time.Second)}, nil, nil); err != nil { + t.Fatal(err) + } + + gotThreads, err := s.ListPRReviewThreads(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + if len(gotThreads) != 1 || gotThreads[0].ThreadID != "t1" || gotThreads[0].SemanticHash != "thread-v1" { + t.Fatalf("legacy WritePR must preserve SCM-owned review threads, got %+v", gotThreads) + } +} + func TestCDCTriggersPopulateChangeLog(t *testing.T) { s := newTestStore(t) ctx := context.Background() From b3c10b060c8aacce4ce4d64707116790aa4e079a Mon Sep 17 00:00:00 2001 From: whoisasx Date: Tue, 2 Jun 2026 22:13:17 +0530 Subject: [PATCH 08/20] fix: harden scm observer state refresh --- .../adapters/scm/github/observer_provider.go | 27 +- .../internal/adapters/scm/github/provider.go | 5 +- .../adapters/scm/github/provider_test.go | 47 +++ backend/internal/observe/scm/observer.go | 390 +++++++++++++++--- backend/internal/observe/scm/observer_test.go | 219 +++++++++- 5 files changed, 633 insertions(+), 55 deletions(-) diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go index e95631be..eb94986c 100644 --- a/backend/internal/adapters/scm/github/observer_provider.go +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -257,7 +257,7 @@ commits(last:1){ nodes{ commit{ oid statusCheckRollup{ state contexts(first:CONT func scmObservationFromGraphQL(ref ports.SCMPRRef, pr map[string]any) ports.SCMObservation { checks := scmChecksFromGraphQL(pr) failed := failedSCMChecks(checks) - ci := string(ciSummaryFromGraphQL(pr)) + ci := string(ciSummaryFromRollupState(pr)) review := string(reviewDecisionFromGraphQL(pr)) providerMergeable := str(pr["mergeable"]) providerMergeState := str(pr["mergeStateStatus"]) @@ -304,10 +304,18 @@ func scmObservationFromGraphQL(ref ports.SCMPRRef, pr map[string]any) ports.SCMO }, Review: ports.SCMReviewObservation{Decision: review}, } - obs.Mergeability = mergeabilityObservation(providerMergeable, providerMergeState, ci, review) + obs.Mergeability = mergeabilityObservation(providerMergeable, providerMergeState, ci, review, draft) return obs } +func ciSummaryFromRollupState(pr map[string]any) domain.CIState { + roll := statusRollup(pr) + if roll == nil { + return domain.CIUnknown + } + return mapRollupState(str(roll["state"])) +} + func scmChecksFromGraphQL(pr map[string]any) []ports.SCMCheckObservation { roll := statusRollup(pr) contexts, _ := roll["contexts"].(map[string]any) @@ -365,7 +373,7 @@ func githubFailedFingerprint(head string, checks []ports.SCMCheckObservation) st return hex.EncodeToString(sum[:]) } -func mergeabilityObservation(providerMergeable, providerMergeState, ci, review string) ports.SCMMergeabilityObservation { +func mergeabilityObservation(providerMergeable, providerMergeState, ci, review string, draft bool) ports.SCMMergeabilityObservation { state := strings.ToUpper(strings.TrimSpace(providerMergeState)) mergeable := strings.ToUpper(strings.TrimSpace(providerMergeable)) out := ports.SCMMergeabilityObservation{State: string(domain.MergeUnknown)} @@ -384,13 +392,21 @@ func mergeabilityObservation(providerMergeable, providerMergeState, ci, review s out.State = string(domain.MergeBlocked) addBlocker("blocked_by_provider") } + if draft { + out.State = string(domain.MergeBlocked) + addBlocker("draft") + } if ci == string(domain.CIFailing) { out.State = string(domain.MergeBlocked) addBlocker("ci_failing") } - if review == string(domain.ReviewChangesRequest) { + switch review { + case string(domain.ReviewChangesRequest): out.State = string(domain.MergeBlocked) addBlocker("changes_requested") + case string(domain.ReviewRequired): + out.State = string(domain.MergeBlocked) + addBlocker("review_required") } if out.State == string(domain.MergeBlocked) { return out @@ -399,7 +415,8 @@ func mergeabilityObservation(providerMergeable, providerMergeState, ci, review s out.State = string(domain.MergeUnstable) return out } - if mergeable == "MERGEABLE" && (state == "CLEAN" || state == "HAS_HOOKS" || state == "") { + if mergeable == "MERGEABLE" && (state == "CLEAN" || state == "HAS_HOOKS" || state == "") && + (review == "" || review == string(domain.ReviewApproved) || review == string(domain.ReviewNone)) && !draft { out.State = string(domain.MergeMergeable) out.Mergeable = true return out diff --git a/backend/internal/adapters/scm/github/provider.go b/backend/internal/adapters/scm/github/provider.go index 86926aca..f961cdaa 100644 --- a/backend/internal/adapters/scm/github/provider.go +++ b/backend/internal/adapters/scm/github/provider.go @@ -390,7 +390,10 @@ func mergeabilityFromGraphQL(pr map[string]any, rest restPull, ci domain.CIState return domain.MergeConflicting } - if review == domain.ReviewChangesRequest { + if rest.Draft || boolv(pr["isDraft"]) { + return domain.MergeBlocked + } + if review == domain.ReviewChangesRequest || review == domain.ReviewRequired { return domain.MergeBlocked } if ci == domain.CIFailing { diff --git a/backend/internal/adapters/scm/github/provider_test.go b/backend/internal/adapters/scm/github/provider_test.go index 9e5f221e..2502e71d 100644 --- a/backend/internal/adapters/scm/github/provider_test.go +++ b/backend/internal/adapters/scm/github/provider_test.go @@ -1144,6 +1144,53 @@ func TestSCMChecksFromGraphQL_StatusContextUsesState(t *testing.T) { } } +func TestSCMObservationUsesRollupStateWhenContextsPaginated(t *testing.T) { + fx := basePRFixture() + var pr map[string]any + fx.prData(func(m map[string]any) { + pr = m + commits := m["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + roll := commit["statusCheckRollup"].(map[string]any) + roll["state"] = "FAILURE" + ctxs := roll["contexts"].(map[string]any) + ctxs["nodes"] = []any{ + map[string]any{"__typename": "CheckRun", "name": "visible-pass", "status": "COMPLETED", "conclusion": "SUCCESS"}, + } + ctxs["pageInfo"] = map[string]any{"hasNextPage": true} + }) + obs := scmObservationFromGraphQL(ports.SCMPRRef{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "octocat", Name: "hello", Repo: "octocat/hello"}, Number: 42}, pr) + if obs.CI.Summary != string(domain.CIFailing) { + t.Fatalf("observer CI summary = %q, want failing from aggregate rollup state", obs.CI.Summary) + } + if len(obs.CI.FailedChecks) != 0 { + t.Fatalf("failed checks should only contain visible failing contexts, got %#v", obs.CI.FailedChecks) + } +} + +func TestSCMMergeabilityBlocksReviewRequiredAndDraft(t *testing.T) { + cases := []struct { + name string + review string + draft bool + wantBlocker string + }{ + {name: "review required", review: string(domain.ReviewRequired), wantBlocker: "review_required"}, + {name: "draft", review: string(domain.ReviewApproved), draft: true, wantBlocker: "draft"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := mergeabilityObservation("MERGEABLE", "CLEAN", string(domain.CIPassing), tc.review, tc.draft) + if got.State != string(domain.MergeBlocked) || got.Mergeable { + t.Fatalf("mergeability = %+v, want blocked and not mergeable", got) + } + if !contains(got.Blockers, tc.wantBlocker) { + t.Fatalf("blockers = %v, want %q", got.Blockers, tc.wantBlocker) + } + }) + } +} + func TestFetchReviewThreadsStopsAtPageLimit(t *testing.T) { fake := newFakeGH(t) fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index 1eba9fdf..7fd18a95 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -86,12 +86,17 @@ type ObserverCache struct { ReviewETag map[string]string // LastReviewPollAt maps PR keys to the last review-thread fetch timestamp. LastReviewPollAt map[string]time.Time + // ReviewRefreshFailed marks PRs whose review-thread refresh failed; the + // next poll retries regardless of the normal review cadence/status rules. + ReviewRefreshFailed map[string]bool // repoOrder tracks FIFO eviction order for RepoPRListETag. repoOrder []string // commitOrder tracks FIFO eviction order for CommitChecksETag. commitOrder []string // lastReviewPollOrder tracks FIFO eviction order for LastReviewPollAt. lastReviewPollOrder []string + // reviewFailedOrder tracks FIFO eviction order for ReviewRefreshFailed. + reviewFailedOrder []string // max is the maximum number of entries each cache map retains. max int } @@ -101,11 +106,12 @@ func newCache(maxEntries int) ObserverCache { maxEntries = DefaultCacheMax } return ObserverCache{ - RepoPRListETag: map[string]string{}, - CommitChecksETag: map[string]string{}, - ReviewETag: map[string]string{}, - LastReviewPollAt: map[string]time.Time{}, - max: maxEntries, + RepoPRListETag: map[string]string{}, + CommitChecksETag: map[string]string{}, + ReviewETag: map[string]string{}, + LastReviewPollAt: map[string]time.Time{}, + ReviewRefreshFailed: map[string]bool{}, + max: maxEntries, } } @@ -196,6 +202,25 @@ type repoGuardState struct { err error } +type pendingCacheString struct { + key string + value string +} + +type refreshSelection struct { + refs []ports.SCMPRRef + subjectsByPR map[string]*subject + commitETags map[string]pendingCacheString + candidateKeys map[string]bool +} + +type persistenceOptions struct { + reviewFetched bool + preserveLocalMetadata bool + preserveLocalCI bool + preserveLocalReview bool +} + // Poll runs one synchronous SCM observation cycle. func (o *Observer) Poll(ctx context.Context) error { now := o.clock().UTC() @@ -220,25 +245,40 @@ func (o *Observer) Poll(ctx context.Context) error { } repoGuards := o.guardRepos(ctx, subjects) + repoRefreshOK := pendingRepoRefreshes(repoGuards) + markRepoRefreshFailed := func(repo ports.SCMRepo) { + key := prKey(repo, 0) + if _, ok := repoRefreshOK[key]; ok { + repoRefreshOK[key] = false + } + } if err := ctx.Err(); err != nil { return err } - o.detectMissingPRs(ctx, subjects, repoGuards, now) + o.detectMissingPRs(ctx, subjects, repoGuards, now, markRepoRefreshFailed) if err := ctx.Err(); err != nil { return err } - refs, subjectsByPR := o.selectRefreshCandidates(ctx, subjects, repoGuards) + selection := o.selectRefreshCandidates(ctx, subjects, repoGuards, markRepoRefreshFailed) observations := map[string]ports.SCMObservation{} - for _, chunk := range chunks(refs, BatchSize) { + prRefreshOK := map[string]bool{} + for key := range selection.candidateKeys { + prRefreshOK[key] = false + } + for _, chunk := range chunks(selection.refs, BatchSize) { if err := ctx.Err(); err != nil { return err } batch, err := o.provider.FetchPullRequests(ctx, chunk) if err != nil { o.logger.Error("scm observer: GraphQL PR batch failed", "err", err) + for _, ref := range chunk { + markRepoRefreshFailed(ref.Repo) + } continue } + chunkSeen := map[string]bool{} for _, obs := range batch { obs.ObservedAt = now key := prKeyFromObs(obs) @@ -246,10 +286,17 @@ func (o *Observer) Poll(ctx context.Context) error { continue } observations[key] = obs + chunkSeen[key] = true + } + for _, ref := range chunk { + key := prKey(ref.Repo, ref.Number) + if !chunkSeen[key] { + markRepoRefreshFailed(ref.Repo) + } } } - for key, subj := range subjectsByPR { + for key, subj := range selection.subjectsByPR { if err := ctx.Err(); err != nil { return err } @@ -263,7 +310,9 @@ func (o *Observer) Poll(ctx context.Context) error { } reviewRefreshed := map[string]bool{} - o.refreshReviews(ctx, subjects, observations, subjectsByPR, reviewRefreshed, now) + localOnlyObservations := map[string]bool{} + reviewStale := map[string]bool{} + o.refreshReviews(ctx, subjects, observations, selection.subjectsByPR, reviewRefreshed, localOnlyObservations, reviewStale, now) if err := ctx.Err(); err != nil { return err } @@ -272,24 +321,54 @@ func (o *Observer) Poll(ctx context.Context) error { if err := ctx.Err(); err != nil { return err } - subj, ok := subjectsByPR[key] + subj, ok := selection.subjectsByPR[key] if !ok { continue } local := subj.known - prepared := o.prepareForPersistence(obs, local, reviewRefreshed[key], now) + opts := persistenceOptions{ + reviewFetched: reviewRefreshed[key], + preserveLocalMetadata: localOnlyObservations[key], + preserveLocalCI: localOnlyObservations[key], + preserveLocalReview: reviewStale[key], + } + prepared := o.prepareForPersistence(obs, local, opts, now) if !prepared.Changed.Metadata && !prepared.Changed.CI && !prepared.Changed.Review { + prRefreshOK[key] = true continue } - pr, checks, threads, comments := domainFromObservation(subj.session.ID, prepared, local, reviewRefreshed[key], now) + if o.lifecycle != nil { + if err := o.lifecycle.ApplySCMObservation(ctx, subj.session.ID, prepared); err != nil { + o.logger.Error("scm observer: lifecycle notification failed", "session", subj.session.ID, "pr", firstNonEmpty(prepared.PR.URL, prepared.PR.HTMLURL, local.URL), "err", err) + markRepoRefreshFailed(subj.repo) + continue + } + } + pr, checks, threads, comments := domainFromObservation(subj.session.ID, prepared, local, opts, now) if err := o.store.WriteSCMObservation(ctx, pr, checks, threads, comments, reviewRefreshed[key]); err != nil { o.logger.Error("scm observer: DB write failed", "session", subj.session.ID, "pr", pr.URL, "err", err) + markRepoRefreshFailed(subj.repo) continue } - if o.lifecycle != nil { - if err := o.lifecycle.ApplySCMObservation(ctx, subj.session.ID, prepared); err != nil { - o.logger.Error("scm observer: lifecycle notification failed", "session", subj.session.ID, "pr", pr.URL, "err", err) - } + prRefreshOK[key] = true + } + for key, ok := range prRefreshOK { + if !ok { + continue + } + if pending, found := selection.commitETags[key]; found { + o.cacheSetString(o.Cache.CommitChecksETag, &o.Cache.commitOrder, pending.key, pending.value) + } + if reviewRefreshed[key] { + o.cacheSetTime(o.Cache.LastReviewPollAt, &o.Cache.lastReviewPollOrder, key, now) + } + } + for key, ok := range repoRefreshOK { + if !ok { + continue + } + if etag := repoGuards[key].result.ETag; etag != "" { + o.cacheSetString(o.Cache.RepoPRListETag, &o.Cache.repoOrder, key, etag) } } return nil @@ -359,7 +438,12 @@ func (o *Observer) discoverSubjects(ctx context.Context) (map[string]*subject, e known, hasPR := chooseKnownPR(prs) s := &subject{session: sess, project: proj, repo: repo, branch: branch, known: known, hasPR: hasPR} if hasPR && known.Number > 0 { - out[prKey(repo, known.Number)] = s + key := prKey(repo, known.Number) + if existing, ok := out[key]; ok { + o.logger.Warn("scm observer: duplicate tracked PR ownership skipped", "pr", key, "kept_session", existing.session.ID, "skipped_session", sess.ID) + continue + } + out[key] = s } else { out["session:"+string(sess.ID)] = s } @@ -398,15 +482,22 @@ func (o *Observer) guardRepos(ctx context.Context, subjects map[string]*subject) out[key] = repoGuardState{hadETag: had, err: err} continue } - if res.ETag != "" { - o.cacheSetString(o.Cache.RepoPRListETag, &o.Cache.repoOrder, key, res.ETag) - } out[key] = repoGuardState{result: res, hadETag: had} } return out } -func (o *Observer) detectMissingPRs(ctx context.Context, subjects map[string]*subject, guards map[string]repoGuardState, now time.Time) { +func pendingRepoRefreshes(guards map[string]repoGuardState) map[string]bool { + out := map[string]bool{} + for key, g := range guards { + if g.err == nil && g.result.ETag != "" { + out[key] = true + } + } + return out +} + +func (o *Observer) detectMissingPRs(ctx context.Context, subjects map[string]*subject, guards map[string]repoGuardState, now time.Time, markRepoFailed func(ports.SCMRepo)) { for oldKey, s := range subjects { if s.hasPR { continue @@ -421,29 +512,40 @@ func (o *Observer) detectMissingPRs(ctx context.Context, subjects map[string]*su pr, err := o.provider.DetectPRByBranch(ctx, s.repo, s.branch) if err != nil { o.logger.Debug("scm observer: no PR detected for branch", "session", s.session.ID, "branch", s.branch, "err", err) + if markRepoFailed != nil { + markRepoFailed(s.repo) + } continue } if pr.Number <= 0 { continue } + newKey := prKey(s.repo, pr.Number) + if existing, ok := subjects[newKey]; ok && existing != s { + o.logger.Warn("scm observer: detected PR is already tracked by another session", "pr", newKey, "kept_session", existing.session.ID, "skipped_session", s.session.ID) + continue + } s.known = domain.PullRequest{URL: pr.URL, SessionID: s.session.ID, Number: pr.Number, SourceBranch: pr.SourceBranch, TargetBranch: pr.TargetBranch, HeadSHA: pr.HeadSHA, Provider: s.repo.Provider, Host: s.repo.Host, Repo: repoFullName(s.repo), UpdatedAt: now} s.hasPR = true s.newlyDetected = true delete(subjects, oldKey) - subjects[prKey(s.repo, pr.Number)] = s + subjects[newKey] = s } } -func (o *Observer) selectRefreshCandidates(ctx context.Context, subjects map[string]*subject, guards map[string]repoGuardState) ([]ports.SCMPRRef, map[string]*subject) { - var refs []ports.SCMPRRef - subjectsByPR := map[string]*subject{} +func (o *Observer) selectRefreshCandidates(ctx context.Context, subjects map[string]*subject, guards map[string]repoGuardState, markRepoFailed func(ports.SCMRepo)) refreshSelection { + selection := refreshSelection{ + subjectsByPR: map[string]*subject{}, + commitETags: map[string]pendingCacheString{}, + candidateKeys: map[string]bool{}, + } seen := map[string]bool{} for _, s := range subjects { if !s.hasPR || s.known.Number <= 0 { continue } key := prKey(s.repo, s.known.Number) - subjectsByPR[key] = s + selection.subjectsByPR[key] = s candidate := s.newlyDetected || missingLocalState(s.known) g := guards[prKey(s.repo, 0)] if g.err == nil && !g.result.NotModified { @@ -455,21 +557,23 @@ func (o *Observer) selectRefreshCandidates(ctx context.Context, subjects map[str res, err := o.provider.CommitChecksGuard(ctx, s.repo, s.known.HeadSHA, prev) if err != nil { o.logger.Error("scm observer: commit check-runs guard failed", "pr", s.known.URL, "sha", s.known.HeadSHA, "err", err) - } else { - if res.ETag != "" { - o.cacheSetString(o.Cache.CommitChecksETag, &o.Cache.commitOrder, commitKey, res.ETag) + if markRepoFailed != nil { + markRepoFailed(s.repo) } - if !res.NotModified { - candidate = true + } else if !res.NotModified { + candidate = true + if res.ETag != "" { + selection.commitETags[key] = pendingCacheString{key: commitKey, value: res.ETag} } } } if candidate && !seen[key] { - refs = append(refs, ports.SCMPRRef{Repo: s.repo, Number: s.known.Number, URL: s.known.URL}) + selection.refs = append(selection.refs, ports.SCMPRRef{Repo: s.repo, Number: s.known.Number, URL: s.known.URL}) + selection.candidateKeys[key] = true seen[key] = true } } - return refs, subjectsByPR + return selection } func missingLocalState(pr domain.PullRequest) bool { @@ -508,6 +612,9 @@ func (o *Observer) enrichFailureLogs(ctx context.Context, obs *ports.SCMObservat func applyStoredFailedLogTails(obs *ports.SCMObservation, checks []domain.PullRequestCheck) bool { tailsByName := map[string]string{} for _, ch := range checks { + if obs.CI.HeadSHA != "" && ch.CommitHash != "" && ch.CommitHash != obs.CI.HeadSHA { + continue + } if ch.LogTail != "" && (ch.Status == domain.PRCheckFailed || ch.Status == domain.PRCheckCancelled) { tailsByName[ch.Name] = ch.LogTail } @@ -533,7 +640,7 @@ func applyStoredFailedLogTails(obs *ports.SCMObservation, checks []domain.PullRe return true } -func (o *Observer) refreshReviews(ctx context.Context, subjects map[string]*subject, observations map[string]ports.SCMObservation, subjectsByPR map[string]*subject, reviewRefreshed map[string]bool, now time.Time) { +func (o *Observer) refreshReviews(ctx context.Context, subjects map[string]*subject, observations map[string]ports.SCMObservation, subjectsByPR map[string]*subject, reviewRefreshed, localOnlyObservations, reviewStale map[string]bool, now time.Time) { for _, s := range subjects { if !s.hasPR || s.known.Number <= 0 { continue @@ -550,10 +657,23 @@ func (o *Observer) refreshReviews(ctx context.Context, subjects map[string]*subj review, err := o.provider.FetchReviewThreads(ctx, ports.SCMPRRef{Repo: s.repo, Number: s.known.Number, URL: s.known.URL}) if err != nil { o.logger.Error("scm observer: review refresh failed", "pr", s.known.URL, "err", err) + o.cacheSetBool(o.Cache.ReviewRefreshFailed, &o.Cache.reviewFailedOrder, pkey, true) + if hasObs { + obs.Review.Decision = string(s.known.Review) + obs.Review.Threads = nil + observations[pkey] = obs + subjectsByPR[pkey] = s + reviewStale[pkey] = true + } continue } if !hasObs { - obs = observationFromLocal(s.repo, s.known) + checks, err := o.store.ListChecks(ctx, s.known.URL) + if err != nil { + o.logger.Error("scm observer: list local checks for review-only refresh failed", "pr", s.known.URL, "err", err) + } + obs = observationFromLocal(s.repo, s.known, checks) + localOnlyObservations[pkey] = true } if review.Decision != "" { obs.Review.Decision = review.Decision @@ -563,11 +683,16 @@ func (o *Observer) refreshReviews(ctx context.Context, subjects map[string]*subj observations[pkey] = obs subjectsByPR[pkey] = s reviewRefreshed[pkey] = true - o.cacheSetTime(o.Cache.LastReviewPollAt, &o.Cache.lastReviewPollOrder, pkey, now) + if _, ok := o.Cache.ReviewRefreshFailed[pkey]; ok { + o.Cache.ReviewRefreshFailed[pkey] = false + } } } func (o *Observer) needsReviewRefresh(key string, local domain.PullRequest, decision string, hasObs bool, now time.Time) bool { + if o.Cache.ReviewRefreshFailed[key] { + return true + } if local.ReviewHash == "" { return true } @@ -584,11 +709,17 @@ func (o *Observer) needsReviewRefresh(key string, local domain.PullRequest, deci return false } -func (o *Observer) prepareForPersistence(obs ports.SCMObservation, local domain.PullRequest, reviewFetched bool, now time.Time) ports.SCMObservation { +func (o *Observer) prepareForPersistence(obs ports.SCMObservation, local domain.PullRequest, opts persistenceOptions, now time.Time) ports.SCMObservation { metadataHash := metadataSemanticHash(obs) + if opts.preserveLocalMetadata { + metadataHash = local.MetadataHash + } ciHash := ciSemanticHash(obs.CI) + if opts.preserveLocalCI { + ciHash = local.CIHash + } reviewHash := local.ReviewHash - if reviewFetched || local.ReviewHash == "" || obs.Review.Decision != string(local.Review) { + if !opts.preserveLocalReview && (opts.reviewFetched || local.ReviewHash == "" || obs.Review.Decision != string(local.Review)) { reviewHash = reviewSemanticHash(obs.Review) } obs.Changed = ports.SCMChanged{ @@ -601,16 +732,33 @@ func (o *Observer) prepareForPersistence(obs ports.SCMObservation, local domain. return obs } -func domainFromObservation(sessionID domain.SessionID, obs ports.SCMObservation, local domain.PullRequest, reviewFetched bool, now time.Time) (domain.PullRequest, []domain.PullRequestCheck, []domain.PullRequestReviewThread, []domain.PullRequestComment) { +func domainFromObservation(sessionID domain.SessionID, obs ports.SCMObservation, local domain.PullRequest, opts persistenceOptions, now time.Time) (domain.PullRequest, []domain.PullRequestCheck, []domain.PullRequestReviewThread, []domain.PullRequestComment) { metadataHash := metadataSemanticHash(obs) + if opts.preserveLocalMetadata { + metadataHash = local.MetadataHash + } ciHash := ciSemanticHash(obs.CI) + if opts.preserveLocalCI { + ciHash = local.CIHash + } reviewHash := reviewSemanticHash(obs.Review) reviewDecision := domain.ReviewDecision(firstNonEmpty(obs.Review.Decision, string(domain.ReviewNone))) - if !reviewFetched && local.ReviewHash != "" && reviewDecision == local.Review { + if opts.preserveLocalReview { + reviewDecision = local.Review + reviewHash = local.ReviewHash + } else if !opts.reviewFetched && local.ReviewHash != "" && reviewDecision == local.Review { reviewHash = local.ReviewHash } + observedAt := obs.ObservedAt + if !obs.Changed.Metadata && !obs.Changed.CI && !local.ObservedAt.IsZero() { + observedAt = local.ObservedAt + } + ciObservedAt := local.CIObservedAt + if obs.Changed.CI || ciObservedAt.IsZero() { + ciObservedAt = obs.ObservedAt + } reviewObservedAt := local.ReviewObservedAt - if reviewFetched || reviewObservedAt.IsZero() { + if opts.reviewFetched || reviewObservedAt.IsZero() { reviewObservedAt = obs.ObservedAt } pr := domain.PullRequest{ @@ -648,8 +796,8 @@ func domainFromObservation(sessionID domain.SessionID, obs ports.SCMObservation, MetadataHash: metadataHash, CIHash: ciHash, ReviewHash: reviewHash, - ObservedAt: obs.ObservedAt, - CIObservedAt: obs.ObservedAt, + ObservedAt: observedAt, + CIObservedAt: ciObservedAt, ReviewObservedAt: reviewObservedAt, } checks := make([]domain.PullRequestCheck, 0, len(obs.CI.Checks)) @@ -671,17 +819,153 @@ func domainFromObservation(sessionID domain.SessionID, obs ports.SCMObservation, return pr, checks, threads, comments } -func observationFromLocal(repo ports.SCMRepo, pr domain.PullRequest) ports.SCMObservation { +func observationFromLocal(repo ports.SCMRepo, pr domain.PullRequest, checks []domain.PullRequestCheck) ports.SCMObservation { return ports.SCMObservation{ Fetched: true, Provider: firstNonEmpty(pr.Provider, repo.Provider), Host: firstNonEmpty(pr.Host, repo.Host), Repo: firstNonEmpty(pr.Repo, repoFullName(repo)), PR: ports.SCMPRObservation{URL: pr.URL, Number: pr.Number, State: normalizePRState(pr.Draft, pr.Merged, pr.Closed), Draft: pr.Draft, Merged: pr.Merged, Closed: pr.Closed, SourceBranch: pr.SourceBranch, TargetBranch: pr.TargetBranch, HeadSHA: pr.HeadSHA, Title: pr.Title, Additions: pr.Additions, Deletions: pr.Deletions, ChangedFiles: pr.ChangedFiles, Author: pr.Author, BaseSHA: pr.BaseSHA, MergeCommitSHA: pr.MergeCommitSHA, ProviderState: pr.ProviderState, ProviderMergeable: pr.ProviderMergeable, ProviderMergeStateStatus: pr.ProviderMergeStateStatus, HTMLURL: pr.HTMLURL, CreatedAtProvider: pr.CreatedAtProvider, UpdatedAtProvider: pr.UpdatedAtProvider, MergedAtProvider: pr.MergedAtProvider, ClosedAtProvider: pr.ClosedAtProvider}, - CI: ports.SCMCIObservation{Summary: string(pr.CI), HeadSHA: pr.HeadSHA}, + CI: ciObservationFromLocal(pr, checks), Review: ports.SCMReviewObservation{Decision: string(pr.Review)}, - Mergeability: ports.SCMMergeabilityObservation{State: string(pr.Mergeability)}, + Mergeability: mergeabilityObservationFromLocal(pr), + } +} + +func ciObservationFromLocal(pr domain.PullRequest, checks []domain.PullRequestCheck) ports.SCMCIObservation { + ci := ports.SCMCIObservation{ + Summary: firstNonEmpty(string(pr.CI), string(domain.CIUnknown)), + HeadSHA: pr.HeadSHA, + FailedFingerprint: failedFingerprintFromCIHash(pr.CIHash), + } + tails := []string{} + for _, ch := range checks { + if pr.HeadSHA != "" && ch.CommitHash != "" && ch.CommitHash != pr.HeadSHA { + continue + } + if ci.HeadSHA == "" { + ci.HeadSHA = ch.CommitHash + } + check := ports.SCMCheckObservation{ + Name: ch.Name, + Status: string(ch.Status), + Conclusion: ch.Conclusion, + URL: ch.URL, + LogTail: ch.LogTail, + ProviderID: ch.Details, + } + ci.Checks = append(ci.Checks, check) + if ch.Status == domain.PRCheckFailed || ch.Status == domain.PRCheckCancelled { + ci.FailedChecks = append(ci.FailedChecks, check) + if ch.LogTail != "" { + tails = append(tails, ch.LogTail) + } + } } + ci.FailureLogTail = strings.Join(tails, "\n---\n") + return ci +} + +func failedFingerprintFromCIHash(hash string) string { + before, _, ok := strings.Cut(hash, ":") + if !ok { + return "" + } + return before +} + +func mergeabilityObservationFromLocal(pr domain.PullRequest) ports.SCMMergeabilityObservation { + out := mergeabilityFromProviderFacts(pr.ProviderMergeable, pr.ProviderMergeStateStatus, string(pr.CI), string(pr.Review), pr.Draft) + if pr.Mergeability != "" && out.State != string(pr.Mergeability) { + out = ports.SCMMergeabilityObservation{State: string(pr.Mergeability)} + } else if pr.Mergeability != "" { + out.State = string(pr.Mergeability) + } + switch domain.Mergeability(out.State) { + case domain.MergeMergeable: + out.Mergeable = true + case domain.MergeConflicting: + out.Conflict = true + if len(out.Blockers) == 0 { + out.Blockers = append(out.Blockers, "conflicts") + } + case domain.MergeBlocked: + if len(out.Blockers) == 0 { + out.Blockers = mergeBlockersFromLocal(pr) + } + } + return out +} + +func mergeBlockersFromLocal(pr domain.PullRequest) []string { + blockers := []string{} + if pr.Draft { + blockers = append(blockers, "draft") + } + if pr.CI == domain.CIFailing { + blockers = append(blockers, "ci_failing") + } + switch pr.Review { + case domain.ReviewChangesRequest: + blockers = append(blockers, "changes_requested") + case domain.ReviewRequired: + blockers = append(blockers, "review_required") + } + if len(blockers) == 0 { + blockers = append(blockers, "blocked_by_provider") + } + return blockers +} + +func mergeabilityFromProviderFacts(providerMergeable, providerMergeState, ci, review string, draft bool) ports.SCMMergeabilityObservation { + state := strings.ToUpper(strings.TrimSpace(providerMergeState)) + mergeable := strings.ToUpper(strings.TrimSpace(providerMergeable)) + out := ports.SCMMergeabilityObservation{State: string(domain.MergeUnknown)} + addBlocker := func(b string) { out.Blockers = append(out.Blockers, b) } + if state == "DIRTY" || mergeable == "CONFLICTING" { + out.State = string(domain.MergeConflicting) + out.Conflict = true + addBlocker("conflicts") + return out + } + if state == "BEHIND" || state == "BEHIND_BASE" { + out.BehindBase = true + addBlocker("behind_base") + } + if state == "BLOCKED" { + out.State = string(domain.MergeBlocked) + addBlocker("blocked_by_provider") + } + if draft { + out.State = string(domain.MergeBlocked) + addBlocker("draft") + } + if ci == string(domain.CIFailing) { + out.State = string(domain.MergeBlocked) + addBlocker("ci_failing") + } + switch review { + case string(domain.ReviewChangesRequest): + out.State = string(domain.MergeBlocked) + addBlocker("changes_requested") + case string(domain.ReviewRequired): + out.State = string(domain.MergeBlocked) + addBlocker("review_required") + } + if out.State == string(domain.MergeBlocked) { + return out + } + if state == "UNSTABLE" { + out.State = string(domain.MergeUnstable) + return out + } + if mergeable == "MERGEABLE" && (state == "CLEAN" || state == "HAS_HOOKS" || state == "") && + (review == "" || review == string(domain.ReviewApproved) || review == string(domain.ReviewNone)) && !draft { + out.State = string(domain.MergeMergeable) + out.Mergeable = true + return out + } + return out } func chunks[T any](in []T, n int) [][]T { @@ -824,6 +1108,18 @@ func (o *Observer) cacheSetTime(m map[string]time.Time, order *[]string, key str } } +func (o *Observer) cacheSetBool(m map[string]bool, order *[]string, key string, value bool) { + if _, ok := m[key]; !ok { + *order = append(*order, key) + } + m[key] = value + for len(*order) > o.Cache.max { + evict := (*order)[0] + *order = (*order)[1:] + delete(m, evict) + } +} + func (o *Observer) evictStrings(m map[string]string, order *[]string) { for len(*order) > o.Cache.max { evict := (*order)[0] diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index 4e17c6a4..01759176 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -27,6 +27,7 @@ type fakeStore struct { projects map[string]domain.ProjectRecord prs map[domain.SessionID][]domain.PullRequest checks map[string][]domain.PullRequestCheck + writeErr error writes []fakeWrite @@ -84,6 +85,9 @@ func (s *fakeStore) ListChecks(_ context.Context, prURL string) ([]domain.PullRe func (s *fakeStore) WriteSCMObservation(_ context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error { s.mu.Lock() defer s.mu.Unlock() + if s.writeErr != nil { + return s.writeErr + } s.writes = append(s.writes, fakeWrite{pr: pr, checks: append([]domain.PullRequestCheck(nil), checks...), threads: append([]domain.PullRequestReviewThread(nil), threads...), comments: append([]domain.PullRequestComment(nil), comments...), replaceReview: replaceReview}) return nil } @@ -96,6 +100,8 @@ type fakeProvider struct { observations map[string]ports.SCMObservation reviews map[string]ports.SCMReviewObservation logTails map[string]string + fetchErr error + reviewErr error credentialGate bool credentialOK bool @@ -144,6 +150,9 @@ func (p *fakeProvider) FetchPullRequests(_ context.Context, refs []ports.SCMPRRe p.mu.Lock() defer p.mu.Unlock() p.fetchBatches = append(p.fetchBatches, append([]ports.SCMPRRef(nil), refs...)) + if p.fetchErr != nil { + return nil, p.fetchErr + } out := make([]ports.SCMObservation, 0, len(refs)) for _, ref := range refs { if obs, ok := p.observations[prKey(ref.Repo, ref.Number)]; ok { @@ -162,12 +171,21 @@ func (p *fakeProvider) FetchReviewThreads(_ context.Context, ref ports.SCMPRRef) p.mu.Lock() defer p.mu.Unlock() p.reviewCalls++ + if p.reviewErr != nil { + return ports.SCMReviewObservation{}, p.reviewErr + } return p.reviews[prKey(ref.Repo, ref.Number)], nil } -type fakeLifecycle struct{ observed []ports.SCMObservation } +type fakeLifecycle struct { + observed []ports.SCMObservation + err error +} func (l *fakeLifecycle) ApplySCMObservation(_ context.Context, _ domain.SessionID, obs ports.SCMObservation) error { + if l.err != nil { + return l.err + } l.observed = append(l.observed, obs) return nil } @@ -199,7 +217,7 @@ func testObs(num int) ports.SCMObservation { func knownPR(num int) domain.PullRequest { obs := testObs(num) - pr, _, _, _ := domainFromObservation("p-1", obs, domain.PullRequest{}, false, time.Unix(1, 0).UTC()) + pr, _, _, _ := domainFromObservation("p-1", obs, domain.PullRequest{}, persistenceOptions{}, time.Unix(1, 0).UTC()) return pr } @@ -432,3 +450,200 @@ func TestPoll_ReviewHashDrivesPersistenceAndLifecycle(t *testing.T) { t.Fatalf("review change not notified: %#v", lc.observed) } } + +func TestPoll_ReviewOnlyRefreshPreservesLocalCIAndMetadata(t *testing.T) { + store := testStoreWithSession() + localObs := testObs(1) + local := knownPR(1) + local.CI = domain.CIPassing + local.Review = domain.ReviewChangesRequest + local.ReviewHash = "old-review" + local.MetadataHash = metadataSemanticHash(localObs) + local.CIHash = ciSemanticHash(localObs.CI) + local.ObservedAt = time.Unix(10, 0).UTC() + local.CIObservedAt = time.Unix(11, 0).UTC() + local.ReviewObservedAt = time.Unix(12, 0).UTC() + store.prs["p-1"] = []domain.PullRequest{local} + store.checks[local.URL] = []domain.PullRequestCheck{ + {Name: "build", CommitHash: "sha1", Status: domain.PRCheckPassed, Conclusion: "success", URL: "ci"}, + {Name: "stale", CommitHash: "old-sha", Status: domain.PRCheckFailed, Conclusion: "failure", URL: "old-ci", LogTail: "old tail"}, + } + review := ports.SCMReviewObservation{ + Decision: string(domain.ReviewChangesRequest), + Threads: []ports.SCMReviewThreadObservation{{ID: "t1", Path: "f.go", Line: 2, Comments: []ports.SCMReviewCommentObservation{{ID: "c1", Author: "ann", Body: "fix"}}}}, + } + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, + reviews: map[string]ports.SCMReviewObservation{prKey(testRepo, 1): review}, + } + now := time.Unix(200, 0).UTC() + obs := newTestObserver(store, provider, &fakeLifecycle{}, now) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) != 1 { + t.Fatalf("writes = %d, want review-only write", len(store.writes)) + } + write := store.writes[0] + if write.pr.MetadataHash != local.MetadataHash { + t.Fatalf("metadata hash changed on review-only refresh: got %q want %q", write.pr.MetadataHash, local.MetadataHash) + } + if write.pr.CIHash != local.CIHash { + t.Fatalf("CI hash changed on review-only refresh: got %q want %q", write.pr.CIHash, local.CIHash) + } + if !write.pr.ObservedAt.Equal(local.ObservedAt) { + t.Fatalf("ObservedAt changed on review-only refresh: got %s want %s", write.pr.ObservedAt, local.ObservedAt) + } + if !write.pr.CIObservedAt.Equal(local.CIObservedAt) { + t.Fatalf("CIObservedAt changed on review-only refresh: got %s want %s", write.pr.CIObservedAt, local.CIObservedAt) + } + if !write.pr.ReviewObservedAt.Equal(now) { + t.Fatalf("ReviewObservedAt = %s, want %s", write.pr.ReviewObservedAt, now) + } + if len(write.checks) != 1 || write.checks[0].Name != "build" { + t.Fatalf("review-only local reconstruction should include current-head checks only: %#v", write.checks) + } +} + +func TestPoll_ReviewFetchFailureDoesNotUpdateReviewDecision(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.Review = domain.ReviewChangesRequest + local.ReviewHash = reviewSemanticHash(ports.SCMReviewObservation{Decision: string(domain.ReviewChangesRequest), Threads: []ports.SCMReviewThreadObservation{{ID: "old", Comments: []ports.SCMReviewCommentObservation{{ID: "c-old", Body: "old"}}}}}) + obsValue := testObs(1) + obsValue.Review.Decision = string(domain.ReviewApproved) + local.MetadataHash = metadataSemanticHash(obsValue) + local.CIHash = ciSemanticHash(obsValue.CI) + store.prs["p-1"] = []domain.PullRequest{local} + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo2"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): obsValue}, + reviewErr: errors.New("review API down"), + } + lc := &fakeLifecycle{} + obs := newTestObserver(store, provider, lc, time.Unix(300, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.reviewCalls != 1 { + t.Fatalf("reviewCalls = %d, want 1", provider.reviewCalls) + } + if len(store.writes) != 0 || len(lc.observed) != 0 { + t.Fatalf("review fetch failure should not persist/notify stale review data: writes=%#v lifecycle=%#v", store.writes, lc.observed) + } + if !obs.Cache.ReviewRefreshFailed[prKey(testRepo, 1)] { + t.Fatalf("review fetch failure was not marked for retry") + } +} + +func TestPoll_DoesNotCommitCommitETagWhenFetchFails(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + store.prs["p-1"] = []domain.PullRequest{local} + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, + checkGuards: map[string]ports.SCMGuardResult{commitKey(testRepo, "sha1"): {ETag: "ci2"}}, + fetchErr: errors.New("graphql down"), + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(400, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")] = "ci1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if got := obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")]; got != "ci1" { + t.Fatalf("commit ETag advanced after failed fetch: got %q want ci1", got) + } +} + +func TestPoll_LifecycleFailureDoesNotWriteOrAdvanceETags(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.MetadataHash = "old-metadata" + local.CIHash = "old-ci" + store.prs["p-1"] = []domain.PullRequest{local} + changed := testObs(1) + changed.PR.Title = "changed title" + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo2"}}, + checkGuards: map[string]ports.SCMGuardResult{commitKey(testRepo, "sha1"): {ETag: "ci2"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): changed}, + } + lc := &fakeLifecycle{err: errors.New("lifecycle down")} + obs := newTestObserver(store, provider, lc, time.Unix(500, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo1" + obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")] = "ci1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) != 0 { + t.Fatalf("write occurred before lifecycle accepted observation: %#v", store.writes) + } + if got := obs.Cache.RepoPRListETag[prKey(testRepo, 0)]; got != "repo1" { + t.Fatalf("repo ETag advanced after lifecycle failure: got %q want repo1", got) + } + if got := obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")]; got != "ci1" { + t.Fatalf("commit ETag advanced after lifecycle failure: got %q want ci1", got) + } +} + +func TestPoll_WriteFailureDoesNotAdvanceGuardETags(t *testing.T) { + store := testStoreWithSession() + store.writeErr = errors.New("db down") + local := knownPR(1) + local.MetadataHash = "old-metadata" + local.CIHash = "old-ci" + store.prs["p-1"] = []domain.PullRequest{local} + changed := testObs(1) + changed.PR.Title = "changed title" + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo2"}}, + checkGuards: map[string]ports.SCMGuardResult{commitKey(testRepo, "sha1"): {ETag: "ci2"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): changed}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(550, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo1" + obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")] = "ci1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if got := obs.Cache.RepoPRListETag[prKey(testRepo, 0)]; got != "repo1" { + t.Fatalf("repo ETag advanced after write failure: got %q want repo1", got) + } + if got := obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")]; got != "ci1" { + t.Fatalf("commit ETag advanced after write failure: got %q want ci1", got) + } +} + +func TestPoll_DuplicateTrackedPRKeepsFirstSession(t *testing.T) { + store := &fakeStore{ + sessions: []domain.SessionRecord{ + {ID: "p-1", ProjectID: "p", Metadata: domain.SessionMetadata{Branch: "feat"}}, + {ID: "p-2", ProjectID: "p", Metadata: domain.SessionMetadata{Branch: "feat"}}, + }, + projects: map[string]domain.ProjectRecord{"p": {ID: "p", RepoOriginURL: "https://github.com/o/r.git"}}, + prs: map[domain.SessionID][]domain.PullRequest{}, + checks: map[string][]domain.PullRequestCheck{}, + } + pr1 := knownPR(1) + pr1.MetadataHash = "old-1" + pr2 := pr1 + pr2.SessionID = "p-2" + store.prs["p-1"] = []domain.PullRequest{pr1} + store.prs["p-2"] = []domain.PullRequest{pr2} + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo2"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): testObs(1)}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(600, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) != 1 { + t.Fatalf("writes = %d, want exactly one owner write", len(store.writes)) + } + if store.writes[0].pr.SessionID != "p-1" { + t.Fatalf("duplicate owner overwrote first session: wrote session %s", store.writes[0].pr.SessionID) + } +} From 481f44a5d604bd739192073952682e5e761865b9 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Wed, 3 Jun 2026 11:44:16 +0530 Subject: [PATCH 09/20] fix: retry scm lifecycle after persistence --- .../adapters/scm/github/observer_provider.go | 24 +++++- .../adapters/scm/github/provider_test.go | 7 +- backend/internal/observe/scm/observer.go | 78 ++++++++++++++++--- backend/internal/observe/scm/observer_test.go | 24 +++++- 4 files changed, 117 insertions(+), 16 deletions(-) diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go index eb94986c..233bdb67 100644 --- a/backend/internal/adapters/scm/github/observer_provider.go +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -258,6 +258,11 @@ func scmObservationFromGraphQL(ref ports.SCMPRRef, pr map[string]any) ports.SCMO checks := scmChecksFromGraphQL(pr) failed := failedSCMChecks(checks) ci := string(ciSummaryFromRollupState(pr)) + prURL := firstNonEmpty(str(pr["url"]), ref.URL) + if ci == string(domain.CIFailing) && len(failed) == 0 && scmContextsPaginated(pr) { + failed = []ports.SCMCheckObservation{paginatedFailureSCMCheck(prURL)} + checks = append(checks, failed[0]) + } review := string(reviewDecisionFromGraphQL(pr)) providerMergeable := str(pr["mergeable"]) providerMergeState := str(pr["mergeStateStatus"]) @@ -270,7 +275,7 @@ func scmObservationFromGraphQL(ref ports.SCMPRRef, pr map[string]any) ports.SCMO Host: ref.Repo.Host, Repo: repoFullName(ref.Repo), PR: ports.SCMPRObservation{ - URL: firstNonEmpty(str(pr["url"]), ref.URL), + URL: prURL, Number: int(num(pr["number"])), State: normalizePRState(draft, merged, closed), Draft: draft, @@ -316,6 +321,23 @@ func ciSummaryFromRollupState(pr map[string]any) domain.CIState { return mapRollupState(str(roll["state"])) } +func scmContextsPaginated(pr map[string]any) bool { + roll := statusRollup(pr) + contexts, _ := roll["contexts"].(map[string]any) + return pageInfoHasMore(contexts) +} + +func paginatedFailureSCMCheck(prURL string) ports.SCMCheckObservation { + const msg = "GitHub reports failing CI, but this PR has more than 100 check/status contexts so the failing context is outside AO's first-page GraphQL view. Open the PR checks tab for the hidden failing job." + return ports.SCMCheckObservation{ + Name: "GitHub statusCheckRollup", + Status: string(domain.PRCheckFailed), + Conclusion: "failure", + URL: prURL, + LogTail: msg, + } +} + func scmChecksFromGraphQL(pr map[string]any) []ports.SCMCheckObservation { roll := statusRollup(pr) contexts, _ := roll["contexts"].(map[string]any) diff --git a/backend/internal/adapters/scm/github/provider_test.go b/backend/internal/adapters/scm/github/provider_test.go index 2502e71d..2c706a47 100644 --- a/backend/internal/adapters/scm/github/provider_test.go +++ b/backend/internal/adapters/scm/github/provider_test.go @@ -1163,8 +1163,11 @@ func TestSCMObservationUsesRollupStateWhenContextsPaginated(t *testing.T) { if obs.CI.Summary != string(domain.CIFailing) { t.Fatalf("observer CI summary = %q, want failing from aggregate rollup state", obs.CI.Summary) } - if len(obs.CI.FailedChecks) != 0 { - t.Fatalf("failed checks should only contain visible failing contexts, got %#v", obs.CI.FailedChecks) + if len(obs.CI.FailedChecks) != 1 { + t.Fatalf("failed checks should include a synthetic paginated failure, got %#v", obs.CI.FailedChecks) + } + if obs.CI.FailedChecks[0].Name != "GitHub statusCheckRollup" || !strings.Contains(obs.CI.FailedChecks[0].LogTail, "more than 100") { + t.Fatalf("synthetic paginated failure missing actionable details: %#v", obs.CI.FailedChecks[0]) } } diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index 7fd18a95..fd4eb229 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -136,6 +136,11 @@ type Observer struct { credentialsChecked bool // disabled is set after the credential gate reports unavailable credentials. disabled bool + // pendingLifecycle retries lifecycle side effects that failed after a + // successful DB write, so semantic hash equality cannot make us forget them. + pendingLifecycle map[string]pendingLifecycleNotification + // pendingLifecycleOrder tracks FIFO eviction order for pendingLifecycle. + pendingLifecycleOrder []string // Cache holds bounded in-memory provider ETags and review poll timestamps. Cache ObserverCache } @@ -143,7 +148,7 @@ type Observer struct { // New constructs an Observer with default cadence/cache settings for zero // values in cfg. func New(provider Provider, store Store, lifecycle Lifecycle, cfg Config) *Observer { - o := &Observer{provider: provider, store: store, lifecycle: lifecycle, tick: cfg.Tick, reviewInterval: cfg.ReviewInterval, clock: cfg.Clock, logger: cfg.Logger, Cache: newCache(cfg.CacheMax)} + o := &Observer{provider: provider, store: store, lifecycle: lifecycle, tick: cfg.Tick, reviewInterval: cfg.ReviewInterval, clock: cfg.Clock, logger: cfg.Logger, pendingLifecycle: map[string]pendingLifecycleNotification{}, Cache: newCache(cfg.CacheMax)} if o.tick <= 0 { o.tick = DefaultTickInterval } @@ -221,6 +226,11 @@ type persistenceOptions struct { preserveLocalReview bool } +type pendingLifecycleNotification struct { + sessionID domain.SessionID + obs ports.SCMObservation +} + // Poll runs one synchronous SCM observation cycle. func (o *Observer) Poll(ctx context.Context) error { now := o.clock().UTC() @@ -243,6 +253,10 @@ func (o *Observer) Poll(ctx context.Context) error { if o.disabled { return nil } + o.retryPendingLifecycle(ctx) + if err := ctx.Err(); err != nil { + return err + } repoGuards := o.guardRepos(ctx, subjects) repoRefreshOK := pendingRepoRefreshes(repoGuards) @@ -337,18 +351,20 @@ func (o *Observer) Poll(ctx context.Context) error { prRefreshOK[key] = true continue } + pr, checks, threads, comments := domainFromObservation(subj.session.ID, prepared, local, opts, now) + if err := o.store.WriteSCMObservation(ctx, pr, checks, threads, comments, reviewRefreshed[key]); err != nil { + o.logger.Error("scm observer: DB write failed", "session", subj.session.ID, "pr", pr.URL, "err", err) + markRepoRefreshFailed(subj.repo) + continue + } if o.lifecycle != nil { if err := o.lifecycle.ApplySCMObservation(ctx, subj.session.ID, prepared); err != nil { o.logger.Error("scm observer: lifecycle notification failed", "session", subj.session.ID, "pr", firstNonEmpty(prepared.PR.URL, prepared.PR.HTMLURL, local.URL), "err", err) + o.queuePendingLifecycle(key, subj.session.ID, prepared) markRepoRefreshFailed(subj.repo) continue } - } - pr, checks, threads, comments := domainFromObservation(subj.session.ID, prepared, local, opts, now) - if err := o.store.WriteSCMObservation(ctx, pr, checks, threads, comments, reviewRefreshed[key]); err != nil { - o.logger.Error("scm observer: DB write failed", "session", subj.session.ID, "pr", pr.URL, "err", err) - markRepoRefreshFailed(subj.repo) - continue + o.deletePendingLifecycle(key) } prRefreshOK[key] = true } @@ -399,6 +415,44 @@ func (o *Observer) checkCredentials(ctx context.Context) error { return nil } +func (o *Observer) retryPendingLifecycle(ctx context.Context) { + if o.lifecycle == nil || len(o.pendingLifecycle) == 0 { + return + } + for key, pending := range o.pendingLifecycle { + if err := ctx.Err(); err != nil { + return + } + if err := o.lifecycle.ApplySCMObservation(ctx, pending.sessionID, pending.obs); err != nil { + o.logger.Error("scm observer: pending lifecycle notification retry failed", "session", pending.sessionID, "pr", firstNonEmpty(pending.obs.PR.URL, pending.obs.PR.HTMLURL), "err", err) + continue + } + o.deletePendingLifecycle(key) + } +} + +func (o *Observer) queuePendingLifecycle(key string, sessionID domain.SessionID, obs ports.SCMObservation) { + if key == "" { + key = prKeyFromObs(obs) + } + if key == "" { + key = string(sessionID) + ":" + firstNonEmpty(obs.PR.URL, obs.PR.HTMLURL) + } + if _, ok := o.pendingLifecycle[key]; !ok { + o.pendingLifecycleOrder = append(o.pendingLifecycleOrder, key) + } + o.pendingLifecycle[key] = pendingLifecycleNotification{sessionID: sessionID, obs: obs} + for len(o.pendingLifecycleOrder) > o.Cache.max { + evict := o.pendingLifecycleOrder[0] + o.pendingLifecycleOrder = o.pendingLifecycleOrder[1:] + delete(o.pendingLifecycle, evict) + } +} + +func (o *Observer) deletePendingLifecycle(key string) { + delete(o.pendingLifecycle, key) +} + func (o *Observer) discoverSubjects(ctx context.Context) (map[string]*subject, error) { sessions, err := o.store.ListAllSessions(ctx) if err != nil { @@ -592,9 +646,13 @@ func (o *Observer) enrichFailureLogs(ctx context.Context, obs *ports.SCMObservat } tails := make([]string, 0, len(obs.CI.FailedChecks)) for i := range obs.CI.FailedChecks { - tail, err := o.provider.FetchFailedCheckLogTail(ctx, ports.SCMRepo{Provider: obs.Provider, Host: obs.Host, Repo: obs.Repo, Owner: ownerOf(obs.Repo), Name: nameOf(obs.Repo)}, obs.CI.FailedChecks[i]) - if err != nil { - tail = "" + tail := obs.CI.FailedChecks[i].LogTail + if obs.CI.FailedChecks[i].ProviderID != "" || tail == "" { + var err error + tail, err = o.provider.FetchFailedCheckLogTail(ctx, ports.SCMRepo{Provider: obs.Provider, Host: obs.Host, Repo: obs.Repo, Owner: ownerOf(obs.Repo), Name: nameOf(obs.Repo)}, obs.CI.FailedChecks[i]) + if err != nil { + tail = "" + } } obs.CI.FailedChecks[i].LogTail = tail if tail != "" { diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index 01759176..a71996e0 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -557,7 +557,7 @@ func TestPoll_DoesNotCommitCommitETagWhenFetchFails(t *testing.T) { } } -func TestPoll_LifecycleFailureDoesNotWriteOrAdvanceETags(t *testing.T) { +func TestPoll_LifecycleFailureWritesQueuesRetryAndDoesNotAdvanceETags(t *testing.T) { store := testStoreWithSession() local := knownPR(1) local.MetadataHash = "old-metadata" @@ -577,8 +577,14 @@ func TestPoll_LifecycleFailureDoesNotWriteOrAdvanceETags(t *testing.T) { if err := obs.Poll(context.Background()); err != nil { t.Fatal(err) } - if len(store.writes) != 0 { - t.Fatalf("write occurred before lifecycle accepted observation: %#v", store.writes) + if len(store.writes) != 1 { + t.Fatalf("DB write should succeed before lifecycle retry is queued, got writes=%#v", store.writes) + } + if store.writes[0].pr.Title != "changed title" { + t.Fatalf("DB write did not persist the observed PR state: %#v", store.writes[0].pr) + } + if len(obs.pendingLifecycle) != 1 { + t.Fatalf("pending lifecycle retry not queued: %#v", obs.pendingLifecycle) } if got := obs.Cache.RepoPRListETag[prKey(testRepo, 0)]; got != "repo1" { t.Fatalf("repo ETag advanced after lifecycle failure: got %q want repo1", got) @@ -586,6 +592,18 @@ func TestPoll_LifecycleFailureDoesNotWriteOrAdvanceETags(t *testing.T) { if got := obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")]; got != "ci1" { t.Fatalf("commit ETag advanced after lifecycle failure: got %q want ci1", got) } + + lc.err = nil + store.prs["p-1"] = []domain.PullRequest{store.writes[0].pr} + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(lc.observed) != 1 { + t.Fatalf("pending lifecycle notification was not retried: %#v", lc.observed) + } + if len(obs.pendingLifecycle) != 0 { + t.Fatalf("pending lifecycle notification not cleared after retry: %#v", obs.pendingLifecycle) + } } func TestPoll_WriteFailureDoesNotAdvanceGuardETags(t *testing.T) { From 40521b17dac723df6da31e6d5d81814098d903b8 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Wed, 3 Jun 2026 12:57:35 +0530 Subject: [PATCH 10/20] fix: persist scm lifecycle acknowledgement cursor --- backend/internal/observe/scm/observer.go | 117 +++++++----------- backend/internal/observe/scm/observer_test.go | 37 ++++-- 2 files changed, 71 insertions(+), 83 deletions(-) diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index fd4eb229..bd24d025 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -136,11 +136,6 @@ type Observer struct { credentialsChecked bool // disabled is set after the credential gate reports unavailable credentials. disabled bool - // pendingLifecycle retries lifecycle side effects that failed after a - // successful DB write, so semantic hash equality cannot make us forget them. - pendingLifecycle map[string]pendingLifecycleNotification - // pendingLifecycleOrder tracks FIFO eviction order for pendingLifecycle. - pendingLifecycleOrder []string // Cache holds bounded in-memory provider ETags and review poll timestamps. Cache ObserverCache } @@ -148,7 +143,7 @@ type Observer struct { // New constructs an Observer with default cadence/cache settings for zero // values in cfg. func New(provider Provider, store Store, lifecycle Lifecycle, cfg Config) *Observer { - o := &Observer{provider: provider, store: store, lifecycle: lifecycle, tick: cfg.Tick, reviewInterval: cfg.ReviewInterval, clock: cfg.Clock, logger: cfg.Logger, pendingLifecycle: map[string]pendingLifecycleNotification{}, Cache: newCache(cfg.CacheMax)} + o := &Observer{provider: provider, store: store, lifecycle: lifecycle, tick: cfg.Tick, reviewInterval: cfg.ReviewInterval, clock: cfg.Clock, logger: cfg.Logger, Cache: newCache(cfg.CacheMax)} if o.tick <= 0 { o.tick = DefaultTickInterval } @@ -220,15 +215,11 @@ type refreshSelection struct { } type persistenceOptions struct { - reviewFetched bool - preserveLocalMetadata bool - preserveLocalCI bool - preserveLocalReview bool -} - -type pendingLifecycleNotification struct { - sessionID domain.SessionID - obs ports.SCMObservation + reviewFetched bool + preserveLocalMetadataHash bool + preserveLocalCIHash bool + preserveLocalReviewHash bool + preserveLocalReviewDecision bool } // Poll runs one synchronous SCM observation cycle. @@ -253,10 +244,6 @@ func (o *Observer) Poll(ctx context.Context) error { if o.disabled { return nil } - o.retryPendingLifecycle(ctx) - if err := ctx.Err(); err != nil { - return err - } repoGuards := o.guardRepos(ctx, subjects) repoRefreshOK := pendingRepoRefreshes(repoGuards) @@ -341,17 +328,38 @@ func (o *Observer) Poll(ctx context.Context) error { } local := subj.known opts := persistenceOptions{ - reviewFetched: reviewRefreshed[key], - preserveLocalMetadata: localOnlyObservations[key], - preserveLocalCI: localOnlyObservations[key], - preserveLocalReview: reviewStale[key], + reviewFetched: reviewRefreshed[key], + preserveLocalMetadataHash: localOnlyObservations[key], + preserveLocalCIHash: localOnlyObservations[key], + preserveLocalReviewHash: reviewStale[key], + preserveLocalReviewDecision: reviewStale[key], } prepared := o.prepareForPersistence(obs, local, opts, now) if !prepared.Changed.Metadata && !prepared.Changed.CI && !prepared.Changed.Review { prRefreshOK[key] = true continue } - pr, checks, threads, comments := domainFromObservation(subj.session.ID, prepared, local, opts, now) + finalPR, finalChecks, finalThreads, finalComments := domainFromObservation(subj.session.ID, prepared, local, opts, now) + pr, checks, threads, comments := finalPR, finalChecks, finalThreads, finalComments + // Lifecycle is allowed to run only after the observed facts are durable, + // but semantic hashes are the observer's acknowledgement cursor. Keep + // changed hashes at their local values until lifecycle succeeds; if the + // daemon restarts after a lifecycle failure, the stale hashes force the + // same observation to be fetched and delivered again. + holdHashes := o.lifecycle != nil + if holdHashes { + pendingOpts := opts + if prepared.Changed.Metadata { + pendingOpts.preserveLocalMetadataHash = true + } + if prepared.Changed.CI { + pendingOpts.preserveLocalCIHash = true + } + if prepared.Changed.Review { + pendingOpts.preserveLocalReviewHash = true + } + pr, checks, threads, comments = domainFromObservation(subj.session.ID, prepared, local, pendingOpts, now) + } if err := o.store.WriteSCMObservation(ctx, pr, checks, threads, comments, reviewRefreshed[key]); err != nil { o.logger.Error("scm observer: DB write failed", "session", subj.session.ID, "pr", pr.URL, "err", err) markRepoRefreshFailed(subj.repo) @@ -360,11 +368,16 @@ func (o *Observer) Poll(ctx context.Context) error { if o.lifecycle != nil { if err := o.lifecycle.ApplySCMObservation(ctx, subj.session.ID, prepared); err != nil { o.logger.Error("scm observer: lifecycle notification failed", "session", subj.session.ID, "pr", firstNonEmpty(prepared.PR.URL, prepared.PR.HTMLURL, local.URL), "err", err) - o.queuePendingLifecycle(key, subj.session.ID, prepared) markRepoRefreshFailed(subj.repo) continue } - o.deletePendingLifecycle(key) + if holdHashes { + if err := o.store.WriteSCMObservation(ctx, finalPR, finalChecks, finalThreads, finalComments, reviewRefreshed[key]); err != nil { + o.logger.Error("scm observer: DB lifecycle acknowledgement failed", "session", subj.session.ID, "pr", finalPR.URL, "err", err) + markRepoRefreshFailed(subj.repo) + continue + } + } } prRefreshOK[key] = true } @@ -415,44 +428,6 @@ func (o *Observer) checkCredentials(ctx context.Context) error { return nil } -func (o *Observer) retryPendingLifecycle(ctx context.Context) { - if o.lifecycle == nil || len(o.pendingLifecycle) == 0 { - return - } - for key, pending := range o.pendingLifecycle { - if err := ctx.Err(); err != nil { - return - } - if err := o.lifecycle.ApplySCMObservation(ctx, pending.sessionID, pending.obs); err != nil { - o.logger.Error("scm observer: pending lifecycle notification retry failed", "session", pending.sessionID, "pr", firstNonEmpty(pending.obs.PR.URL, pending.obs.PR.HTMLURL), "err", err) - continue - } - o.deletePendingLifecycle(key) - } -} - -func (o *Observer) queuePendingLifecycle(key string, sessionID domain.SessionID, obs ports.SCMObservation) { - if key == "" { - key = prKeyFromObs(obs) - } - if key == "" { - key = string(sessionID) + ":" + firstNonEmpty(obs.PR.URL, obs.PR.HTMLURL) - } - if _, ok := o.pendingLifecycle[key]; !ok { - o.pendingLifecycleOrder = append(o.pendingLifecycleOrder, key) - } - o.pendingLifecycle[key] = pendingLifecycleNotification{sessionID: sessionID, obs: obs} - for len(o.pendingLifecycleOrder) > o.Cache.max { - evict := o.pendingLifecycleOrder[0] - o.pendingLifecycleOrder = o.pendingLifecycleOrder[1:] - delete(o.pendingLifecycle, evict) - } -} - -func (o *Observer) deletePendingLifecycle(key string) { - delete(o.pendingLifecycle, key) -} - func (o *Observer) discoverSubjects(ctx context.Context) (map[string]*subject, error) { sessions, err := o.store.ListAllSessions(ctx) if err != nil { @@ -769,15 +744,15 @@ func (o *Observer) needsReviewRefresh(key string, local domain.PullRequest, deci func (o *Observer) prepareForPersistence(obs ports.SCMObservation, local domain.PullRequest, opts persistenceOptions, now time.Time) ports.SCMObservation { metadataHash := metadataSemanticHash(obs) - if opts.preserveLocalMetadata { + if opts.preserveLocalMetadataHash { metadataHash = local.MetadataHash } ciHash := ciSemanticHash(obs.CI) - if opts.preserveLocalCI { + if opts.preserveLocalCIHash { ciHash = local.CIHash } reviewHash := local.ReviewHash - if !opts.preserveLocalReview && (opts.reviewFetched || local.ReviewHash == "" || obs.Review.Decision != string(local.Review)) { + if !opts.preserveLocalReviewHash && (opts.reviewFetched || local.ReviewHash == "" || obs.Review.Decision != string(local.Review)) { reviewHash = reviewSemanticHash(obs.Review) } obs.Changed = ports.SCMChanged{ @@ -792,17 +767,19 @@ func (o *Observer) prepareForPersistence(obs ports.SCMObservation, local domain. func domainFromObservation(sessionID domain.SessionID, obs ports.SCMObservation, local domain.PullRequest, opts persistenceOptions, now time.Time) (domain.PullRequest, []domain.PullRequestCheck, []domain.PullRequestReviewThread, []domain.PullRequestComment) { metadataHash := metadataSemanticHash(obs) - if opts.preserveLocalMetadata { + if opts.preserveLocalMetadataHash { metadataHash = local.MetadataHash } ciHash := ciSemanticHash(obs.CI) - if opts.preserveLocalCI { + if opts.preserveLocalCIHash { ciHash = local.CIHash } reviewHash := reviewSemanticHash(obs.Review) reviewDecision := domain.ReviewDecision(firstNonEmpty(obs.Review.Decision, string(domain.ReviewNone))) - if opts.preserveLocalReview { + if opts.preserveLocalReviewDecision { reviewDecision = local.Review + } + if opts.preserveLocalReviewHash { reviewHash = local.ReviewHash } else if !opts.reviewFetched && local.ReviewHash != "" && reviewDecision == local.Review { reviewHash = local.ReviewHash diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index a71996e0..ceab274c 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -297,7 +297,7 @@ func TestPoll_RepoETag200DetectsPRAndRefreshesSamePoll(t *testing.T) { if len(provider.fetchBatches) != 1 || len(provider.fetchBatches[0]) != 1 || provider.fetchBatches[0][0].Number != 1 { t.Fatalf("new PR not refreshed in same poll: %#v", provider.fetchBatches) } - if len(store.writes) != 1 || len(lc.observed) != 1 { + if len(store.writes) < 1 || len(lc.observed) != 1 { t.Fatalf("write/lifecycle missing: writes=%d lifecycle=%d", len(store.writes), len(lc.observed)) } } @@ -405,7 +405,7 @@ func TestPoll_ReviewPollingRespectsInterval(t *testing.T) { if provider.reviewCalls != 1 { t.Fatalf("review not fetched after interval: %d", provider.reviewCalls) } - if len(store.writes) != 1 || !store.writes[0].replaceReview { + if len(store.writes) == 0 || !store.writes[0].replaceReview { t.Fatalf("review refresh not persisted with replaceReview: %#v", store.writes) } } @@ -443,7 +443,7 @@ func TestPoll_ReviewHashDrivesPersistenceAndLifecycle(t *testing.T) { if err := obs.Poll(context.Background()); err != nil { t.Fatal(err) } - if len(store.writes) != 1 || len(store.writes[0].comments) != 1 { + if len(store.writes) == 0 || len(store.writes[0].comments) != 1 { t.Fatalf("review change not persisted: %#v", store.writes) } if len(lc.observed) != 1 || !lc.observed[0].Changed.Review { @@ -482,10 +482,10 @@ func TestPoll_ReviewOnlyRefreshPreservesLocalCIAndMetadata(t *testing.T) { if err := obs.Poll(context.Background()); err != nil { t.Fatal(err) } - if len(store.writes) != 1 { + if len(store.writes) == 0 { t.Fatalf("writes = %d, want review-only write", len(store.writes)) } - write := store.writes[0] + write := store.writes[len(store.writes)-1] if write.pr.MetadataHash != local.MetadataHash { t.Fatalf("metadata hash changed on review-only refresh: got %q want %q", write.pr.MetadataHash, local.MetadataHash) } @@ -557,7 +557,7 @@ func TestPoll_DoesNotCommitCommitETagWhenFetchFails(t *testing.T) { } } -func TestPoll_LifecycleFailureWritesQueuesRetryAndDoesNotAdvanceETags(t *testing.T) { +func TestPoll_LifecycleFailureHoldsBackHashesForDurableRetry(t *testing.T) { store := testStoreWithSession() local := knownPR(1) local.MetadataHash = "old-metadata" @@ -583,8 +583,11 @@ func TestPoll_LifecycleFailureWritesQueuesRetryAndDoesNotAdvanceETags(t *testing if store.writes[0].pr.Title != "changed title" { t.Fatalf("DB write did not persist the observed PR state: %#v", store.writes[0].pr) } - if len(obs.pendingLifecycle) != 1 { - t.Fatalf("pending lifecycle retry not queued: %#v", obs.pendingLifecycle) + if store.writes[0].pr.MetadataHash != local.MetadataHash { + t.Fatalf("metadata hash advanced before lifecycle acknowledgement: got %q want %q", store.writes[0].pr.MetadataHash, local.MetadataHash) + } + if store.writes[0].pr.CIHash != local.CIHash { + t.Fatalf("CI hash advanced before lifecycle acknowledgement: got %q want %q", store.writes[0].pr.CIHash, local.CIHash) } if got := obs.Cache.RepoPRListETag[prKey(testRepo, 0)]; got != "repo1" { t.Fatalf("repo ETag advanced after lifecycle failure: got %q want repo1", got) @@ -595,14 +598,22 @@ func TestPoll_LifecycleFailureWritesQueuesRetryAndDoesNotAdvanceETags(t *testing lc.err = nil store.prs["p-1"] = []domain.PullRequest{store.writes[0].pr} - if err := obs.Poll(context.Background()); err != nil { + restarted := newTestObserver(store, provider, lc, time.Unix(501, 0).UTC()) + if err := restarted.Poll(context.Background()); err != nil { t.Fatal(err) } if len(lc.observed) != 1 { - t.Fatalf("pending lifecycle notification was not retried: %#v", lc.observed) + t.Fatalf("held-back semantic hashes did not force lifecycle retry after restart: %#v", lc.observed) } - if len(obs.pendingLifecycle) != 0 { - t.Fatalf("pending lifecycle notification not cleared after retry: %#v", obs.pendingLifecycle) + if len(store.writes) != 3 { + t.Fatalf("retry should write held-back facts then acknowledge hashes, got writes=%d", len(store.writes)) + } + last := store.writes[len(store.writes)-1].pr + if last.MetadataHash != metadataSemanticHash(changed) { + t.Fatalf("metadata hash not acknowledged after lifecycle success: got %q want %q", last.MetadataHash, metadataSemanticHash(changed)) + } + if last.CIHash != ciSemanticHash(changed.CI) { + t.Fatalf("CI hash not acknowledged after lifecycle success: got %q want %q", last.CIHash, ciSemanticHash(changed.CI)) } } @@ -658,7 +669,7 @@ func TestPoll_DuplicateTrackedPRKeepsFirstSession(t *testing.T) { if err := obs.Poll(context.Background()); err != nil { t.Fatal(err) } - if len(store.writes) != 1 { + if len(store.writes) == 0 { t.Fatalf("writes = %d, want exactly one owner write", len(store.writes)) } if store.writes[0].pr.SessionID != "p-1" { From 4515244305dd138b4335b5275c3ed35e9d3b4597 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Wed, 3 Jun 2026 15:23:49 +0530 Subject: [PATCH 11/20] fix: tune scm graphql pagination --- .../adapters/scm/github/observer_provider.go | 204 +++++++++++++----- .../adapters/scm/github/provider_test.go | 196 ++++++++++++++++- backend/internal/observe/scm/observer.go | 31 ++- backend/internal/observe/scm/observer_test.go | 51 ++++- backend/internal/ports/outbound.go | 24 ++- backend/internal/ports/scm_observations.go | 4 + .../storage/sqlite/gen/pr_comment.sql.go | 14 ++ .../sqlite/gen/pr_review_threads.sql.go | 1 + .../storage/sqlite/queries/pr_comment.sql | 3 + .../internal/storage/sqlite/store/pr_store.go | 48 ++++- .../storage/sqlite/store/store_test.go | 74 ++++++- 11 files changed, 555 insertions(+), 95 deletions(-) diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go index 233bdb67..c733b447 100644 --- a/backend/internal/adapters/scm/github/observer_provider.go +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -21,12 +21,19 @@ import ( "github.com/aoagents/agent-orchestrator/backend/internal/ports" ) -const scmBatchCheckContextLimit = 100 - -// githubReviewThreadMaxPages bounds a single review-thread refresh. GitHub -// returns 100 threads per page, so this permits up to 5000 threads while keeping -// one poll from allocating unbounded memory or issuing unlimited GraphQL calls. -const githubReviewThreadMaxPages = 50 +const scmBatchCheckContextLimit = 20 + +const ( + // githubReviewThreadPageSize fetches the latest review window cheaply for + // the common case while still covering active review feedback. + githubReviewThreadPageSize = 50 + // githubReviewCommentLimitPerThread stores only the leading comments needed + // to understand a thread without making one pathological thread dominate + // GraphQL cost. + githubReviewCommentLimitPerThread = 5 + // githubReviewThreadMaxPages bounds the explicit older-thread fallback. + githubReviewThreadMaxPages = 2 +) // ParseRepository normalizes a GitHub remote/origin URL into a provider-neutral // repository key. It accepts https://github.com/owner/repo(.git), @@ -118,6 +125,11 @@ func (p *Provider) FetchPullRequests(ctx context.Context, refs []ports.SCMPRRef) if pr == nil { continue } + if scmContextsPaginated(pr) { + if err := p.fetchRemainingCheckContexts(ctx, ref, pr); err != nil { + return nil, err + } + } out = append(out, scmObservationFromGraphQL(ref, pr)) } return out, nil @@ -144,41 +156,36 @@ func (p *Provider) FetchFailedCheckLogTail(ctx context.Context, repo ports.SCMRe // FetchReviewThreads fetches review threads separately from the fast PR/CI path. func (p *Provider) FetchReviewThreads(ctx context.Context, ref ports.SCMPRRef) (ports.SCMReviewObservation, error) { - var all []ports.SCMReviewThreadObservation - var decision string - var cursor string - for page := 0; page < githubReviewThreadMaxPages; page++ { - query := buildReviewThreadsQuery(ref, cursor) - data, err := p.client.doGraphQL(ctx, query, nil) - if err != nil { - return ports.SCMReviewObservation{}, err - } - repoData, _ := data["repo"].(map[string]any) - pr, _ := repoData["pullRequest"].(map[string]any) - if pr == nil { - return ports.SCMReviewObservation{}, fmt.Errorf("%w: pull request not found in review response", ErrNotFound) - } - decision = string(reviewDecisionFromGraphQL(pr)) - threads, _ := pr["reviewThreads"].(map[string]any) - for _, th := range nodes(threads["nodes"]) { - all = append(all, scmThreadFromGraphQL(th)) - } - pi, _ := threads["pageInfo"].(map[string]any) - if !boolv(pi["hasNextPage"]) { - break - } - cursor = str(pi["endCursor"]) - if cursor == "" { - break - } - if page == githubReviewThreadMaxPages-1 { - p.logger.Warn("github scm: review thread page limit reached", - "repo", repoFullName(ref.Repo), "pr", ref.Number, - "max_pages", githubReviewThreadMaxPages) - break + latest, decision, pi, err := p.fetchReviewThreadPage(ctx, ref, "") + if err != nil { + return ports.SCMReviewObservation{}, err + } + if !boolv(pi["hasPreviousPage"]) { + return ports.SCMReviewObservation{Decision: decision, Threads: latest}, nil + } + out := latest + startCursor := str(pi["startCursor"]) + if len(latest) == 0 || !latest[0].Resolved { + if startCursor == "" { + p.logger.Warn("github scm: review thread page is partial but missing start cursor", + "repo", repoFullName(ref.Repo), "pr", ref.Number) + } else { + older, _, olderPI, err := p.fetchReviewThreadPage(ctx, ref, startCursor) + if err != nil { + return ports.SCMReviewObservation{}, err + } + combined := make([]ports.SCMReviewThreadObservation, 0, len(older)+len(latest)) + combined = append(combined, older...) + combined = append(combined, latest...) + out = combined + if boolv(olderPI["hasPreviousPage"]) { + p.logger.Warn("github scm: review thread page limit reached", + "repo", repoFullName(ref.Repo), "pr", ref.Number, + "max_pages", githubReviewThreadMaxPages) + } } } - return ports.SCMReviewObservation{Decision: decision, Threads: all}, nil + return ports.SCMReviewObservation{Decision: decision, Threads: out, Partial: true}, nil } type restListPull struct { @@ -250,10 +257,84 @@ commits(last:1){ nodes{ commit{ oid statusCheckRollup{ state contexts(first:CONT __typename ... on CheckRun { name status conclusion detailsUrl url databaseId } ... on StatusContext { context state targetUrl } -} pageInfo{ hasNextPage } } } } } } +} pageInfo{ hasNextPage endCursor } } } } } } `, "CONTEXT_LIMIT", strconv.Itoa(scmBatchCheckContextLimit)) } +func (p *Provider) fetchRemainingCheckContexts(ctx context.Context, ref ports.SCMPRRef, pr map[string]any) error { + contexts := statusContexts(pr) + if contexts == nil { + return nil + } + cursor := pageInfoEndCursor(contexts) + if cursor == "" { + return fmt.Errorf("github scm: paginated check contexts for %s#%d missing end cursor", repoFullName(ref.Repo), ref.Number) + } + for { + query := buildCheckContextsQuery(ref, cursor) + data, err := p.client.doGraphQL(ctx, query, nil) + if err != nil { + return fmt.Errorf("github scm: fetch remaining check contexts for %s#%d: %w", repoFullName(ref.Repo), ref.Number, err) + } + repoData, _ := data["repo"].(map[string]any) + pagePR, _ := repoData["pullRequest"].(map[string]any) + if pagePR == nil { + return fmt.Errorf("%w: pull request not found in check context response", ErrNotFound) + } + pageContexts := statusContexts(pagePR) + if pageContexts == nil { + return fmt.Errorf("github scm: check context fallback for %s#%d returned no contexts", repoFullName(ref.Repo), ref.Number) + } + appendStatusContextNodes(contexts, pageContexts) + if !pageInfoHasMore(pageContexts) { + break + } + cursor = pageInfoEndCursor(pageContexts) + if cursor == "" { + return fmt.Errorf("github scm: paginated check context page for %s#%d missing end cursor", repoFullName(ref.Repo), ref.Number) + } + } + return nil +} + +func buildCheckContextsQuery(ref ports.SCMPRRef, cursor string) string { + return fmt.Sprintf(`query{ +repo: repository(owner:%s,name:%s){ pullRequest(number:%d){ + commits(last:1){ nodes{ commit{ statusCheckRollup{ contexts(first:%d, after:%s){ nodes{ + __typename + ... on CheckRun { name status conclusion detailsUrl url databaseId } + ... on StatusContext { context state targetUrl } + } pageInfo{ hasNextPage endCursor } } } } } } +} } +}`, graphQLString(ref.Repo.Owner), graphQLString(ref.Repo.Name), ref.Number, scmBatchCheckContextLimit, graphQLString(cursor)) +} + +func statusContexts(pr map[string]any) map[string]any { + roll := statusRollup(pr) + if roll == nil { + return nil + } + contexts, _ := roll["contexts"].(map[string]any) + return contexts +} + +func appendStatusContextNodes(dst, src map[string]any) { + if dst == nil || src == nil { + return + } + merged, _ := dst["nodes"].([]any) + for _, n := range nodes(src["nodes"]) { + merged = append(merged, n) + } + dst["nodes"] = merged + dst["pageInfo"] = src["pageInfo"] +} + +func pageInfoEndCursor(connection map[string]any) string { + pi, _ := connection["pageInfo"].(map[string]any) + return str(pi["endCursor"]) +} + func scmObservationFromGraphQL(ref ports.SCMPRRef, pr map[string]any) ports.SCMObservation { checks := scmChecksFromGraphQL(pr) failed := failedSCMChecks(checks) @@ -322,13 +403,11 @@ func ciSummaryFromRollupState(pr map[string]any) domain.CIState { } func scmContextsPaginated(pr map[string]any) bool { - roll := statusRollup(pr) - contexts, _ := roll["contexts"].(map[string]any) - return pageInfoHasMore(contexts) + return pageInfoHasMore(statusContexts(pr)) } func paginatedFailureSCMCheck(prURL string) ports.SCMCheckObservation { - const msg = "GitHub reports failing CI, but this PR has more than 100 check/status contexts so the failing context is outside AO's first-page GraphQL view. Open the PR checks tab for the hidden failing job." + const msg = "GitHub reports failing CI, but the failing context is outside AO's current GraphQL check-context page. Open the PR checks tab for the hidden failing job." return ports.SCMCheckObservation{ Name: "GitHub statusCheckRollup", Status: string(domain.PRCheckFailed), @@ -446,17 +525,38 @@ func mergeabilityObservation(providerMergeable, providerMergeState, ci, review s return out } -func buildReviewThreadsQuery(ref ports.SCMPRRef, cursor string) string { - after := "null" - if cursor != "" { - after = graphQLString(cursor) +func (p *Provider) fetchReviewThreadPage(ctx context.Context, ref ports.SCMPRRef, beforeCursor string) ([]ports.SCMReviewThreadObservation, string, map[string]any, error) { + query := buildReviewThreadsQuery(ref, beforeCursor) + data, err := p.client.doGraphQL(ctx, query, nil) + if err != nil { + return nil, "", nil, err + } + repoData, _ := data["repo"].(map[string]any) + pr, _ := repoData["pullRequest"].(map[string]any) + if pr == nil { + return nil, "", nil, fmt.Errorf("%w: pull request not found in review response", ErrNotFound) + } + decision := string(reviewDecisionFromGraphQL(pr)) + threads, _ := pr["reviewThreads"].(map[string]any) + out := make([]ports.SCMReviewThreadObservation, 0, len(nodes(threads["nodes"]))) + for _, th := range nodes(threads["nodes"]) { + out = append(out, scmThreadFromGraphQL(th)) + } + pi, _ := threads["pageInfo"].(map[string]any) + return out, decision, pi, nil +} + +func buildReviewThreadsQuery(ref ports.SCMPRRef, beforeCursor string) string { + before := "null" + if beforeCursor != "" { + before = graphQLString(beforeCursor) } return fmt.Sprintf(`query{ -repo: repository(owner:%s,name:%s){ pullRequest(number:%d){ reviewDecision reviewThreads(first:100, after:%s){ nodes{ +repo: repository(owner:%s,name:%s){ pullRequest(number:%d){ reviewDecision reviewThreads(last:%d, before:%s){ nodes{ id isResolved path line - comments(first:100){ nodes{ id body url author{ login __typename } } } -} pageInfo{ hasNextPage endCursor } } } } -}`, graphQLString(ref.Repo.Owner), graphQLString(ref.Repo.Name), ref.Number, after) + comments(first:%d){ nodes{ id body url author{ login __typename } } } +} pageInfo{ hasPreviousPage startCursor } } } } +}`, graphQLString(ref.Repo.Owner), graphQLString(ref.Repo.Name), ref.Number, githubReviewThreadPageSize, before, githubReviewCommentLimitPerThread) } func scmThreadFromGraphQL(th map[string]any) ports.SCMReviewThreadObservation { diff --git a/backend/internal/adapters/scm/github/provider_test.go b/backend/internal/adapters/scm/github/provider_test.go index 2c706a47..32e04b9e 100644 --- a/backend/internal/adapters/scm/github/provider_test.go +++ b/backend/internal/adapters/scm/github/provider_test.go @@ -1166,7 +1166,7 @@ func TestSCMObservationUsesRollupStateWhenContextsPaginated(t *testing.T) { if len(obs.CI.FailedChecks) != 1 { t.Fatalf("failed checks should include a synthetic paginated failure, got %#v", obs.CI.FailedChecks) } - if obs.CI.FailedChecks[0].Name != "GitHub statusCheckRollup" || !strings.Contains(obs.CI.FailedChecks[0].LogTail, "more than 100") { + if obs.CI.FailedChecks[0].Name != "GitHub statusCheckRollup" || !strings.Contains(obs.CI.FailedChecks[0].LogTail, "outside AO's current GraphQL check-context page") { t.Fatalf("synthetic paginated failure missing actionable details: %#v", obs.CI.FailedChecks[0]) } } @@ -1194,17 +1194,146 @@ func TestSCMMergeabilityBlocksReviewRequiredAndDraft(t *testing.T) { } } -func TestFetchReviewThreadsStopsAtPageLimit(t *testing.T) { +func TestFetchPullRequestsDoesNotFallbackWhenContextPageComplete(t *testing.T) { fake := newFakeGH(t) + fx := basePRFixture() + var pr map[string]any + fx.prData(func(m map[string]any) { pr = m }) fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { - page := fake.callsTo(http.MethodPost, "/graphql") + body, _ := io.ReadAll(r.Body) + if !strings.Contains(string(body), "contexts(first:20)") { + t.Fatalf("batch query should request 20 contexts, body=%s", body) + } + if !strings.Contains(string(body), "pageInfo{ hasNextPage endCursor }") { + t.Fatalf("batch query should request endCursor for fallback, body=%s", body) + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"pr0": map[string]any{"pullRequest": pr}}, + }) + }) + p := newProviderForTest(t, fake) + obs, err := p.FetchPullRequests(ctx(), []ports.SCMPRRef{{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "octocat", Name: "hello", Repo: "octocat/hello"}, Number: 42}}) + if err != nil { + t.Fatalf("FetchPullRequests: %v", err) + } + if got := fake.callsTo(http.MethodPost, "/graphql"); got != 1 { + t.Fatalf("graphql calls = %d, want no fallback", got) + } + if len(obs) != 1 || len(obs[0].CI.Checks) != 1 || obs[0].CI.Summary != string(domain.CIPassing) { + t.Fatalf("observation = %#v", obs) + } +} + +func TestFetchPullRequestsFetchesRemainingCheckContexts(t *testing.T) { + fake := newFakeGH(t) + fx := basePRFixture() + var pr map[string]any + fx.prData(func(m map[string]any) { + pr = m + commits := m["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + roll := commit["statusCheckRollup"].(map[string]any) + roll["state"] = "FAILURE" + ctxs := roll["contexts"].(map[string]any) + ctxs["nodes"] = []any{ + map[string]any{"__typename": "CheckRun", "name": "visible-pass", "status": "COMPLETED", "conclusion": "SUCCESS"}, + } + ctxs["pageInfo"] = map[string]any{"hasNextPage": true, "endCursor": "cursor-1"} + }) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + call := fake.callsTo(http.MethodPost, "/graphql") + body, _ := io.ReadAll(r.Body) + w.Header().Set("Content-Type", "application/json") + switch call { + case 1: + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"pr0": map[string]any{"pullRequest": pr}}, + }) + case 2: + if !strings.Contains(string(body), `after:\"cursor-1\"`) && !strings.Contains(string(body), `after:"cursor-1"`) { + t.Fatalf("fallback query missing cursor, body=%s", body) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"repo": map[string]any{"pullRequest": map[string]any{ + "commits": map[string]any{"nodes": []any{map[string]any{"commit": map[string]any{"statusCheckRollup": map[string]any{ + "contexts": map[string]any{ + "nodes": []any{ + map[string]any{"__typename": "CheckRun", "name": "hidden-fail", "status": "COMPLETED", "conclusion": "FAILURE"}, + }, + "pageInfo": map[string]any{"hasNextPage": false, "endCursor": nil}, + }, + }}}}}, + }}}, + }) + default: + t.Fatalf("unexpected graphql call %d", call) + } + }) + p := newProviderForTest(t, fake) + obs, err := p.FetchPullRequests(ctx(), []ports.SCMPRRef{{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "octocat", Name: "hello", Repo: "octocat/hello"}, Number: 42}}) + if err != nil { + t.Fatalf("FetchPullRequests: %v", err) + } + if got := fake.callsTo(http.MethodPost, "/graphql"); got != 2 { + t.Fatalf("graphql calls = %d, want batch + fallback", got) + } + if len(obs) != 1 { + t.Fatalf("observations = %#v", obs) + } + if obs[0].CI.Summary != string(domain.CIFailing) { + t.Fatalf("CI summary = %q, want aggregate failing", obs[0].CI.Summary) + } + if len(obs[0].CI.Checks) != 2 || len(obs[0].CI.FailedChecks) != 1 || obs[0].CI.FailedChecks[0].Name != "hidden-fail" { + t.Fatalf("checks not completed from fallback: %#v failed=%#v", obs[0].CI.Checks, obs[0].CI.FailedChecks) + } +} + +func TestFetchPullRequestsFailsWhenCheckContextFallbackFails(t *testing.T) { + fake := newFakeGH(t) + fx := basePRFixture() + var pr map[string]any + fx.prData(func(m map[string]any) { + pr = m + commits := m["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + ctxs := commit["statusCheckRollup"].(map[string]any)["contexts"].(map[string]any) + ctxs["pageInfo"] = map[string]any{"hasNextPage": true, "endCursor": "cursor-1"} + }) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + call := fake.callsTo(http.MethodPost, "/graphql") + if call == 1 { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"pr0": map[string]any{"pullRequest": pr}}, + }) + return + } + http.Error(w, `{"message":"graphql down"}`, http.StatusInternalServerError) + }) + p := newProviderForTest(t, fake) + if _, err := p.FetchPullRequests(ctx(), []ports.SCMPRRef{{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "octocat", Name: "hello", Repo: "octocat/hello"}, Number: 42}}); err == nil { + t.Fatal("FetchPullRequests error = nil, want fallback failure") + } +} + +func TestFetchReviewThreadsUsesLatestWindowWithoutFallbackWhenOldestResolved(t *testing.T) { + fake := newFakeGH(t) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + if !strings.Contains(string(body), "reviewThreads(last:50, before:null)") { + t.Fatalf("review query should fetch latest 50, body=%s", body) + } + if !strings.Contains(string(body), "comments(first:5)") { + t.Fatalf("review query should cap comments per thread, body=%s", body) + } w.Header().Set("Content-Type", "application/json") _ = json.NewEncoder(w).Encode(map[string]any{ "data": map[string]any{"repo": map[string]any{"pullRequest": map[string]any{ "reviewDecision": "CHANGES_REQUESTED", "reviewThreads": map[string]any{ - "nodes": []any{map[string]any{"id": "thread-" + strconv.Itoa(page), "path": "main.go", "line": page, "isResolved": false, "comments": map[string]any{"nodes": []any{}}}}, - "pageInfo": map[string]any{"hasNextPage": true, "endCursor": "cursor-" + strconv.Itoa(page)}, + "nodes": []any{map[string]any{"id": "latest-resolved", "path": "main.go", "line": 1, "isResolved": true, "comments": map[string]any{"nodes": []any{}}}}, + "pageInfo": map[string]any{"hasPreviousPage": true, "startCursor": "latest-start"}, }, }}}, }) @@ -1214,10 +1343,63 @@ func TestFetchReviewThreadsStopsAtPageLimit(t *testing.T) { if err != nil { t.Fatalf("FetchReviewThreads: %v", err) } + if got := fake.callsTo(http.MethodPost, "/graphql"); got != 1 { + t.Fatalf("graphql calls = %d, want no fallback when oldest latest thread is resolved", got) + } + if !review.Partial { + t.Fatalf("review Partial = false, want true because older pages exist") + } + if len(review.Threads) != 1 || review.Threads[0].ID != "latest-resolved" { + t.Fatalf("threads = %#v", review.Threads) + } +} + +func TestFetchReviewThreadsFetchesOneOlderPageWhenOldestUnresolved(t *testing.T) { + fake := newFakeGH(t) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + call := fake.callsTo(http.MethodPost, "/graphql") + body, _ := io.ReadAll(r.Body) + w.Header().Set("Content-Type", "application/json") + switch call { + case 1: + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"repo": map[string]any{"pullRequest": map[string]any{ + "reviewDecision": "CHANGES_REQUESTED", + "reviewThreads": map[string]any{ + "nodes": []any{map[string]any{"id": "latest-unresolved", "path": "main.go", "line": 2, "isResolved": false, "comments": map[string]any{"nodes": []any{}}}}, + "pageInfo": map[string]any{"hasPreviousPage": true, "startCursor": "latest-start"}, + }, + }}}, + }) + case 2: + if !strings.Contains(string(body), `before:\"latest-start\"`) && !strings.Contains(string(body), `before:"latest-start"`) { + t.Fatalf("older review query missing before cursor, body=%s", body) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"repo": map[string]any{"pullRequest": map[string]any{ + "reviewDecision": "CHANGES_REQUESTED", + "reviewThreads": map[string]any{ + "nodes": []any{map[string]any{"id": "older", "path": "old.go", "line": 1, "isResolved": false, "comments": map[string]any{"nodes": []any{}}}}, + "pageInfo": map[string]any{"hasPreviousPage": true, "startCursor": "older-start"}, + }, + }}}, + }) + default: + t.Fatalf("unexpected graphql call %d", call) + } + }) + p := newProviderForTest(t, fake) + review, err := p.FetchReviewThreads(ctx(), ports.SCMPRRef{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "o", Name: "r", Repo: "o/r"}, Number: 1}) + if err != nil { + t.Fatalf("FetchReviewThreads: %v", err) + } if got := fake.callsTo(http.MethodPost, "/graphql"); got != githubReviewThreadMaxPages { t.Fatalf("graphql calls = %d, want capped at %d", got, githubReviewThreadMaxPages) } - if len(review.Threads) != githubReviewThreadMaxPages { - t.Fatalf("threads = %d, want %d", len(review.Threads), githubReviewThreadMaxPages) + if !review.Partial { + t.Fatalf("review Partial = false, want true because pagination remains bounded") + } + if len(review.Threads) != 2 || review.Threads[0].ID != "older" || review.Threads[1].ID != "latest-unresolved" { + t.Fatalf("threads order = %#v", review.Threads) } } diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index bd24d025..a95751e8 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -49,7 +49,7 @@ type Store interface { GetProject(ctx context.Context, id string) (domain.ProjectRecord, bool, error) ListPRsBySession(ctx context.Context, sessionID domain.SessionID) ([]domain.PullRequest, error) ListChecks(ctx context.Context, prURL string) ([]domain.PullRequestCheck, error) - WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error + WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, reviewMode ports.ReviewWriteMode) error } // Lifecycle is the provider-neutral lifecycle notification sink. @@ -310,10 +310,10 @@ func (o *Observer) Poll(ctx context.Context) error { observations[key] = obs } - reviewRefreshed := map[string]bool{} + reviewModes := map[string]ports.ReviewWriteMode{} localOnlyObservations := map[string]bool{} reviewStale := map[string]bool{} - o.refreshReviews(ctx, subjects, observations, selection.subjectsByPR, reviewRefreshed, localOnlyObservations, reviewStale, now) + o.refreshReviews(ctx, subjects, observations, selection.subjectsByPR, reviewModes, localOnlyObservations, reviewStale, now) if err := ctx.Err(); err != nil { return err } @@ -327,8 +327,9 @@ func (o *Observer) Poll(ctx context.Context) error { continue } local := subj.known + reviewMode := reviewModes[key] opts := persistenceOptions{ - reviewFetched: reviewRefreshed[key], + reviewFetched: reviewMode != ports.ReviewWritePreserve, preserveLocalMetadataHash: localOnlyObservations[key], preserveLocalCIHash: localOnlyObservations[key], preserveLocalReviewHash: reviewStale[key], @@ -360,7 +361,7 @@ func (o *Observer) Poll(ctx context.Context) error { } pr, checks, threads, comments = domainFromObservation(subj.session.ID, prepared, local, pendingOpts, now) } - if err := o.store.WriteSCMObservation(ctx, pr, checks, threads, comments, reviewRefreshed[key]); err != nil { + if err := o.store.WriteSCMObservation(ctx, pr, checks, threads, comments, reviewMode); err != nil { o.logger.Error("scm observer: DB write failed", "session", subj.session.ID, "pr", pr.URL, "err", err) markRepoRefreshFailed(subj.repo) continue @@ -372,7 +373,7 @@ func (o *Observer) Poll(ctx context.Context) error { continue } if holdHashes { - if err := o.store.WriteSCMObservation(ctx, finalPR, finalChecks, finalThreads, finalComments, reviewRefreshed[key]); err != nil { + if err := o.store.WriteSCMObservation(ctx, finalPR, finalChecks, finalThreads, finalComments, reviewMode); err != nil { o.logger.Error("scm observer: DB lifecycle acknowledgement failed", "session", subj.session.ID, "pr", finalPR.URL, "err", err) markRepoRefreshFailed(subj.repo) continue @@ -388,7 +389,7 @@ func (o *Observer) Poll(ctx context.Context) error { if pending, found := selection.commitETags[key]; found { o.cacheSetString(o.Cache.CommitChecksETag, &o.Cache.commitOrder, pending.key, pending.value) } - if reviewRefreshed[key] { + if reviewModes[key] != ports.ReviewWritePreserve { o.cacheSetTime(o.Cache.LastReviewPollAt, &o.Cache.lastReviewPollOrder, key, now) } } @@ -673,7 +674,7 @@ func applyStoredFailedLogTails(obs *ports.SCMObservation, checks []domain.PullRe return true } -func (o *Observer) refreshReviews(ctx context.Context, subjects map[string]*subject, observations map[string]ports.SCMObservation, subjectsByPR map[string]*subject, reviewRefreshed, localOnlyObservations, reviewStale map[string]bool, now time.Time) { +func (o *Observer) refreshReviews(ctx context.Context, subjects map[string]*subject, observations map[string]ports.SCMObservation, subjectsByPR map[string]*subject, reviewModes map[string]ports.ReviewWriteMode, localOnlyObservations, reviewStale map[string]bool, now time.Time) { for _, s := range subjects { if !s.hasPR || s.known.Number <= 0 { continue @@ -712,10 +713,15 @@ func (o *Observer) refreshReviews(ctx context.Context, subjects map[string]*subj obs.Review.Decision = review.Decision } obs.Review.Threads = review.Threads + obs.Review.Partial = review.Partial obs.ObservedAt = now observations[pkey] = obs subjectsByPR[pkey] = s - reviewRefreshed[pkey] = true + if review.Partial { + reviewModes[pkey] = ports.ReviewWriteMerge + } else { + reviewModes[pkey] = ports.ReviewWriteReplace + } if _, ok := o.Cache.ReviewRefreshFailed[pkey]; ok { o.Cache.ReviewRefreshFailed[pkey] = false } @@ -1032,7 +1038,12 @@ func ciSemanticHash(ci ports.SCMCIObservation) string { } func reviewSemanticHash(review ports.SCMReviewObservation) string { - return stableHash(review) + type reviewHashPayload struct { + Decision string + Threads []ports.SCMReviewThreadObservation + Partial bool `json:",omitempty"` + } + return stableHash(reviewHashPayload{Decision: review.Decision, Threads: review.Threads, Partial: review.Partial}) } func threadSemanticHash(th ports.SCMReviewThreadObservation) string { diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index ceab274c..5fd382ae 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -36,11 +36,11 @@ type fakeStore struct { } type fakeWrite struct { - pr domain.PullRequest - checks []domain.PullRequestCheck - threads []domain.PullRequestReviewThread - comments []domain.PullRequestComment - replaceReview bool + pr domain.PullRequest + checks []domain.PullRequestCheck + threads []domain.PullRequestReviewThread + comments []domain.PullRequestComment + reviewMode ports.ReviewWriteMode } func (s *fakeStore) ListAllSessions(ctx context.Context) ([]domain.SessionRecord, error) { @@ -82,13 +82,13 @@ func (s *fakeStore) ListChecks(_ context.Context, prURL string) ([]domain.PullRe return append([]domain.PullRequestCheck(nil), s.checks[prURL]...), nil } -func (s *fakeStore) WriteSCMObservation(_ context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error { +func (s *fakeStore) WriteSCMObservation(_ context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, reviewMode ports.ReviewWriteMode) error { s.mu.Lock() defer s.mu.Unlock() if s.writeErr != nil { return s.writeErr } - s.writes = append(s.writes, fakeWrite{pr: pr, checks: append([]domain.PullRequestCheck(nil), checks...), threads: append([]domain.PullRequestReviewThread(nil), threads...), comments: append([]domain.PullRequestComment(nil), comments...), replaceReview: replaceReview}) + s.writes = append(s.writes, fakeWrite{pr: pr, checks: append([]domain.PullRequestCheck(nil), checks...), threads: append([]domain.PullRequestReviewThread(nil), threads...), comments: append([]domain.PullRequestComment(nil), comments...), reviewMode: reviewMode}) return nil } @@ -190,7 +190,7 @@ func (l *fakeLifecycle) ApplySCMObservation(_ context.Context, _ domain.SessionI return nil } -func newTestObserver(store *fakeStore, provider *fakeProvider, lc *fakeLifecycle, now time.Time) *Observer { +func newTestObserver(store *fakeStore, provider *fakeProvider, lc Lifecycle, now time.Time) *Observer { return New(provider, store, lc, Config{Clock: func() time.Time { return now }, Tick: time.Hour, Logger: quietSlog(), CacheMax: 128}) } @@ -405,8 +405,8 @@ func TestPoll_ReviewPollingRespectsInterval(t *testing.T) { if provider.reviewCalls != 1 { t.Fatalf("review not fetched after interval: %d", provider.reviewCalls) } - if len(store.writes) == 0 || !store.writes[0].replaceReview { - t.Fatalf("review refresh not persisted with replaceReview: %#v", store.writes) + if len(store.writes) == 0 || store.writes[0].reviewMode != ports.ReviewWriteReplace { + t.Fatalf("review refresh not persisted with replace mode: %#v", store.writes) } } @@ -451,6 +451,37 @@ func TestPoll_ReviewHashDrivesPersistenceAndLifecycle(t *testing.T) { } } +func TestPoll_PartialReviewRefreshUsesMergeMode(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.ReviewHash = "old" + local.Review = domain.ReviewChangesRequest + store.prs["p-1"] = []domain.PullRequest{local} + review := ports.SCMReviewObservation{ + Decision: string(domain.ReviewChangesRequest), + Partial: true, + Threads: []ports.SCMReviewThreadObservation{{ID: "t1", Path: "f.go", Line: 2, Comments: []ports.SCMReviewCommentObservation{{ID: "c1", Author: "ann", Body: "fix this"}}}}, + } + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, + reviews: map[string]ports.SCMReviewObservation{prKey(testRepo, 1): review}, + } + obs := newTestObserver(store, provider, nil, time.Unix(210, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) != 1 { + t.Fatalf("writes = %#v, want one partial review merge", store.writes) + } + if store.writes[0].reviewMode != ports.ReviewWriteMerge { + t.Fatalf("review mode = %v, want merge", store.writes[0].reviewMode) + } + if store.writes[0].pr.ReviewHash != reviewSemanticHash(review) { + t.Fatalf("review hash = %q, want partial hash %q", store.writes[0].pr.ReviewHash, reviewSemanticHash(review)) + } +} + func TestPoll_ReviewOnlyRefreshPreservesLocalCIAndMetadata(t *testing.T) { store := testStoreWithSession() localObs := testObs(1) diff --git a/backend/internal/ports/outbound.go b/backend/internal/ports/outbound.go index 414a0218..efab1ea7 100644 --- a/backend/internal/ports/outbound.go +++ b/backend/internal/ports/outbound.go @@ -15,11 +15,27 @@ type PRWriter interface { WritePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, comments []domain.PullRequestComment) error } -// SCMWriter records provider-neutral SCM observations. Review facts are only -// replaced when replaceReview is true so metadata/CI-only refreshes preserve the -// last known review threads/comments. +// ReviewWriteMode describes how an SCM observation should update normalized +// review-thread/comment rows. +type ReviewWriteMode int + +const ( + // ReviewWritePreserve leaves stored review rows untouched. Metadata/CI-only + // refreshes and failed review fetches use this mode. + ReviewWritePreserve ReviewWriteMode = iota + // ReviewWriteReplace treats the fetched review rows as a complete snapshot + // and replaces all stored review rows for the PR. + ReviewWriteReplace + // ReviewWriteMerge treats the fetched review rows as a partial window: + // fetched threads/comments are updated while older unseen rows are preserved. + ReviewWriteMerge +) + +// SCMWriter records provider-neutral SCM observations. reviewMode decides +// whether review facts are preserved, replaced with a complete snapshot, or +// merged as a bounded partial window. type SCMWriter interface { - WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error + WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, reviewMode ReviewWriteMode) error } // AgentMessenger injects a message into a running agent. diff --git a/backend/internal/ports/scm_observations.go b/backend/internal/ports/scm_observations.go index 03ba0dc3..27d01135 100644 --- a/backend/internal/ports/scm_observations.go +++ b/backend/internal/ports/scm_observations.go @@ -175,6 +175,10 @@ type SCMReviewObservation struct { Decision string // Threads contains normalized review threads fetched on the slower review cadence. Threads []SCMReviewThreadObservation + // Partial is true when the provider intentionally fetched and persisted a + // bounded review-thread window instead of a complete PR-lifetime snapshot. + // Consumers should treat Threads as a merge/update set in that case. + Partial bool } // SCMReviewThreadObservation is a normalized review thread with comments. diff --git a/backend/internal/storage/sqlite/gen/pr_comment.sql.go b/backend/internal/storage/sqlite/gen/pr_comment.sql.go index b87bf173..43821fdb 100644 --- a/backend/internal/storage/sqlite/gen/pr_comment.sql.go +++ b/backend/internal/storage/sqlite/gen/pr_comment.sql.go @@ -19,6 +19,20 @@ func (q *Queries) DeletePRComments(ctx context.Context, prUrl string) error { return err } +const deletePRCommentsByThread = `-- name: DeletePRCommentsByThread :exec +DELETE FROM pr_comment WHERE pr_url = ? AND thread_id = ? +` + +type DeletePRCommentsByThreadParams struct { + PRURL string + ThreadID string +} + +func (q *Queries) DeletePRCommentsByThread(ctx context.Context, arg DeletePRCommentsByThreadParams) error { + _, err := q.db.ExecContext(ctx, deletePRCommentsByThread, arg.PRURL, arg.ThreadID) + return err +} + const insertPRComment = `-- name: InsertPRComment :exec INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) diff --git a/backend/internal/storage/sqlite/gen/pr_review_threads.sql.go b/backend/internal/storage/sqlite/gen/pr_review_threads.sql.go index a9b67273..0f2eda25 100644 --- a/backend/internal/storage/sqlite/gen/pr_review_threads.sql.go +++ b/backend/internal/storage/sqlite/gen/pr_review_threads.sql.go @@ -79,6 +79,7 @@ type UpsertPRReviewThreadParams struct { UpdatedAt time.Time } +// Summary: SQLC queries for replacing and reading normalized PR review threads. func (q *Queries) UpsertPRReviewThread(ctx context.Context, arg UpsertPRReviewThreadParams) error { _, err := q.db.ExecContext(ctx, upsertPRReviewThread, arg.PRURL, diff --git a/backend/internal/storage/sqlite/queries/pr_comment.sql b/backend/internal/storage/sqlite/queries/pr_comment.sql index c180c97a..c7844c23 100644 --- a/backend/internal/storage/sqlite/queries/pr_comment.sql +++ b/backend/internal/storage/sqlite/queries/pr_comment.sql @@ -5,6 +5,9 @@ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); -- name: DeletePRComments :exec DELETE FROM pr_comment WHERE pr_url = ?; +-- name: DeletePRCommentsByThread :exec +DELETE FROM pr_comment WHERE pr_url = ? AND thread_id = ?; + -- name: ListPRComments :many SELECT pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot FROM pr_comment WHERE pr_url = ? ORDER BY created_at, comment_id; diff --git a/backend/internal/storage/sqlite/store/pr_store.go b/backend/internal/storage/sqlite/store/pr_store.go index 1519cb82..88ba9706 100644 --- a/backend/internal/storage/sqlite/store/pr_store.go +++ b/backend/internal/storage/sqlite/store/pr_store.go @@ -35,18 +35,19 @@ var ( // not touch pr_review_threads: those rows are owned by WriteSCMObservation's // slower review-thread refresh path. func (s *Store) WritePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, comments []domain.PullRequestComment) error { - return s.writePR(ctx, pr, checks, nil, comments, false, true) + return s.writePR(ctx, pr, checks, nil, comments, ports.ReviewWritePreserve, true) } // WriteSCMObservation persists a provider-neutral SCM observation in one write // transaction. It upserts the full PR metadata row and CI checks. Review threads -// and comments are replaced only when replaceReview is true, because review -// polling runs at a slower cadence than metadata/CI polling. -func (s *Store) WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceReview bool) error { - return s.writePR(ctx, pr, checks, threads, comments, replaceReview, replaceReview) +// and comments are preserved, replaced, or merged according to reviewMode +// because review polling runs at a slower and sometimes intentionally bounded +// cadence than metadata/CI polling. +func (s *Store) WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, reviewMode ports.ReviewWriteMode) error { + return s.writePR(ctx, pr, checks, threads, comments, reviewMode, false) } -func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, replaceThreads, replaceComments bool) error { +func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, reviewMode ports.ReviewWriteMode, replaceLegacyComments bool) error { s.writeMu.Lock() defer s.writeMu.Unlock() return s.inTx(ctx, "write pr observation", func(q *gen.Queries) error { @@ -65,24 +66,31 @@ func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []dom return err } } - if replaceThreads { + if reviewMode == ports.ReviewWriteReplace { if err := q.DeletePRReviewThreads(ctx, pr.URL); err != nil { return err } } - if replaceComments { + if reviewMode == ports.ReviewWriteReplace || replaceLegacyComments { if err := q.DeletePRComments(ctx, pr.URL); err != nil { return err } } - if replaceThreads { + if reviewMode == ports.ReviewWriteReplace || reviewMode == ports.ReviewWriteMerge { for _, th := range threads { if err := q.UpsertPRReviewThread(ctx, genReviewThreadParams(pr.URL, th)); err != nil { return fmt.Errorf("review thread %q: %w", th.ThreadID, err) } } } - if replaceComments { + if reviewMode == ports.ReviewWriteMerge { + for _, threadID := range reviewThreadIDs(threads, comments) { + if err := q.DeletePRCommentsByThread(ctx, gen.DeletePRCommentsByThreadParams{PRURL: pr.URL, ThreadID: threadID}); err != nil { + return fmt.Errorf("delete comments for review thread %q: %w", threadID, err) + } + } + } + if reviewMode == ports.ReviewWriteReplace || reviewMode == ports.ReviewWriteMerge || replaceLegacyComments { for _, c := range comments { if err := q.InsertPRComment(ctx, genCommentParams(pr.URL, c)); err != nil { return fmt.Errorf("comment %q: %w", c.ID, err) @@ -93,6 +101,26 @@ func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []dom }) } +func reviewThreadIDs(threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment) []string { + seen := map[string]bool{} + out := make([]string, 0, len(threads)) + for _, th := range threads { + if th.ThreadID == "" || seen[th.ThreadID] { + continue + } + seen[th.ThreadID] = true + out = append(out, th.ThreadID) + } + for _, c := range comments { + if c.ThreadID == "" || seen[c.ThreadID] { + continue + } + seen[c.ThreadID] = true + out = append(out, c.ThreadID) + } + return out +} + // GetPR returns the PR facts for a URL, or ok=false if absent. func (s *Store) GetPR(ctx context.Context, url string) (domain.PullRequest, bool, error) { p, err := s.qr.GetPR(ctx, url) diff --git a/backend/internal/storage/sqlite/store/store_test.go b/backend/internal/storage/sqlite/store/store_test.go index b94dab02..d696c5d3 100644 --- a/backend/internal/storage/sqlite/store/store_test.go +++ b/backend/internal/storage/sqlite/store/store_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" "github.com/aoagents/agent-orchestrator/backend/internal/storage/sqlite" ) @@ -239,7 +240,7 @@ func TestWriteSCMObservationPersistsMetadataChecksReviewsAndComments(t *testing. threads := []domain.PullRequestReviewThread{{ThreadID: "t1", Path: "main.go", Line: 7, SemanticHash: "th", UpdatedAt: now}} comments := []domain.PullRequestComment{{ThreadID: "t1", ID: "c1", Author: "reviewer", File: "main.go", Line: 7, Body: "fix", URL: "comment", CreatedAt: now}} - if err := s.WriteSCMObservation(ctx, pr, checks, threads, comments, true); err != nil { + if err := s.WriteSCMObservation(ctx, pr, checks, threads, comments, ports.ReviewWriteReplace); err != nil { t.Fatal(err) } got, ok, err := s.GetPR(ctx, pr.URL) @@ -263,6 +264,75 @@ func TestWriteSCMObservationPersistsMetadataChecksReviewsAndComments(t *testing. } } +func TestWriteSCMObservationMergeUpdatesFetchedReviewWindow(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + r, _ := s.CreateSession(ctx, sampleRecord("mer")) + now := time.Now().UTC().Truncate(time.Second) + pr := domain.PullRequest{URL: "https://github.com/o/r/pull/1", SessionID: r.ID, Number: 1, UpdatedAt: now} + + initialThreads := []domain.PullRequestReviewThread{ + {ThreadID: "older", Path: "old.go", Line: 1, Resolved: false, SemanticHash: "old", UpdatedAt: now}, + {ThreadID: "latest", Path: "main.go", Line: 7, Resolved: false, SemanticHash: "latest-v1", UpdatedAt: now}, + } + initialComments := []domain.PullRequestComment{ + {ThreadID: "older", ID: "older-c1", Author: "ann", Body: "old", CreatedAt: now}, + {ThreadID: "latest", ID: "latest-c1", Author: "bob", Body: "before", CreatedAt: now}, + } + if err := s.WriteSCMObservation(ctx, pr, nil, initialThreads, initialComments, ports.ReviewWriteReplace); err != nil { + t.Fatal(err) + } + + mergedThreads := []domain.PullRequestReviewThread{ + {ThreadID: "latest", Path: "main.go", Line: 8, Resolved: true, SemanticHash: "latest-v2", UpdatedAt: now.Add(time.Second)}, + {ThreadID: "new", Path: "new.go", Line: 2, Resolved: false, SemanticHash: "new", UpdatedAt: now.Add(time.Second)}, + } + mergedComments := []domain.PullRequestComment{ + {ThreadID: "latest", ID: "latest-c2", Author: "bob", Body: "after", CreatedAt: now.Add(time.Second)}, + {ThreadID: "new", ID: "new-c1", Author: "cat", Body: "new", CreatedAt: now.Add(time.Second)}, + } + if err := s.WriteSCMObservation(ctx, pr, nil, mergedThreads, mergedComments, ports.ReviewWriteMerge); err != nil { + t.Fatal(err) + } + + gotThreads, err := s.ListPRReviewThreads(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + if len(gotThreads) != 3 { + t.Fatalf("threads = %#v, want older preserved plus latest/new", gotThreads) + } + byThread := map[string]domain.PullRequestReviewThread{} + for _, th := range gotThreads { + byThread[th.ThreadID] = th + } + if byThread["older"].SemanticHash != "old" { + t.Fatalf("older thread not preserved: %#v", byThread["older"]) + } + if byThread["latest"].SemanticHash != "latest-v2" || !byThread["latest"].Resolved || byThread["latest"].Line != 8 { + t.Fatalf("latest thread not updated: %#v", byThread["latest"]) + } + if byThread["new"].SemanticHash != "new" { + t.Fatalf("new thread not inserted: %#v", byThread["new"]) + } + + gotComments, err := s.ListPRComments(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + ids := map[string]bool{} + for _, c := range gotComments { + ids[c.ID] = true + } + if !ids["older-c1"] || !ids["latest-c2"] || !ids["new-c1"] { + t.Fatalf("comments after merge = %#v, want older preserved and fetched threads replaced", gotComments) + } + if ids["latest-c1"] { + t.Fatalf("stale fetched-thread comment was preserved: %#v", gotComments) + } +} + func TestWritePRPreservesSCMReviewThreads(t *testing.T) { s := newTestStore(t) ctx := context.Background() @@ -272,7 +342,7 @@ func TestWritePRPreservesSCMReviewThreads(t *testing.T) { pr := domain.PullRequest{URL: "https://github.com/o/r/pull/1", SessionID: r.ID, Number: 1, UpdatedAt: now} threads := []domain.PullRequestReviewThread{{ThreadID: "t1", Path: "main.go", Line: 7, SemanticHash: "thread-v1", UpdatedAt: now}} - if err := s.WriteSCMObservation(ctx, pr, nil, threads, nil, true); err != nil { + if err := s.WriteSCMObservation(ctx, pr, nil, threads, nil, ports.ReviewWriteReplace); err != nil { t.Fatal(err) } if err := s.WritePR(ctx, domain.PullRequest{URL: pr.URL, SessionID: r.ID, Number: 1, CI: domain.CIPassing, UpdatedAt: now.Add(time.Second)}, nil, nil); err != nil { From 314c1fda72be637e81ee93eed1cd522512bbd240 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Wed, 3 Jun 2026 15:48:12 +0530 Subject: [PATCH 12/20] chore: remove scm observer no-op code --- .../adapters/scm/github/observer_provider.go | 18 +-------- .../adapters/scm/github/provider_test.go | 6 --- backend/internal/observe/scm/observer.go | 37 +++++++------------ backend/internal/observe/scm/observer_test.go | 3 +- 4 files changed, 15 insertions(+), 49 deletions(-) diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go index c733b447..6654dff4 100644 --- a/backend/internal/adapters/scm/github/observer_provider.go +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -205,7 +205,6 @@ type restListPull struct { } `json:"base"` User struct { Login string `json:"login"` - Type string `json:"type"` } `json:"user"` CreatedAt string `json:"created_at"` UpdatedAt string `json:"updated_at"` @@ -251,7 +250,7 @@ func scmPRFields() string { number url state isDraft merged closed title additions deletions changedFiles mergeable mergeStateStatus reviewDecision headRefName headRefOid baseRefName baseRefOid createdAt updatedAt mergedAt closedAt -author{ login __typename } +author{ login } mergeCommit{ oid } commits(last:1){ nodes{ commit{ oid statusCheckRollup{ state contexts(first:CONTEXT_LIMIT){ nodes{ __typename @@ -340,10 +339,6 @@ func scmObservationFromGraphQL(ref ports.SCMPRRef, pr map[string]any) ports.SCMO failed := failedSCMChecks(checks) ci := string(ciSummaryFromRollupState(pr)) prURL := firstNonEmpty(str(pr["url"]), ref.URL) - if ci == string(domain.CIFailing) && len(failed) == 0 && scmContextsPaginated(pr) { - failed = []ports.SCMCheckObservation{paginatedFailureSCMCheck(prURL)} - checks = append(checks, failed[0]) - } review := string(reviewDecisionFromGraphQL(pr)) providerMergeable := str(pr["mergeable"]) providerMergeState := str(pr["mergeStateStatus"]) @@ -406,17 +401,6 @@ func scmContextsPaginated(pr map[string]any) bool { return pageInfoHasMore(statusContexts(pr)) } -func paginatedFailureSCMCheck(prURL string) ports.SCMCheckObservation { - const msg = "GitHub reports failing CI, but the failing context is outside AO's current GraphQL check-context page. Open the PR checks tab for the hidden failing job." - return ports.SCMCheckObservation{ - Name: "GitHub statusCheckRollup", - Status: string(domain.PRCheckFailed), - Conclusion: "failure", - URL: prURL, - LogTail: msg, - } -} - func scmChecksFromGraphQL(pr map[string]any) []ports.SCMCheckObservation { roll := statusRollup(pr) contexts, _ := roll["contexts"].(map[string]any) diff --git a/backend/internal/adapters/scm/github/provider_test.go b/backend/internal/adapters/scm/github/provider_test.go index 32e04b9e..20159aab 100644 --- a/backend/internal/adapters/scm/github/provider_test.go +++ b/backend/internal/adapters/scm/github/provider_test.go @@ -1163,12 +1163,6 @@ func TestSCMObservationUsesRollupStateWhenContextsPaginated(t *testing.T) { if obs.CI.Summary != string(domain.CIFailing) { t.Fatalf("observer CI summary = %q, want failing from aggregate rollup state", obs.CI.Summary) } - if len(obs.CI.FailedChecks) != 1 { - t.Fatalf("failed checks should include a synthetic paginated failure, got %#v", obs.CI.FailedChecks) - } - if obs.CI.FailedChecks[0].Name != "GitHub statusCheckRollup" || !strings.Contains(obs.CI.FailedChecks[0].LogTail, "outside AO's current GraphQL check-context page") { - t.Fatalf("synthetic paginated failure missing actionable details: %#v", obs.CI.FailedChecks[0]) - } } func TestSCMMergeabilityBlocksReviewRequiredAndDraft(t *testing.T) { diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index a95751e8..0f7f2c09 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -82,8 +82,6 @@ type ObserverCache struct { RepoPRListETag map[string]string // CommitChecksETag maps repo+commit keys to the last check-runs ETag. CommitChecksETag map[string]string - // ReviewETag is reserved for provider review-thread ETags when supported. - ReviewETag map[string]string // LastReviewPollAt maps PR keys to the last review-thread fetch timestamp. LastReviewPollAt map[string]time.Time // ReviewRefreshFailed marks PRs whose review-thread refresh failed; the @@ -108,7 +106,6 @@ func newCache(maxEntries int) ObserverCache { return ObserverCache{ RepoPRListETag: map[string]string{}, CommitChecksETag: map[string]string{}, - ReviewETag: map[string]string{}, LastReviewPollAt: map[string]time.Time{}, ReviewRefreshFailed: map[string]bool{}, max: maxEntries, @@ -187,13 +184,11 @@ func (o *Observer) loop(ctx context.Context, done chan<- struct{}) { } type subject struct { - session domain.SessionRecord - project domain.ProjectRecord - repo ports.SCMRepo - branch string - known domain.PullRequest - hasPR bool - newlyDetected bool + session domain.SessionRecord + repo ports.SCMRepo + branch string + known domain.PullRequest + hasPR bool } type repoGuardState struct { @@ -347,8 +342,7 @@ func (o *Observer) Poll(ctx context.Context) error { // changed hashes at their local values until lifecycle succeeds; if the // daemon restarts after a lifecycle failure, the stale hashes force the // same observation to be fetched and delivered again. - holdHashes := o.lifecycle != nil - if holdHashes { + if o.lifecycle != nil { pendingOpts := opts if prepared.Changed.Metadata { pendingOpts.preserveLocalMetadataHash = true @@ -372,12 +366,10 @@ func (o *Observer) Poll(ctx context.Context) error { markRepoRefreshFailed(subj.repo) continue } - if holdHashes { - if err := o.store.WriteSCMObservation(ctx, finalPR, finalChecks, finalThreads, finalComments, reviewMode); err != nil { - o.logger.Error("scm observer: DB lifecycle acknowledgement failed", "session", subj.session.ID, "pr", finalPR.URL, "err", err) - markRepoRefreshFailed(subj.repo) - continue - } + if err := o.store.WriteSCMObservation(ctx, finalPR, finalChecks, finalThreads, finalComments, reviewMode); err != nil { + o.logger.Error("scm observer: DB lifecycle acknowledgement failed", "session", subj.session.ID, "pr", finalPR.URL, "err", err) + markRepoRefreshFailed(subj.repo) + continue } } prRefreshOK[key] = true @@ -466,7 +458,7 @@ func (o *Observer) discoverSubjects(ctx context.Context) (map[string]*subject, e return nil, err } known, hasPR := chooseKnownPR(prs) - s := &subject{session: sess, project: proj, repo: repo, branch: branch, known: known, hasPR: hasPR} + s := &subject{session: sess, repo: repo, branch: branch, known: known, hasPR: hasPR} if hasPR && known.Number > 0 { key := prKey(repo, known.Number) if existing, ok := out[key]; ok { @@ -557,7 +549,6 @@ func (o *Observer) detectMissingPRs(ctx context.Context, subjects map[string]*su } s.known = domain.PullRequest{URL: pr.URL, SessionID: s.session.ID, Number: pr.Number, SourceBranch: pr.SourceBranch, TargetBranch: pr.TargetBranch, HeadSHA: pr.HeadSHA, Provider: s.repo.Provider, Host: s.repo.Host, Repo: repoFullName(s.repo), UpdatedAt: now} s.hasPR = true - s.newlyDetected = true delete(subjects, oldKey) subjects[newKey] = s } @@ -569,14 +560,13 @@ func (o *Observer) selectRefreshCandidates(ctx context.Context, subjects map[str commitETags: map[string]pendingCacheString{}, candidateKeys: map[string]bool{}, } - seen := map[string]bool{} for _, s := range subjects { if !s.hasPR || s.known.Number <= 0 { continue } key := prKey(s.repo, s.known.Number) selection.subjectsByPR[key] = s - candidate := s.newlyDetected || missingLocalState(s.known) + candidate := missingLocalState(s.known) g := guards[prKey(s.repo, 0)] if g.err == nil && !g.result.NotModified { candidate = true @@ -597,10 +587,9 @@ func (o *Observer) selectRefreshCandidates(ctx context.Context, subjects map[str } } } - if candidate && !seen[key] { + if candidate { selection.refs = append(selection.refs, ports.SCMPRRef{Repo: s.repo, Number: s.known.Number, URL: s.known.URL}) selection.candidateKeys[key] = true - seen[key] = true } } return selection diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index 5fd382ae..fe026d8b 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -38,7 +38,6 @@ type fakeStore struct { type fakeWrite struct { pr domain.PullRequest checks []domain.PullRequestCheck - threads []domain.PullRequestReviewThread comments []domain.PullRequestComment reviewMode ports.ReviewWriteMode } @@ -88,7 +87,7 @@ func (s *fakeStore) WriteSCMObservation(_ context.Context, pr domain.PullRequest if s.writeErr != nil { return s.writeErr } - s.writes = append(s.writes, fakeWrite{pr: pr, checks: append([]domain.PullRequestCheck(nil), checks...), threads: append([]domain.PullRequestReviewThread(nil), threads...), comments: append([]domain.PullRequestComment(nil), comments...), reviewMode: reviewMode}) + s.writes = append(s.writes, fakeWrite{pr: pr, checks: append([]domain.PullRequestCheck(nil), checks...), comments: append([]domain.PullRequestComment(nil), comments...), reviewMode: reviewMode}) return nil } From 691077c7b125d1bfe45d27fd6b8c8f58f69517ba Mon Sep 17 00:00:00 2001 From: whoisasx Date: Wed, 3 Jun 2026 16:01:24 +0530 Subject: [PATCH 13/20] fix: tighten scm bot and log-tail handling --- .../adapters/scm/github/observer_provider.go | 9 +++-- .../adapters/scm/github/provider_test.go | 33 +++++++++++++++++++ backend/internal/observe/scm/observer.go | 2 +- backend/internal/observe/scm/observer_test.go | 28 ++++++++++++++++ 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go index 6654dff4..2a463bf5 100644 --- a/backend/internal/adapters/scm/github/observer_provider.go +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -551,11 +551,13 @@ func scmThreadFromGraphQL(th map[string]any) ports.SCMReviewThreadObservation { Resolved: boolv(th["isResolved"]), } comments, _ := th["comments"].(map[string]any) - for _, cn := range nodes(comments["nodes"]) { + commentNodes := nodes(comments["nodes"]) + allCommentsBot := len(commentNodes) > 0 + for _, cn := range commentNodes { author, _ := cn["author"].(map[string]any) isBot := isBotAuthor(author) - if isBot { - out.IsBot = true + if !isBot { + allCommentsBot = false } out.Comments = append(out.Comments, ports.SCMReviewCommentObservation{ ID: str(cn["id"]), @@ -565,6 +567,7 @@ func scmThreadFromGraphQL(th map[string]any) ports.SCMReviewThreadObservation { IsBot: isBot, }) } + out.IsBot = allCommentsBot return out } diff --git a/backend/internal/adapters/scm/github/provider_test.go b/backend/internal/adapters/scm/github/provider_test.go index 20159aab..a4d58cc8 100644 --- a/backend/internal/adapters/scm/github/provider_test.go +++ b/backend/internal/adapters/scm/github/provider_test.go @@ -1144,6 +1144,39 @@ func TestSCMChecksFromGraphQL_StatusContextUsesState(t *testing.T) { } } +func TestSCMThreadFromGraphQLMarksThreadBotOnlyWhenAllCommentsAreBots(t *testing.T) { + mixed := scmThreadFromGraphQL(map[string]any{ + "id": "T-mixed", + "path": "main.go", + "line": float64(12), + "isResolved": false, + "comments": map[string]any{"nodes": []any{ + map[string]any{"id": "C-human", "body": "please fix", "author": map[string]any{"login": "alice", "__typename": "User"}}, + map[string]any{"id": "C-bot", "body": "automated note", "author": map[string]any{"login": "review-bot", "__typename": "Bot"}}, + }}, + }) + if mixed.IsBot { + t.Fatalf("mixed human+bot thread marked as bot: %+v", mixed) + } + if len(mixed.Comments) != 2 || mixed.Comments[0].IsBot || !mixed.Comments[1].IsBot { + t.Fatalf("comment bot flags not preserved on mixed thread: %+v", mixed.Comments) + } + + allBot := scmThreadFromGraphQL(map[string]any{ + "id": "T-bot", + "path": "main.go", + "line": float64(12), + "isResolved": false, + "comments": map[string]any{"nodes": []any{ + map[string]any{"id": "C-bot-1", "body": "automated note", "author": map[string]any{"login": "review-bot", "__typename": "Bot"}}, + map[string]any{"id": "C-bot-2", "body": "more automation", "author": map[string]any{"login": "other-bot", "__typename": "Bot"}}, + }}, + }) + if !allBot.IsBot { + t.Fatalf("all-bot thread not marked as bot: %+v", allBot) + } +} + func TestSCMObservationUsesRollupStateWhenContextsPaginated(t *testing.T) { fx := basePRFixture() var pr map[string]any diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index 0f7f2c09..53c7e874 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -612,7 +612,7 @@ func (o *Observer) enrichFailureLogs(ctx context.Context, obs *ports.SCMObservat tails := make([]string, 0, len(obs.CI.FailedChecks)) for i := range obs.CI.FailedChecks { tail := obs.CI.FailedChecks[i].LogTail - if obs.CI.FailedChecks[i].ProviderID != "" || tail == "" { + if tail == "" && obs.CI.FailedChecks[i].ProviderID != "" { var err error tail, err = o.provider.FetchFailedCheckLogTail(ctx, ports.SCMRepo{Provider: obs.Provider, Host: obs.Host, Repo: obs.Repo, Owner: ownerOf(obs.Repo), Name: nameOf(obs.Repo)}, obs.CI.FailedChecks[i]) if err != nil { diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index fe026d8b..4a865a31 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -381,6 +381,34 @@ func TestPoll_FailingCIFetchesLogTailOnlyWhenFingerprintChanged(t *testing.T) { } } +func TestEnrichFailureLogsDoesNotRefetchExistingTailOrMissingProviderID(t *testing.T) { + obsValue := testObs(1) + obsValue.CI.Summary = string(domain.CIFailing) + obsValue.CI.FailedFingerprint = "fp" + obsValue.CI.Checks = []ports.SCMCheckObservation{ + {Name: "build", Status: string(domain.PRCheckFailed), Conclusion: "failure", ProviderID: "99", LogTail: "provider supplied tail"}, + {Name: "lint", Status: string(domain.PRCheckFailed), Conclusion: "failure"}, + } + obsValue.CI.FailedChecks = append([]ports.SCMCheckObservation(nil), obsValue.CI.Checks...) + + provider := &fakeProvider{logTails: map[string]string{"build": "fetched tail", "lint": "should not fetch"}} + obs := newTestObserver(testStoreWithSession(), provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + obs.enrichFailureLogs(context.Background(), &obsValue, domain.PullRequest{}) + + if provider.logCalls != 0 { + t.Fatalf("log calls = %d, want 0 when tail already exists or provider id is missing", provider.logCalls) + } + if got := obsValue.CI.FailedChecks[0].LogTail; got != "provider supplied tail" { + t.Fatalf("existing tail changed: got %q", got) + } + if got := obsValue.CI.FailedChecks[1].LogTail; got != "" { + t.Fatalf("tail without provider id = %q, want empty", got) + } + if got := obsValue.CI.FailureLogTail; got != "provider supplied tail" { + t.Fatalf("FailureLogTail = %q, want only existing tail", got) + } +} + func TestPoll_ReviewPollingRespectsInterval(t *testing.T) { store := testStoreWithSession() local := knownPR(1) From 0ef2870a29e44df5dd1a59a7ebc242fdc17fa4c4 Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Wed, 3 Jun 2026 22:56:48 +0530 Subject: [PATCH 14/20] fix(scm): emit CDC events for pr_review_threads --- backend/internal/cdc/event.go | 12 +-- .../migrations/0003_scm_observer_schema.sql | 88 +++++++++++++++++++ .../storage/sqlite/store/pr_cdc_test.go | 65 ++++++++++++++ 3 files changed, 160 insertions(+), 5 deletions(-) diff --git a/backend/internal/cdc/event.go b/backend/internal/cdc/event.go index 571ede2d..32c651b4 100644 --- a/backend/internal/cdc/event.go +++ b/backend/internal/cdc/event.go @@ -20,11 +20,13 @@ type EventType string // Event types, one per row-change the DB triggers emit into change_log. const ( - EventSessionCreated EventType = "session_created" - EventSessionUpdated EventType = "session_updated" - EventPRCreated EventType = "pr_created" - EventPRUpdated EventType = "pr_updated" - EventPRCheckRecorded EventType = "pr_check_recorded" + EventSessionCreated EventType = "session_created" + EventSessionUpdated EventType = "session_updated" + EventPRCreated EventType = "pr_created" + EventPRUpdated EventType = "pr_updated" + EventPRCheckRecorded EventType = "pr_check_recorded" + EventPRReviewThreadAdded EventType = "pr_review_thread_added" + EventPRReviewThreadResolved EventType = "pr_review_thread_resolved" ) // Event is one CDC change read from change_log. Seq is the monotonic ordering + diff --git a/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql b/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql index ee57e8ba..a767aa53 100644 --- a/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql +++ b/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql @@ -54,8 +54,96 @@ CREATE TABLE pr_review_threads ( CREATE INDEX idx_pr_review_threads_lookup ON pr_review_threads (pr_url, updated_at); -- +goose StatementEnd +-- +goose StatementBegin +-- Widen change_log.event_type CHECK to include the new pr_review_thread_* events. +-- SQLite cannot ALTER an in-place CHECK constraint; this drop/recreate is safe +-- because change_log is append-only and the SSE broadcaster re-seeks to head on +-- restart (no consumer durably stores a change_log offset across this migration). +DROP INDEX IF EXISTS idx_change_log_project; +DROP TABLE change_log; +CREATE TABLE change_log ( + seq INTEGER PRIMARY KEY AUTOINCREMENT, + project_id TEXT NOT NULL REFERENCES projects (id), + session_id TEXT REFERENCES sessions (id), + event_type TEXT NOT NULL + CHECK (event_type IN ( + 'session_created', + 'session_updated', + 'pr_created', + 'pr_updated', + 'pr_check_recorded', + 'pr_review_thread_added', + 'pr_review_thread_resolved' + )), + payload TEXT NOT NULL CHECK (json_valid(payload)), + created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')) +); +CREATE INDEX idx_change_log_project ON change_log (project_id, seq); +-- +goose StatementEnd + +-- +goose StatementBegin +-- Emit on every new review thread the SCM observer persists, so the broadcaster +-- can stream per-thread additions instead of waiting for a rolled-up review_decision flip. +CREATE TRIGGER pr_review_threads_cdc_insert +AFTER INSERT ON pr_review_threads +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_review_thread_added', + json_object( + 'pr', NEW.pr_url, + 'thread', NEW.thread_id, + 'path', NEW.path, + 'line', NEW.line, + 'resolved', json(CASE WHEN NEW.resolved THEN 'true' ELSE 'false' END), + 'isBot', json(CASE WHEN NEW.is_bot THEN 'true' ELSE 'false' END) + ), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +-- Emit only on resolved <-> unresolved transitions. Other thread mutations +-- (semantic_hash refresh, line shifts) are captured by the slower review-decision +-- rollup so we don't flood CDC with no-op semantic-hash updates. +CREATE TRIGGER pr_review_threads_cdc_update +AFTER UPDATE ON pr_review_threads +WHEN OLD.resolved <> NEW.resolved +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_review_thread_resolved', + json_object( + 'pr', NEW.pr_url, + 'thread', NEW.thread_id, + 'path', NEW.path, + 'line', NEW.line, + 'resolved', json(CASE WHEN NEW.resolved THEN 'true' ELSE 'false' END) + ), + NEW.updated_at); +END; +-- +goose StatementEnd + -- +goose Down -- +goose StatementBegin +DROP TRIGGER IF EXISTS pr_review_threads_cdc_update; +DROP TRIGGER IF EXISTS pr_review_threads_cdc_insert; +DROP INDEX IF EXISTS idx_change_log_project; +DROP TABLE change_log; +CREATE TABLE change_log ( + seq INTEGER PRIMARY KEY AUTOINCREMENT, + project_id TEXT NOT NULL REFERENCES projects (id), + session_id TEXT REFERENCES sessions (id), + event_type TEXT NOT NULL + CHECK (event_type IN ('session_created', 'session_updated', 'pr_created', 'pr_updated', 'pr_check_recorded')), + payload TEXT NOT NULL CHECK (json_valid(payload)), + created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')) +); +CREATE INDEX idx_change_log_project ON change_log (project_id, seq); DROP TABLE pr_review_threads; ALTER TABLE pr_comment DROP COLUMN is_bot; ALTER TABLE pr_comment DROP COLUMN url; diff --git a/backend/internal/storage/sqlite/store/pr_cdc_test.go b/backend/internal/storage/sqlite/store/pr_cdc_test.go index 82f53b75..971dc00a 100644 --- a/backend/internal/storage/sqlite/store/pr_cdc_test.go +++ b/backend/internal/storage/sqlite/store/pr_cdc_test.go @@ -2,12 +2,14 @@ package store_test import ( "context" + "encoding/json" "strings" "testing" "time" "github.com/aoagents/agent-orchestrator/backend/internal/cdc" "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" ) // A check can change status on the same commit (in_progress -> failed) via @@ -50,6 +52,69 @@ func TestPRChecksCDC_EmitsOnInsertAndStatusUpdate(t *testing.T) { } } +func TestPRReviewThreadsCDC_EmitsOnInsertAndResolvedTransition(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + rec, err := s.CreateSession(ctx, sampleRecord("mer")) + if err != nil { + t.Fatal(err) + } + now := time.Now().UTC().Truncate(time.Second) + pr := domain.PullRequest{URL: "https://example/pr/9", SessionID: rec.ID, Number: 9, UpdatedAt: now} + + if err := s.WriteSCMObservation(ctx, pr, nil, []domain.PullRequestReviewThread{{ + ThreadID: "t1", Path: "main.go", Line: 7, IsBot: true, SemanticHash: "v1", UpdatedAt: now, + }}, nil, ports.ReviewWriteReplace); err != nil { + t.Fatal(err) + } + if err := s.WriteSCMObservation(ctx, pr, nil, []domain.PullRequestReviewThread{{ + ThreadID: "t1", Path: "main.go", Line: 8, Resolved: true, IsBot: true, SemanticHash: "v2", UpdatedAt: now.Add(time.Second), + }}, nil, ports.ReviewWriteMerge); err != nil { + t.Fatal(err) + } + if err := s.WriteSCMObservation(ctx, pr, nil, []domain.PullRequestReviewThread{{ + ThreadID: "t1", Path: "main.go", Line: 9, Resolved: true, IsBot: true, SemanticHash: "v3", UpdatedAt: now.Add(2 * time.Second), + }}, nil, ports.ReviewWriteMerge); err != nil { + t.Fatal(err) + } + + rows, err := s.EventsAfter(ctx, 0, 100) + if err != nil { + t.Fatal(err) + } + var added, resolved []cdc.Event + for _, r := range rows { + switch r.Type { + case cdc.EventPRReviewThreadAdded: + added = append(added, r) + case cdc.EventPRReviewThreadResolved: + resolved = append(resolved, r) + } + } + if len(added) != 1 { + t.Fatalf("want 1 review-thread added CDC event, got %d", len(added)) + } + if len(resolved) != 1 { + t.Fatalf("want 1 review-thread resolved CDC event (resolved transition only), got %d", len(resolved)) + } + + var addPayload map[string]any + if err := json.Unmarshal(added[0].Payload, &addPayload); err != nil { + t.Fatalf("added payload JSON: %v", err) + } + if addPayload["thread"] != "t1" || addPayload["isBot"] != true || addPayload["resolved"] != false { + t.Fatalf("added payload = %#v", addPayload) + } + var resolvedPayload map[string]any + if err := json.Unmarshal(resolved[0].Payload, &resolvedPayload); err != nil { + t.Fatalf("resolved payload JSON: %v", err) + } + if resolvedPayload["thread"] != "t1" || resolvedPayload["line"] != float64(8) || resolvedPayload["resolved"] != true { + t.Fatalf("resolved payload = %#v", resolvedPayload) + } +} + // WritePR persists scalar facts, checks, and comments in one tx; all three // should be queryable afterward. func TestWritePR_PersistsScalarsChecksAndComments(t *testing.T) { From 4404e14ffd49c4bd22b46c98ee18131df2b032fc Mon Sep 17 00:00:00 2001 From: harshitsinghbhandari <24b4506@iitb.ac.in> Date: Wed, 3 Jun 2026 23:00:10 +0530 Subject: [PATCH 15/20] fix(db): renumber scm observer migration --- ...{0003_scm_observer_schema.sql => 0004_scm_observer_schema.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename backend/internal/storage/sqlite/migrations/{0003_scm_observer_schema.sql => 0004_scm_observer_schema.sql} (100%) diff --git a/backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql b/backend/internal/storage/sqlite/migrations/0004_scm_observer_schema.sql similarity index 100% rename from backend/internal/storage/sqlite/migrations/0003_scm_observer_schema.sql rename to backend/internal/storage/sqlite/migrations/0004_scm_observer_schema.sql From 36f83fc360000ce8331ca8659900b347ae78d6e3 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Wed, 3 Jun 2026 23:04:47 +0530 Subject: [PATCH 16/20] fix: preserve cdc triggers during scm migration --- .../migrations/0004_scm_observer_schema.sql | 230 ++++++++++++++++-- 1 file changed, 206 insertions(+), 24 deletions(-) diff --git a/backend/internal/storage/sqlite/migrations/0004_scm_observer_schema.sql b/backend/internal/storage/sqlite/migrations/0004_scm_observer_schema.sql index a767aa53..d29dc683 100644 --- a/backend/internal/storage/sqlite/migrations/0004_scm_observer_schema.sql +++ b/backend/internal/storage/sqlite/migrations/0004_scm_observer_schema.sql @@ -40,28 +40,18 @@ ALTER TABLE pr_comment ADD COLUMN thread_id TEXT NOT NULL DEFAULT ''; ALTER TABLE pr_comment ADD COLUMN url TEXT NOT NULL DEFAULT ''; ALTER TABLE pr_comment ADD COLUMN is_bot INTEGER NOT NULL DEFAULT 0; -CREATE TABLE pr_review_threads ( - pr_url TEXT NOT NULL REFERENCES pr (url) ON DELETE CASCADE, - thread_id TEXT NOT NULL, - path TEXT NOT NULL DEFAULT '', - line INTEGER NOT NULL DEFAULT 0, - resolved INTEGER NOT NULL DEFAULT 0, - is_bot INTEGER NOT NULL DEFAULT 0, - semantic_hash TEXT NOT NULL DEFAULT '', - updated_at TIMESTAMP NOT NULL, - PRIMARY KEY (pr_url, thread_id) -); -CREATE INDEX idx_pr_review_threads_lookup ON pr_review_threads (pr_url, updated_at); --- +goose StatementEnd - --- +goose StatementBegin -- Widen change_log.event_type CHECK to include the new pr_review_thread_* events. --- SQLite cannot ALTER an in-place CHECK constraint; this drop/recreate is safe --- because change_log is append-only and the SSE broadcaster re-seeks to head on --- restart (no consumer durably stores a change_log offset across this migration). -DROP INDEX IF EXISTS idx_change_log_project; -DROP TABLE change_log; -CREATE TABLE change_log ( +-- SQLite cannot ALTER an in-place CHECK constraint. Drop CDC triggers before +-- rebuilding change_log; otherwise dropping the old table invalidates triggers +-- that still reference it. +DROP TRIGGER IF EXISTS sessions_cdc_insert; +DROP TRIGGER IF EXISTS sessions_cdc_update; +DROP TRIGGER IF EXISTS pr_cdc_insert; +DROP TRIGGER IF EXISTS pr_cdc_update; +DROP TRIGGER IF EXISTS pr_checks_cdc_insert; +DROP TRIGGER IF EXISTS pr_checks_cdc_update; + +CREATE TABLE change_log_new ( seq INTEGER PRIMARY KEY AUTOINCREMENT, project_id TEXT NOT NULL REFERENCES projects (id), session_id TEXT REFERENCES sessions (id), @@ -78,7 +68,25 @@ CREATE TABLE change_log ( payload TEXT NOT NULL CHECK (json_valid(payload)), created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')) ); +INSERT INTO change_log_new (seq, project_id, session_id, event_type, payload, created_at) +SELECT seq, project_id, session_id, event_type, payload, created_at FROM change_log; +DROP INDEX IF EXISTS idx_change_log_project; +DROP TABLE change_log; +ALTER TABLE change_log_new RENAME TO change_log; CREATE INDEX idx_change_log_project ON change_log (project_id, seq); + +CREATE TABLE pr_review_threads ( + pr_url TEXT NOT NULL REFERENCES pr (url) ON DELETE CASCADE, + thread_id TEXT NOT NULL, + path TEXT NOT NULL DEFAULT '', + line INTEGER NOT NULL DEFAULT 0, + resolved INTEGER NOT NULL DEFAULT 0, + is_bot INTEGER NOT NULL DEFAULT 0, + semantic_hash TEXT NOT NULL DEFAULT '', + updated_at TIMESTAMP NOT NULL, + PRIMARY KEY (pr_url, thread_id) +); +CREATE INDEX idx_pr_review_threads_lookup ON pr_review_threads (pr_url, updated_at); -- +goose StatementEnd -- +goose StatementBegin @@ -128,13 +136,99 @@ BEGIN END; -- +goose StatementEnd +-- +goose StatementBegin +CREATE TRIGGER sessions_cdc_insert +AFTER INSERT ON sessions +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES (NEW.project_id, NEW.id, 'session_created', + json_object('id', NEW.id, 'activity', NEW.activity_state, 'isTerminated', json(CASE WHEN NEW.is_terminated THEN 'true' ELSE 'false' END)), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER sessions_cdc_update +AFTER UPDATE ON sessions +WHEN OLD.activity_state <> NEW.activity_state + OR OLD.is_terminated <> NEW.is_terminated +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES (NEW.project_id, NEW.id, 'session_updated', + json_object('id', NEW.id, 'activity', NEW.activity_state, 'isTerminated', json(CASE WHEN NEW.is_terminated THEN 'true' ELSE 'false' END)), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_cdc_insert +AFTER INSERT ON pr +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ((SELECT project_id FROM sessions WHERE id = NEW.session_id), NEW.session_id, 'pr_created', + json_object('url', NEW.url, 'session', NEW.session_id, 'state', NEW.pr_state, + 'ci', NEW.ci_state, 'review', NEW.review_decision, 'mergeability', NEW.mergeability), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_cdc_update +AFTER UPDATE ON pr +WHEN OLD.pr_state <> NEW.pr_state + OR OLD.ci_state <> NEW.ci_state + OR OLD.review_decision <> NEW.review_decision + OR OLD.mergeability <> NEW.mergeability +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ((SELECT project_id FROM sessions WHERE id = NEW.session_id), NEW.session_id, 'pr_updated', + json_object('url', NEW.url, 'session', NEW.session_id, 'state', NEW.pr_state, + 'ci', NEW.ci_state, 'review', NEW.review_decision, 'mergeability', NEW.mergeability), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_checks_cdc_insert +AFTER INSERT ON pr_checks +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_check_recorded', + json_object('pr', NEW.pr_url, 'name', NEW.name, 'commit', NEW.commit_hash, 'status', NEW.status), + NEW.created_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_checks_cdc_update +AFTER UPDATE ON pr_checks +WHEN OLD.status <> NEW.status +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_check_recorded', + json_object('pr', NEW.pr_url, 'name', NEW.name, 'commit', NEW.commit_hash, 'status', NEW.status), + datetime('now')); +END; +-- +goose StatementEnd + -- +goose Down -- +goose StatementBegin DROP TRIGGER IF EXISTS pr_review_threads_cdc_update; DROP TRIGGER IF EXISTS pr_review_threads_cdc_insert; -DROP INDEX IF EXISTS idx_change_log_project; -DROP TABLE change_log; -CREATE TABLE change_log ( +DROP TRIGGER IF EXISTS sessions_cdc_insert; +DROP TRIGGER IF EXISTS sessions_cdc_update; +DROP TRIGGER IF EXISTS pr_cdc_insert; +DROP TRIGGER IF EXISTS pr_cdc_update; +DROP TRIGGER IF EXISTS pr_checks_cdc_insert; +DROP TRIGGER IF EXISTS pr_checks_cdc_update; + +CREATE TABLE change_log_old ( seq INTEGER PRIMARY KEY AUTOINCREMENT, project_id TEXT NOT NULL REFERENCES projects (id), session_id TEXT REFERENCES sessions (id), @@ -143,7 +237,14 @@ CREATE TABLE change_log ( payload TEXT NOT NULL CHECK (json_valid(payload)), created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')) ); +INSERT INTO change_log_old (seq, project_id, session_id, event_type, payload, created_at) +SELECT seq, project_id, session_id, event_type, payload, created_at FROM change_log +WHERE event_type IN ('session_created', 'session_updated', 'pr_created', 'pr_updated', 'pr_check_recorded'); +DROP INDEX IF EXISTS idx_change_log_project; +DROP TABLE change_log; +ALTER TABLE change_log_old RENAME TO change_log; CREATE INDEX idx_change_log_project ON change_log (project_id, seq); + DROP TABLE pr_review_threads; ALTER TABLE pr_comment DROP COLUMN is_bot; ALTER TABLE pr_comment DROP COLUMN url; @@ -181,3 +282,84 @@ ALTER TABLE pr DROP COLUMN repo; ALTER TABLE pr DROP COLUMN host; ALTER TABLE pr DROP COLUMN provider; -- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER sessions_cdc_insert +AFTER INSERT ON sessions +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES (NEW.project_id, NEW.id, 'session_created', + json_object('id', NEW.id, 'activity', NEW.activity_state, 'isTerminated', json(CASE WHEN NEW.is_terminated THEN 'true' ELSE 'false' END)), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER sessions_cdc_update +AFTER UPDATE ON sessions +WHEN OLD.activity_state <> NEW.activity_state + OR OLD.is_terminated <> NEW.is_terminated +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES (NEW.project_id, NEW.id, 'session_updated', + json_object('id', NEW.id, 'activity', NEW.activity_state, 'isTerminated', json(CASE WHEN NEW.is_terminated THEN 'true' ELSE 'false' END)), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_cdc_insert +AFTER INSERT ON pr +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ((SELECT project_id FROM sessions WHERE id = NEW.session_id), NEW.session_id, 'pr_created', + json_object('url', NEW.url, 'session', NEW.session_id, 'state', NEW.pr_state, + 'ci', NEW.ci_state, 'review', NEW.review_decision, 'mergeability', NEW.mergeability), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_cdc_update +AFTER UPDATE ON pr +WHEN OLD.pr_state <> NEW.pr_state + OR OLD.ci_state <> NEW.ci_state + OR OLD.review_decision <> NEW.review_decision + OR OLD.mergeability <> NEW.mergeability +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ((SELECT project_id FROM sessions WHERE id = NEW.session_id), NEW.session_id, 'pr_updated', + json_object('url', NEW.url, 'session', NEW.session_id, 'state', NEW.pr_state, + 'ci', NEW.ci_state, 'review', NEW.review_decision, 'mergeability', NEW.mergeability), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_checks_cdc_insert +AFTER INSERT ON pr_checks +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_check_recorded', + json_object('pr', NEW.pr_url, 'name', NEW.name, 'commit', NEW.commit_hash, 'status', NEW.status), + NEW.created_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_checks_cdc_update +AFTER UPDATE ON pr_checks +WHEN OLD.status <> NEW.status +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_check_recorded', + json_object('pr', NEW.pr_url, 'name', NEW.name, 'commit', NEW.commit_hash, 'status', NEW.status), + datetime('now')); +END; +-- +goose StatementEnd From ea3a1d6dc9a3440f741b412db9da19c4e3d371d4 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Wed, 3 Jun 2026 23:23:49 +0530 Subject: [PATCH 17/20] fix: preserve scm comments across legacy writes --- .../internal/adapters/scm/github/client.go | 4 +- backend/internal/observe/scm/observer.go | 2 +- backend/internal/observe/scm/observer_test.go | 25 +++++++++- backend/internal/ports/scm_observations.go | 9 +++- .../storage/sqlite/gen/pr_comment.sql.go | 45 ++++++++++++++++++ .../storage/sqlite/queries/pr_comment.sql | 7 +++ .../internal/storage/sqlite/store/pr_store.go | 22 ++++++++- .../storage/sqlite/store/store_test.go | 46 ++++++++++++++++++- 8 files changed, 152 insertions(+), 8 deletions(-) diff --git a/backend/internal/adapters/scm/github/client.go b/backend/internal/adapters/scm/github/client.go index c342aa06..d988b420 100644 --- a/backend/internal/adapters/scm/github/client.go +++ b/backend/internal/adapters/scm/github/client.go @@ -13,6 +13,8 @@ import ( "strings" "sync" "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/ports" ) const ( @@ -25,7 +27,7 @@ const ( // errors.Is; the orchestrator's lifecycle code is intentionally insulated // from raw HTTP status codes. var ( - ErrNotFound = errors.New("github scm: not found") + ErrNotFound = ports.ErrSCMNotFound ErrAuthFailed = errors.New("github scm: authentication failed") ErrRateLimited = errors.New("github scm: rate limited") ) diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index 53c7e874..3f2eb3c4 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -534,7 +534,7 @@ func (o *Observer) detectMissingPRs(ctx context.Context, subjects map[string]*su pr, err := o.provider.DetectPRByBranch(ctx, s.repo, s.branch) if err != nil { o.logger.Debug("scm observer: no PR detected for branch", "session", s.session.ID, "branch", s.branch, "err", err) - if markRepoFailed != nil { + if markRepoFailed != nil && !errors.Is(err, ports.ErrSCMNotFound) { markRepoFailed(s.repo) } continue diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index 4a865a31..fa0b1aec 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -138,7 +138,7 @@ func (p *fakeProvider) DetectPRByBranch(_ context.Context, _ ports.SCMRepo, bran p.detectCalls++ pr, ok := p.detected[branch] if !ok { - return ports.SCMPRObservation{}, errors.New("not found") + return ports.SCMPRObservation{}, ports.ErrSCMNotFound } return pr, nil } @@ -278,6 +278,29 @@ func TestPoll_RepoETag304SkipsDetectPR(t *testing.T) { } } +func TestPoll_DetectPRNotFoundCommitsRepoETag(t *testing.T) { + store := testStoreWithSession() + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v2"}}, + detected: map[string]ports.SCMPRObservation{}, + observations: map[string]ports.SCMObservation{}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "v1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.detectCalls != 1 { + t.Fatalf("detectPR calls = %d, want 1", provider.detectCalls) + } + if got := obs.Cache.RepoPRListETag[prKey(testRepo, 0)]; got != "v2" { + t.Fatalf("repo ETag after not-found detection = %q, want v2", got) + } + if len(provider.fetchBatches) != 0 { + t.Fatalf("not-found branch should not fetch PR batch: %#v", provider.fetchBatches) + } +} + func TestPoll_RepoETag200DetectsPRAndRefreshesSamePoll(t *testing.T) { store := testStoreWithSession() provider := &fakeProvider{ diff --git a/backend/internal/ports/scm_observations.go b/backend/internal/ports/scm_observations.go index 27d01135..86a44670 100644 --- a/backend/internal/ports/scm_observations.go +++ b/backend/internal/ports/scm_observations.go @@ -4,7 +4,14 @@ // GitHub payloads or GitHub-specific enum names. package ports -import "time" +import ( + "errors" + "time" +) + +// ErrSCMNotFound is the provider-neutral sentinel for successful SCM lookups +// that found no matching resource, such as a branch with no open pull request. +var ErrSCMNotFound = errors.New("scm: not found") // SCMRepo identifies a repository without assuming a provider-specific URL // shape. Repo is conventionally "owner/name" for providers that expose an diff --git a/backend/internal/storage/sqlite/gen/pr_comment.sql.go b/backend/internal/storage/sqlite/gen/pr_comment.sql.go index 43821fdb..a5dba653 100644 --- a/backend/internal/storage/sqlite/gen/pr_comment.sql.go +++ b/backend/internal/storage/sqlite/gen/pr_comment.sql.go @@ -10,6 +10,15 @@ import ( "time" ) +const deleteLegacyPRComments = `-- name: DeleteLegacyPRComments :exec +DELETE FROM pr_comment WHERE pr_url = ? AND thread_id = '' +` + +func (q *Queries) DeleteLegacyPRComments(ctx context.Context, prUrl string) error { + _, err := q.db.ExecContext(ctx, deleteLegacyPRComments, prUrl) + return err +} + const deletePRComments = `-- name: DeletePRComments :exec DELETE FROM pr_comment WHERE pr_url = ? ` @@ -33,6 +42,42 @@ func (q *Queries) DeletePRCommentsByThread(ctx context.Context, arg DeletePRComm return err } +const insertLegacyPRComment = `-- name: InsertLegacyPRComment :exec +INSERT OR IGNORE INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +` + +type InsertLegacyPRCommentParams struct { + PRURL string + CommentID string + Author string + File string + Line int64 + Body string + Resolved bool + CreatedAt time.Time + ThreadID string + URL string + IsBot int64 +} + +func (q *Queries) InsertLegacyPRComment(ctx context.Context, arg InsertLegacyPRCommentParams) error { + _, err := q.db.ExecContext(ctx, insertLegacyPRComment, + arg.PRURL, + arg.CommentID, + arg.Author, + arg.File, + arg.Line, + arg.Body, + arg.Resolved, + arg.CreatedAt, + arg.ThreadID, + arg.URL, + arg.IsBot, + ) + return err +} + const insertPRComment = `-- name: InsertPRComment :exec INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) diff --git a/backend/internal/storage/sqlite/queries/pr_comment.sql b/backend/internal/storage/sqlite/queries/pr_comment.sql index c7844c23..9a22cd1c 100644 --- a/backend/internal/storage/sqlite/queries/pr_comment.sql +++ b/backend/internal/storage/sqlite/queries/pr_comment.sql @@ -2,9 +2,16 @@ INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); +-- name: InsertLegacyPRComment :exec +INSERT OR IGNORE INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); + -- name: DeletePRComments :exec DELETE FROM pr_comment WHERE pr_url = ?; +-- name: DeleteLegacyPRComments :exec +DELETE FROM pr_comment WHERE pr_url = ? AND thread_id = ''; + -- name: DeletePRCommentsByThread :exec DELETE FROM pr_comment WHERE pr_url = ? AND thread_id = ?; diff --git a/backend/internal/storage/sqlite/store/pr_store.go b/backend/internal/storage/sqlite/store/pr_store.go index 88ba9706..d4781b03 100644 --- a/backend/internal/storage/sqlite/store/pr_store.go +++ b/backend/internal/storage/sqlite/store/pr_store.go @@ -71,10 +71,14 @@ func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []dom return err } } - if reviewMode == ports.ReviewWriteReplace || replaceLegacyComments { + if reviewMode == ports.ReviewWriteReplace { if err := q.DeletePRComments(ctx, pr.URL); err != nil { return err } + } else if replaceLegacyComments { + if err := q.DeleteLegacyPRComments(ctx, pr.URL); err != nil { + return err + } } if reviewMode == ports.ReviewWriteReplace || reviewMode == ports.ReviewWriteMerge { for _, th := range threads { @@ -90,12 +94,18 @@ func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []dom } } } - if reviewMode == ports.ReviewWriteReplace || reviewMode == ports.ReviewWriteMerge || replaceLegacyComments { + if reviewMode == ports.ReviewWriteReplace || reviewMode == ports.ReviewWriteMerge { for _, c := range comments { if err := q.InsertPRComment(ctx, genCommentParams(pr.URL, c)); err != nil { return fmt.Errorf("comment %q: %w", c.ID, err) } } + } else if replaceLegacyComments { + for _, c := range comments { + if err := q.InsertLegacyPRComment(ctx, genLegacyCommentParams(pr.URL, c)); err != nil { + return fmt.Errorf("legacy comment %q: %w", c.ID, err) + } + } } return nil }) @@ -335,6 +345,14 @@ func genCommentParams(prURL string, c domain.PullRequestComment) gen.InsertPRCom } } +func genLegacyCommentParams(prURL string, c domain.PullRequestComment) gen.InsertLegacyPRCommentParams { + return gen.InsertLegacyPRCommentParams{ + PRURL: prURL, CommentID: c.ID, Author: c.Author, File: c.File, + Line: int64(c.Line), Body: c.Body, Resolved: c.Resolved, CreatedAt: c.CreatedAt, + ThreadID: "", URL: "", IsBot: 0, + } +} + func commentFromGen(c gen.PRComment) domain.PullRequestComment { return domain.PullRequestComment{ ThreadID: c.ThreadID, ID: c.CommentID, Author: c.Author, diff --git a/backend/internal/storage/sqlite/store/store_test.go b/backend/internal/storage/sqlite/store/store_test.go index d696c5d3..161a16eb 100644 --- a/backend/internal/storage/sqlite/store/store_test.go +++ b/backend/internal/storage/sqlite/store/store_test.go @@ -341,11 +341,16 @@ func TestWritePRPreservesSCMReviewThreads(t *testing.T) { now := time.Now().UTC().Truncate(time.Second) pr := domain.PullRequest{URL: "https://github.com/o/r/pull/1", SessionID: r.ID, Number: 1, UpdatedAt: now} threads := []domain.PullRequestReviewThread{{ThreadID: "t1", Path: "main.go", Line: 7, SemanticHash: "thread-v1", UpdatedAt: now}} + comments := []domain.PullRequestComment{{ThreadID: "t1", ID: "c1", Author: "reviewer", Body: "scm", URL: "https://example/comment/c1", CreatedAt: now}} - if err := s.WriteSCMObservation(ctx, pr, nil, threads, nil, ports.ReviewWriteReplace); err != nil { + if err := s.WriteSCMObservation(ctx, pr, nil, threads, comments, ports.ReviewWriteReplace); err != nil { t.Fatal(err) } - if err := s.WritePR(ctx, domain.PullRequest{URL: pr.URL, SessionID: r.ID, Number: 1, CI: domain.CIPassing, UpdatedAt: now.Add(time.Second)}, nil, nil); err != nil { + legacyComments := []domain.PullRequestComment{ + {ID: "c1", Author: "legacy", Body: "duplicate legacy row must not clear thread metadata", CreatedAt: now.Add(time.Second)}, + {ID: "legacy-only", Author: "legacy", Body: "legacy", CreatedAt: now.Add(time.Second)}, + } + if err := s.WritePR(ctx, domain.PullRequest{URL: pr.URL, SessionID: r.ID, Number: 1, CI: domain.CIPassing, UpdatedAt: now.Add(time.Second)}, nil, legacyComments); err != nil { t.Fatal(err) } @@ -356,6 +361,43 @@ func TestWritePRPreservesSCMReviewThreads(t *testing.T) { if len(gotThreads) != 1 || gotThreads[0].ThreadID != "t1" || gotThreads[0].SemanticHash != "thread-v1" { t.Fatalf("legacy WritePR must preserve SCM-owned review threads, got %+v", gotThreads) } + gotComments, err := s.ListPRComments(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + byID := map[string]domain.PullRequestComment{} + for _, c := range gotComments { + byID[c.ID] = c + } + scmComment, ok := byID["c1"] + if !ok || scmComment.ThreadID != "t1" || scmComment.URL != "https://example/comment/c1" || scmComment.Body != "scm" { + t.Fatalf("legacy WritePR must not clear SCM comment metadata, got %+v", scmComment) + } + legacyOnly, ok := byID["legacy-only"] + if !ok || legacyOnly.ThreadID != "" { + t.Fatalf("legacy-only comment should remain unthreaded, got %+v", legacyOnly) + } + + mergedThreads := []domain.PullRequestReviewThread{{ThreadID: "t1", Path: "main.go", Line: 8, Resolved: true, SemanticHash: "thread-v2", UpdatedAt: now.Add(2 * time.Second)}} + mergedComments := []domain.PullRequestComment{{ThreadID: "t1", ID: "c2", Author: "reviewer", Body: "updated", URL: "https://example/comment/c2", CreatedAt: now.Add(2 * time.Second)}} + if err := s.WriteSCMObservation(ctx, pr, nil, mergedThreads, mergedComments, ports.ReviewWriteMerge); err != nil { + t.Fatal(err) + } + gotComments, err = s.ListPRComments(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + byID = map[string]domain.PullRequestComment{} + for _, c := range gotComments { + byID[c.ID] = c + } + if _, ok := byID["c1"]; ok { + t.Fatalf("SCM merge should delete stale fetched-thread comment c1, comments=%+v", gotComments) + } + replacement, ok := byID["c2"] + if !ok || replacement.ThreadID != "t1" { + t.Fatalf("SCM merge did not insert replacement threaded comment, comments=%+v", gotComments) + } } func TestCDCTriggersPopulateChangeLog(t *testing.T) { From e84d4ee97781722c92e3bcd46fd22b928703816d Mon Sep 17 00:00:00 2001 From: whoisasx Date: Wed, 3 Jun 2026 23:38:46 +0530 Subject: [PATCH 18/20] fix: retry transient scm credential checks --- backend/internal/observe/scm/observer.go | 24 +++++++------ backend/internal/observe/scm/observer_test.go | 34 +++++++++++++++++++ 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index 3f2eb3c4..15d37f42 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -233,10 +233,11 @@ func (o *Observer) Poll(ctx context.Context) error { if len(subjects) == 0 { return nil } - if err := o.checkCredentials(ctx); err != nil { + proceed, err := o.checkCredentials(ctx) + if err != nil { return err } - if o.disabled { + if !proceed || o.disabled { return nil } @@ -396,29 +397,30 @@ func (o *Observer) Poll(ctx context.Context) error { return nil } -func (o *Observer) checkCredentials(ctx context.Context) error { +func (o *Observer) checkCredentials(ctx context.Context) (bool, error) { if o.credentialsChecked { - return nil + return true, nil } if err := ctx.Err(); err != nil { - return err + return false, err } - o.credentialsChecked = true checker, ok := o.provider.(credentialChecker) if !ok { - return nil + o.credentialsChecked = true + return true, nil } available, err := checker.SCMCredentialsAvailable(ctx) if err != nil { - o.disabled = true - o.logger.Warn("scm observer disabled: provider credentials unavailable", "err", err) - return nil + o.logger.Warn("scm observer credentials check failed; will retry", "err", err) + return false, nil } + o.credentialsChecked = true if !available { o.disabled = true o.logger.Warn("scm observer disabled: provider credentials unavailable") + return false, nil } - return nil + return true, nil } func (o *Observer) discoverSubjects(ctx context.Context) (map[string]*subject, error) { diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index fa0b1aec..fe0b0f99 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -265,6 +265,40 @@ func TestPoll_DisablesOnceWhenCredentialsUnavailable(t *testing.T) { } } +func TestPoll_RetriesTransientCredentialErrors(t *testing.T) { + store := testStoreWithSession() + provider := &fakeProvider{ + credentialGate: true, + credentialErr: errors.New("gh auth token failed transiently"), + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v1"}}, + observations: map[string]ports.SCMObservation{}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if obs.credentialsChecked || obs.disabled { + t.Fatalf("transient credential error should not commit checked/disabled: checked=%v disabled=%v", obs.credentialsChecked, obs.disabled) + } + if provider.credentialChecks != 1 || provider.repoGuardCalls != 0 { + t.Fatalf("first poll should check credentials only: checks=%d repoGuards=%d", provider.credentialChecks, provider.repoGuardCalls) + } + + provider.mu.Lock() + provider.credentialErr = nil + provider.credentialOK = true + provider.mu.Unlock() + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if !obs.credentialsChecked || obs.disabled { + t.Fatalf("successful retry should commit checked without disabling: checked=%v disabled=%v", obs.credentialsChecked, obs.disabled) + } + if provider.credentialChecks != 2 || provider.repoGuardCalls != 1 { + t.Fatalf("second poll should retry credentials and continue: checks=%d repoGuards=%d", provider.credentialChecks, provider.repoGuardCalls) + } +} + func TestPoll_RepoETag304SkipsDetectPR(t *testing.T) { store := testStoreWithSession() provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v1", NotModified: true}}, observations: map[string]ports.SCMObservation{}} From 63028448d01972509657082e44f1e39b810448c8 Mon Sep 17 00:00:00 2001 From: whoisasx Date: Thu, 4 Jun 2026 12:37:10 +0530 Subject: [PATCH 19/20] fix: preserve review rows during lifecycle ack --- backend/internal/observe/scm/observer.go | 2 +- backend/internal/observe/scm/observer_test.go | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index 15d37f42..94996c04 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -367,7 +367,7 @@ func (o *Observer) Poll(ctx context.Context) error { markRepoRefreshFailed(subj.repo) continue } - if err := o.store.WriteSCMObservation(ctx, finalPR, finalChecks, finalThreads, finalComments, reviewMode); err != nil { + if err := o.store.WriteSCMObservation(ctx, finalPR, finalChecks, nil, nil, ports.ReviewWritePreserve); err != nil { o.logger.Error("scm observer: DB lifecycle acknowledgement failed", "session", subj.session.ID, "pr", finalPR.URL, "err", err) markRepoRefreshFailed(subj.repo) continue diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index fe0b0f99..d2526e39 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -530,6 +530,15 @@ func TestPoll_ReviewHashDrivesPersistenceAndLifecycle(t *testing.T) { if len(store.writes) == 0 || len(store.writes[0].comments) != 1 { t.Fatalf("review change not persisted: %#v", store.writes) } + if len(store.writes) != 2 { + t.Fatalf("review change with lifecycle should write held-back facts then acknowledgement, got %d writes", len(store.writes)) + } + if store.writes[0].reviewMode != ports.ReviewWriteReplace { + t.Fatalf("initial review write mode = %v, want replace", store.writes[0].reviewMode) + } + if store.writes[1].reviewMode != ports.ReviewWritePreserve || len(store.writes[1].comments) != 0 { + t.Fatalf("lifecycle acknowledgement should preserve review rows, got mode=%v comments=%d", store.writes[1].reviewMode, len(store.writes[1].comments)) + } if len(lc.observed) != 1 || !lc.observed[0].Changed.Review { t.Fatalf("review change not notified: %#v", lc.observed) } From a5f96a9029bf6b35949a8cafc8b4c4205b150dac Mon Sep 17 00:00:00 2001 From: whoisasx Date: Thu, 4 Jun 2026 22:19:58 +0530 Subject: [PATCH 20/20] fix: harden scm observer review follow-ups --- .../adapters/scm/github/observer_provider.go | 7 ++- backend/internal/lifecycle/manager_test.go | 25 ++++++++++ backend/internal/lifecycle/reactions.go | 3 +- backend/internal/observe/scm/observer.go | 33 ++++++++++--- backend/internal/observe/scm/observer_test.go | 31 ++++++++++++ backend/internal/storage/sqlite/gen/pr.sql.go | 49 +++++++++++++++++++ .../internal/storage/sqlite/queries/pr.sql | 17 +++++++ .../internal/storage/sqlite/store/pr_store.go | 26 +++++++++- .../storage/sqlite/store/store_test.go | 45 ++++++++++++++++- 9 files changed, 224 insertions(+), 12 deletions(-) diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go index 2a463bf5..bfb703d2 100644 --- a/backend/internal/adapters/scm/github/observer_provider.go +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -165,7 +165,12 @@ func (p *Provider) FetchReviewThreads(ctx context.Context, ref ports.SCMPRRef) ( } out := latest startCursor := str(pi["startCursor"]) - if len(latest) == 0 || !latest[0].Resolved { + // GitHub returns nodes in connection order even when selecting last:N, so + // latest[0] is the oldest thread in the latest window. If that boundary + // thread is still unresolved, fetch one older window to avoid hiding older + // active review feedback behind the normal 50-thread cost cap. + oldestLatestUnresolved := len(latest) == 0 || !latest[0].Resolved + if oldestLatestUnresolved { if startCursor == "" { p.logger.Warn("github scm: review thread page is partial but missing start cursor", "repo", repoFullName(ref.Repo), "pr", ref.Number) diff --git a/backend/internal/lifecycle/manager_test.go b/backend/internal/lifecycle/manager_test.go index cb9f17e3..24fb3c6c 100644 --- a/backend/internal/lifecycle/manager_test.go +++ b/backend/internal/lifecycle/manager_test.go @@ -164,6 +164,31 @@ func TestSCMObservationProjectsToExistingPRReactions(t *testing.T) { } } +func TestSCMObservationUsesPRHeadWhenCIHeadMissing(t *testing.T) { + m, st, msg := newManager() + st.sessions["mer-1"] = working("mer-1") + o := ports.SCMObservation{ + Fetched: true, + PR: ports.SCMPRObservation{URL: "pr1", HeadSHA: "c1"}, + CI: ports.SCMCIObservation{ + Summary: string(domain.CIFailing), + FailedChecks: []ports.SCMCheckObservation{{ + Name: "build", Status: string(domain.PRCheckFailed), + }}, + }, + } + if err := m.ApplySCMObservation(ctx, "mer-1", o); err != nil { + t.Fatal(err) + } + o.PR.HeadSHA = "c2" + if err := m.ApplySCMObservation(ctx, "mer-1", o); err != nil { + t.Fatal(err) + } + if len(msg.msgs) != 2 { + t.Fatalf("want separate CI nudges for distinct PR heads when CI head is absent, got %d: %v", len(msg.msgs), msg.msgs) + } +} + func TestPRObservation_MergeConflictNudgesAgent(t *testing.T) { m, st, msg := newManager() st.sessions["mer-1"] = working("mer-1") diff --git a/backend/internal/lifecycle/reactions.go b/backend/internal/lifecycle/reactions.go index ebf737b5..abf65891 100644 --- a/backend/internal/lifecycle/reactions.go +++ b/backend/internal/lifecycle/reactions.go @@ -102,6 +102,7 @@ func scmToPRObservation(o ports.SCMObservation) ports.PRObservation { if pr.Mergeability == "" { pr.Mergeability = domain.MergeUnknown } + checkCommit := firstSCMNonEmpty(o.CI.HeadSHA, o.PR.HeadSHA) for _, ch := range o.CI.FailedChecks { status := domain.PRCheckStatus(ch.Status) if status == "" { @@ -113,7 +114,7 @@ func scmToPRObservation(o ports.SCMObservation) ports.PRObservation { } pr.Checks = append(pr.Checks, ports.PRCheckObservation{ Name: ch.Name, - CommitHash: o.CI.HeadSHA, + CommitHash: checkCommit, Status: status, URL: ch.URL, LogTail: logTail, diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go index 94996c04..a1ebd8f7 100644 --- a/backend/internal/observe/scm/observer.go +++ b/backend/internal/observe/scm/observer.go @@ -612,6 +612,11 @@ func (o *Observer) enrichFailureLogs(ctx context.Context, obs *ports.SCMObservat } } tails := make([]string, 0, len(obs.CI.FailedChecks)) + checksByProviderID := make(map[string][]int, len(obs.CI.Checks)) + for i := range obs.CI.Checks { + key := checkProviderKey(obs.CI.Checks[i]) + checksByProviderID[key] = append(checksByProviderID[key], i) + } for i := range obs.CI.FailedChecks { tail := obs.CI.FailedChecks[i].LogTail if tail == "" && obs.CI.FailedChecks[i].ProviderID != "" { @@ -625,15 +630,17 @@ func (o *Observer) enrichFailureLogs(ctx context.Context, obs *ports.SCMObservat if tail != "" { tails = append(tails, tail) } - for j := range obs.CI.Checks { - if obs.CI.Checks[j].Name == obs.CI.FailedChecks[i].Name && obs.CI.Checks[j].ProviderID == obs.CI.FailedChecks[i].ProviderID { - obs.CI.Checks[j].LogTail = tail - } + for _, j := range checksByProviderID[checkProviderKey(obs.CI.FailedChecks[i])] { + obs.CI.Checks[j].LogTail = tail } } obs.CI.FailureLogTail = strings.Join(tails, "\n---\n") } +func checkProviderKey(ch ports.SCMCheckObservation) string { + return ch.Name + "\x00" + ch.ProviderID +} + func applyStoredFailedLogTails(obs *ports.SCMObservation, checks []domain.PullRequestCheck) bool { tailsByName := map[string]string{} for _, ch := range checks { @@ -713,9 +720,7 @@ func (o *Observer) refreshReviews(ctx context.Context, subjects map[string]*subj } else { reviewModes[pkey] = ports.ReviewWriteReplace } - if _, ok := o.Cache.ReviewRefreshFailed[pkey]; ok { - o.Cache.ReviewRefreshFailed[pkey] = false - } + cacheDelete(o.Cache.ReviewRefreshFailed, &o.Cache.reviewFailedOrder, pkey) } } @@ -1157,6 +1162,20 @@ func (o *Observer) cacheSetBool(m map[string]bool, order *[]string, key string, } } +func cacheDelete[V any](m map[string]V, order *[]string, key string) { + if _, ok := m[key]; !ok { + return + } + delete(m, key) + dst := (*order)[:0] + for _, cachedKey := range *order { + if cachedKey != key { + dst = append(dst, cachedKey) + } + } + *order = dst +} + func (o *Observer) evictStrings(m map[string]string, order *[]string) { for len(*order) > o.Cache.max { evict := (*order)[0] diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go index d2526e39..55c61823 100644 --- a/backend/internal/observe/scm/observer_test.go +++ b/backend/internal/observe/scm/observer_test.go @@ -661,6 +661,37 @@ func TestPoll_ReviewFetchFailureDoesNotUpdateReviewDecision(t *testing.T) { } } +func TestPoll_SuccessfulReviewRefreshClearsRetryCacheSlot(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.Review = domain.ReviewChangesRequest + local.ReviewHash = "old-review" + store.prs["p-1"] = []domain.PullRequest{local} + review := ports.SCMReviewObservation{ + Decision: string(domain.ReviewChangesRequest), + Threads: []ports.SCMReviewThreadObservation{{ID: "t1", Path: "f.go", Line: 2, Comments: []ports.SCMReviewCommentObservation{{ID: "c1", Body: "fix"}}}}, + } + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, + reviews: map[string]ports.SCMReviewObservation{prKey(testRepo, 1): review}, + } + obs := newTestObserver(store, provider, nil, time.Unix(350, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + obs.cacheSetBool(obs.Cache.ReviewRefreshFailed, &obs.Cache.reviewFailedOrder, prKey(testRepo, 1), true) + + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if _, ok := obs.Cache.ReviewRefreshFailed[prKey(testRepo, 1)]; ok { + t.Fatalf("successful review refresh should delete retry map entry, got %#v", obs.Cache.ReviewRefreshFailed) + } + for _, key := range obs.Cache.reviewFailedOrder { + if key == prKey(testRepo, 1) { + t.Fatalf("successful review refresh should remove retry order slot, got %#v", obs.Cache.reviewFailedOrder) + } + } +} + func TestPoll_DoesNotCommitCommitETagWhenFetchFails(t *testing.T) { store := testStoreWithSession() local := knownPR(1) diff --git a/backend/internal/storage/sqlite/gen/pr.sql.go b/backend/internal/storage/sqlite/gen/pr.sql.go index 63b1dbfa..4edc7cd5 100644 --- a/backend/internal/storage/sqlite/gen/pr.sql.go +++ b/backend/internal/storage/sqlite/gen/pr.sql.go @@ -196,6 +196,55 @@ func (q *Queries) ListPRsBySession(ctx context.Context, sessionID domain.Session return items, nil } +const upsertLegacyPR = `-- name: UpsertLegacyPR :exec +INSERT INTO pr ( + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + is_draft, is_merged, is_closed +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +ON CONFLICT (url) DO UPDATE SET + number = excluded.number, + pr_state = excluded.pr_state, + review_decision = excluded.review_decision, + ci_state = excluded.ci_state, + mergeability = excluded.mergeability, + updated_at = excluded.updated_at, + is_draft = excluded.is_draft, + is_merged = excluded.is_merged, + is_closed = excluded.is_closed +` + +type UpsertLegacyPRParams struct { + URL string + SessionID domain.SessionID + Number int64 + PRState domain.PRState + ReviewDecision domain.ReviewDecision + CIState domain.CIState + Mergeability domain.Mergeability + UpdatedAt time.Time + IsDraft int64 + IsMerged int64 + IsClosed int64 +} + +func (q *Queries) UpsertLegacyPR(ctx context.Context, arg UpsertLegacyPRParams) error { + _, err := q.db.ExecContext(ctx, upsertLegacyPR, + arg.URL, + arg.SessionID, + arg.Number, + arg.PRState, + arg.ReviewDecision, + arg.CIState, + arg.Mergeability, + arg.UpdatedAt, + arg.IsDraft, + arg.IsMerged, + arg.IsClosed, + ) + return err +} + const upsertPR = `-- name: UpsertPR :exec INSERT INTO pr ( url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, diff --git a/backend/internal/storage/sqlite/queries/pr.sql b/backend/internal/storage/sqlite/queries/pr.sql index 149c27d0..9eacbb3f 100644 --- a/backend/internal/storage/sqlite/queries/pr.sql +++ b/backend/internal/storage/sqlite/queries/pr.sql @@ -47,6 +47,23 @@ ON CONFLICT (url) DO UPDATE SET ci_observed_at = excluded.ci_observed_at, review_observed_at = excluded.review_observed_at; +-- name: UpsertLegacyPR :exec +INSERT INTO pr ( + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + is_draft, is_merged, is_closed +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +ON CONFLICT (url) DO UPDATE SET + number = excluded.number, + pr_state = excluded.pr_state, + review_decision = excluded.review_decision, + ci_state = excluded.ci_state, + mergeability = excluded.mergeability, + updated_at = excluded.updated_at, + is_draft = excluded.is_draft, + is_merged = excluded.is_merged, + is_closed = excluded.is_closed; + -- name: GetPR :one SELECT url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, diff --git a/backend/internal/storage/sqlite/store/pr_store.go b/backend/internal/storage/sqlite/store/pr_store.go index d4781b03..be664912 100644 --- a/backend/internal/storage/sqlite/store/pr_store.go +++ b/backend/internal/storage/sqlite/store/pr_store.go @@ -58,8 +58,14 @@ func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []dom if err == nil && existing.SessionID != pr.SessionID { return fmt.Errorf("pr %s already belongs to session %s", pr.URL, existing.SessionID) } - if err := q.UpsertPR(ctx, genPRParams(pr)); err != nil { - return err + if replaceLegacyComments { + if err := q.UpsertLegacyPR(ctx, genLegacyPRParams(pr)); err != nil { + return err + } + } else { + if err := q.UpsertPR(ctx, genPRParams(pr)); err != nil { + return err + } } for _, c := range checks { if err := q.UpsertPRCheck(ctx, genCheckParams(pr.URL, c)); err != nil { @@ -254,6 +260,22 @@ func genPRParams(r domain.PullRequest) gen.UpsertPRParams { } } +func genLegacyPRParams(r domain.PullRequest) gen.UpsertLegacyPRParams { + return gen.UpsertLegacyPRParams{ + URL: r.URL, + SessionID: r.SessionID, + Number: int64(r.Number), + PRState: prState(r), + ReviewDecision: reviewOrDefault(r.Review), + CIState: ciOrDefault(r.CI), + Mergeability: mergeabilityOrDefault(r.Mergeability), + UpdatedAt: r.UpdatedAt, + IsDraft: boolInt(r.Draft), + IsMerged: boolInt(r.Merged), + IsClosed: boolInt(r.Closed), + } +} + func reviewOrDefault(v domain.ReviewDecision) domain.ReviewDecision { if v == "" { return domain.ReviewNone diff --git a/backend/internal/storage/sqlite/store/store_test.go b/backend/internal/storage/sqlite/store/store_test.go index 161a16eb..40b74f16 100644 --- a/backend/internal/storage/sqlite/store/store_test.go +++ b/backend/internal/storage/sqlite/store/store_test.go @@ -339,7 +339,13 @@ func TestWritePRPreservesSCMReviewThreads(t *testing.T) { seedProject(t, s, "mer") r, _ := s.CreateSession(ctx, sampleRecord("mer")) now := time.Now().UTC().Truncate(time.Second) - pr := domain.PullRequest{URL: "https://github.com/o/r/pull/1", SessionID: r.ID, Number: 1, UpdatedAt: now} + observedAt := now.Add(-time.Minute) + pr := domain.PullRequest{ + URL: "https://github.com/o/r/pull/1", SessionID: r.ID, Number: 1, UpdatedAt: now, + Provider: "github", Host: "github.com", Repo: "o/r", HeadSHA: "head-1", + MetadataHash: "metadata-v1", CIHash: "ci-v1", ReviewHash: "review-v1", + ObservedAt: observedAt, CIObservedAt: observedAt, ReviewObservedAt: observedAt, + } threads := []domain.PullRequestReviewThread{{ThreadID: "t1", Path: "main.go", Line: 7, SemanticHash: "thread-v1", UpdatedAt: now}} comments := []domain.PullRequestComment{{ThreadID: "t1", ID: "c1", Author: "reviewer", Body: "scm", URL: "https://example/comment/c1", CreatedAt: now}} @@ -354,6 +360,20 @@ func TestWritePRPreservesSCMReviewThreads(t *testing.T) { t.Fatal(err) } + gotPR, ok, err := s.GetPR(ctx, pr.URL) + if err != nil || !ok { + t.Fatalf("get pr: ok=%v err=%v", ok, err) + } + if gotPR.Provider != "github" || gotPR.Host != "github.com" || gotPR.Repo != "o/r" || gotPR.HeadSHA != "head-1" || + gotPR.MetadataHash != "metadata-v1" || gotPR.CIHash != "ci-v1" || gotPR.ReviewHash != "review-v1" { + t.Fatalf("legacy WritePR must preserve SCM-owned metadata and hashes, got %+v", gotPR) + } + if !gotPR.ObservedAt.Equal(observedAt) || !gotPR.CIObservedAt.Equal(observedAt) || !gotPR.ReviewObservedAt.Equal(observedAt) { + t.Fatalf("legacy WritePR must preserve SCM observation timestamps, got observed=%s ci=%s review=%s", gotPR.ObservedAt, gotPR.CIObservedAt, gotPR.ReviewObservedAt) + } + if gotPR.CI != domain.CIPassing { + t.Fatalf("legacy WritePR should still update legacy scalar CI state, got %s", gotPR.CI) + } gotThreads, err := s.ListPRReviewThreads(ctx, pr.URL) if err != nil { t.Fatal(err) @@ -400,6 +420,29 @@ func TestWritePRPreservesSCMReviewThreads(t *testing.T) { } } +func TestWritePRReplacesLegacyCommentBodies(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + r, _ := s.CreateSession(ctx, sampleRecord("mer")) + now := time.Now().UTC().Truncate(time.Second) + pr := domain.PullRequest{URL: "https://github.com/o/r/pull/2", SessionID: r.ID, Number: 2, UpdatedAt: now} + + if err := s.WritePR(ctx, pr, nil, []domain.PullRequestComment{{ID: "legacy", Author: "reviewer", Body: "before", CreatedAt: now}}); err != nil { + t.Fatal(err) + } + if err := s.WritePR(ctx, pr, nil, []domain.PullRequestComment{{ID: "legacy", Author: "reviewer", Body: "after edit", CreatedAt: now.Add(time.Second)}}); err != nil { + t.Fatal(err) + } + got, err := s.ListPRComments(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + if len(got) != 1 || got[0].Body != "after edit" || !got[0].CreatedAt.Equal(now.Add(time.Second)) { + t.Fatalf("legacy comment replacement did not persist edited row: %+v", got) + } +} + func TestCDCTriggersPopulateChangeLog(t *testing.T) { s := newTestStore(t) ctx := context.Background()