diff --git a/backend/internal/adapters/scm/github/auth.go b/backend/internal/adapters/scm/github/auth.go index 3349d7c3..12993873 100644 --- a/backend/internal/adapters/scm/github/auth.go +++ b/backend/internal/adapters/scm/github/auth.go @@ -59,6 +59,44 @@ func (s EnvTokenSource) Token(context.Context) (string, error) { return "", ErrNoToken } +// FallbackTokenSource tries each source in order, returning the first token. A +// source that returns ErrNoToken is skipped; other errors are remembered and +// surfaced if no later source yields a token. +type FallbackTokenSource []TokenSource + +// Token returns the first non-empty token from the configured sources. +func (s FallbackTokenSource) Token(ctx context.Context) (string, error) { + var firstErr error + for _, src := range s { + if src == nil { + continue + } + tok, err := src.Token(ctx) + if err == nil { + return tok, nil + } + if errors.Is(err, ErrNoToken) { + continue + } + if firstErr == nil { + firstErr = err + } + } + if firstErr != nil { + return "", firstErr + } + return "", ErrNoToken +} + +// InvalidateToken forwards cache invalidation to sources that support it. +func (s FallbackTokenSource) InvalidateToken() { + for _, src := range s { + if inv, ok := src.(tokenInvalidator); ok { + inv.InvalidateToken() + } + } +} + const defaultGHTokenCacheTTL = 5 * time.Minute // GHTokenSource shells out to `gh auth token` when env vars are not diff --git a/backend/internal/adapters/scm/github/client.go b/backend/internal/adapters/scm/github/client.go index 13897672..d988b420 100644 --- a/backend/internal/adapters/scm/github/client.go +++ b/backend/internal/adapters/scm/github/client.go @@ -13,6 +13,8 @@ import ( "strings" "sync" "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/ports" ) const ( @@ -25,7 +27,7 @@ const ( // errors.Is; the orchestrator's lifecycle code is intentionally insulated // from raw HTTP status codes. var ( - ErrNotFound = errors.New("github scm: not found") + ErrNotFound = ports.ErrSCMNotFound ErrAuthFailed = errors.New("github scm: authentication failed") ErrRateLimited = errors.New("github scm: rate limited") ) @@ -127,6 +129,50 @@ type RESTResponse struct { Body []byte } +// doRESTWithETag performs one REST GET with an explicit caller-owned ETag. +// Unlike doREST, it does not replay cached bodies or mutate the client's +// internal compatibility cache; it exists for the provider-neutral SCM observer, +// whose ETag cache belongs to the observer orchestration layer. +func (c *Client) doRESTWithETag(ctx context.Context, path string, q url.Values, etag string) (RESTResponse, error) { + u, err := c.restURL(path, q) + if err != nil { + return RESTResponse{}, fmt.Errorf("github scm: build %s URL: %w", path, err) + } + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, http.NoBody) + if err != nil { + return RESTResponse{}, fmt.Errorf("github scm: build GET %s request: %w", path, err) + } + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + req.Header.Set("User-Agent", c.userAgent) + if etag != "" { + req.Header.Set("If-None-Match", etag) + } + if err := c.authorize(ctx, req); err != nil { + return RESTResponse{}, err + } + resp, err := c.http.Do(req) + if err != nil { + return RESTResponse{}, fmt.Errorf("github scm: GET %s: %w", path, err) + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode == http.StatusNotModified { + return RESTResponse{StatusCode: resp.StatusCode, NotModified: true, ETag: firstNonEmptyHeader(resp.Header.Get("ETag"), etag)}, nil + } + b, readErr := io.ReadAll(resp.Body) + if readErr != nil { + return RESTResponse{}, fmt.Errorf("github scm: read %s body: %w", path, readErr) + } + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + return RESTResponse{StatusCode: resp.StatusCode, ETag: resp.Header.Get("ETag"), Body: b}, nil + } + err = classifyError(resp, b) + if errors.Is(err, ErrAuthFailed) { + c.invalidateToken() + } + return RESTResponse{StatusCode: resp.StatusCode, Body: b}, err +} + // doREST performs one REST request with ETag-aware caching. The cache is // scoped to the (method, path, query) tuple so repeated PR observations // against the same endpoint replay from the cache while observations of a @@ -434,3 +480,10 @@ func githubMessage(body []byte) string { } return strings.TrimSpace(string(body)) } + +func firstNonEmptyHeader(a, b string) string { + if a != "" { + return a + } + return b +} diff --git a/backend/internal/adapters/scm/github/doc.go b/backend/internal/adapters/scm/github/doc.go index 6bc7b146..4b496379 100644 --- a/backend/internal/adapters/scm/github/doc.go +++ b/backend/internal/adapters/scm/github/doc.go @@ -1,6 +1,6 @@ -// Package github observes GitHub pull requests for the PR Manager. +// Package github observes GitHub pull requests for AO's SCM integrations. // -// The exported surface is one function: +// The compatibility exported surface is: // // (*Provider).Observe(ctx, prURL) (ports.PRObservation, error) // @@ -11,9 +11,11 @@ // GET on /repos/{o}/{r}/actions/jobs/{job_id}/logs to splice the last 20 // lines of the failed job into the observation. // -// The poller / cadence loop is intentionally NOT in this package — it is -// a follow-up PR. This adapter is the observation primitive that loop -// will call. +// The provider-neutral SCM observer uses the same Provider for lower-level +// primitives: repo/commit ETag guards, branch-to-PR detection, GraphQL PR +// batching, failed-job log tails, and review-thread pagination. The polling +// loop itself is intentionally not in this package; it lives in +// internal/observe/scm. // // # State mapping // @@ -105,16 +107,18 @@ // // # Caching // -// The Client maintains an in-memory ETag cache per (method, path, query). +// The legacy Observe path's Client maintains an in-memory ETag cache per +// (method, path, query). // On the second observation of the same PR the REST GET sends // If-None-Match and replays the cached body on a 304 — GraphQL is always // re-fetched because it doesn't expose ETag-based revalidation. // +// The provider-neutral observer owns its own ETag cache and calls explicit +// provider guard methods that do not mutate the legacy Client cache. +// // # Out of scope (intentionally — these are different PRs / lanes) // -// - The poller loop and cadence selection (issue #35). // - Webhook ingestion (this package is polling-only). -// - Persistence (PR Manager owns the row mapping; see internal/service/pr). // - Linear / GitLab providers (separate PRs). // - Issue tracking (separate lane, see internal/adapters/tracker). // - Comment-injection-into-session-context (Messenger lane, not SCM). diff --git a/backend/internal/adapters/scm/github/observer_provider.go b/backend/internal/adapters/scm/github/observer_provider.go new file mode 100644 index 00000000..bfb703d2 --- /dev/null +++ b/backend/internal/adapters/scm/github/observer_provider.go @@ -0,0 +1,682 @@ +package github + +// This file contains the GitHub implementation of the provider-neutral SCM observer contract. +// It handles repository parsing, REST ETag guards, branch PR discovery, GraphQL +// batch PR reads, failed-check log tails, and review-thread pagination. + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "net/url" + "sort" + "strconv" + "strings" + "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +const scmBatchCheckContextLimit = 20 + +const ( + // githubReviewThreadPageSize fetches the latest review window cheaply for + // the common case while still covering active review feedback. + githubReviewThreadPageSize = 50 + // githubReviewCommentLimitPerThread stores only the leading comments needed + // to understand a thread without making one pathological thread dominate + // GraphQL cost. + githubReviewCommentLimitPerThread = 5 + // githubReviewThreadMaxPages bounds the explicit older-thread fallback. + githubReviewThreadMaxPages = 2 +) + +// ParseRepository normalizes a GitHub remote/origin URL into a provider-neutral +// repository key. It accepts https://github.com/owner/repo(.git), +// git@github.com:owner/repo(.git), and path-only owner/repo inputs used by tests. +func (p *Provider) ParseRepository(remote string) (ports.SCMRepo, bool) { + repo, ok := parseGitHubRepo(remote) + return repo, ok +} + +// RepoPRListGuard checks GitHub's cheap open-PR-list ETag guard. +func (p *Provider) RepoPRListGuard(ctx context.Context, repo ports.SCMRepo, etag string) (ports.SCMGuardResult, error) { + q := url.Values{} + q.Set("state", "open") + q.Set("sort", "updated") + q.Set("direction", "desc") + q.Set("per_page", "1") + resp, err := p.client.doRESTWithETag(ctx, repoPath(repo.Owner, repo.Name, "pulls"), q, etag) + if err != nil { + return ports.SCMGuardResult{}, err + } + return ports.SCMGuardResult{ETag: firstNonEmptyHeader(resp.ETag, etag), NotModified: resp.NotModified}, nil +} + +// DetectPRByBranch finds an open PR whose head branch matches the session branch. +func (p *Provider) DetectPRByBranch(ctx context.Context, repo ports.SCMRepo, branch string) (ports.SCMPRObservation, error) { + branch = strings.TrimSpace(branch) + if branch == "" { + return ports.SCMPRObservation{}, fmt.Errorf("%w: empty branch", ErrNotFound) + } + pulls, err := p.detectPRByHead(ctx, repo, repo.Owner+":"+branch) + if err != nil { + return ports.SCMPRObservation{}, err + } + if len(pulls) == 0 { + return ports.SCMPRObservation{}, fmt.Errorf("%w: no open PR for branch %s", ErrNotFound, branch) + } + return restListPullToSCM(pulls[0]), nil +} + +func (p *Provider) detectPRByHead(ctx context.Context, repo ports.SCMRepo, head string) ([]restListPull, error) { + q := url.Values{} + q.Set("state", "open") + q.Set("head", head) + q.Set("per_page", "10") + resp, err := p.client.doREST(ctx, http.MethodGet, repoPath(repo.Owner, repo.Name, "pulls"), q, nil) + if err != nil { + return nil, err + } + var pulls []restListPull + if err := json.Unmarshal(resp.Body, &pulls); err != nil { + return nil, fmt.Errorf("github scm: decode branch PR list: %w", err) + } + return pulls, nil +} + +// CommitChecksGuard checks GitHub's per-commit check-runs ETag guard. +func (p *Provider) CommitChecksGuard(ctx context.Context, repo ports.SCMRepo, headSHA, etag string) (ports.SCMGuardResult, error) { + if strings.TrimSpace(headSHA) == "" { + return ports.SCMGuardResult{}, fmt.Errorf("%w: empty head sha", ErrNotFound) + } + q := url.Values{} + q.Set("per_page", "1") + resp, err := p.client.doRESTWithETag(ctx, repoPath(repo.Owner, repo.Name, "commits", headSHA, "check-runs"), q, etag) + if err != nil { + return ports.SCMGuardResult{}, err + } + return ports.SCMGuardResult{ETag: firstNonEmptyHeader(resp.ETag, etag), NotModified: resp.NotModified}, nil +} + +// FetchPullRequests fetches normalized PR/check metadata for up to 25 PR refs in +// one GraphQL request. The observer owns chunking; this method rejects larger +// batches so tests catch accidental over-batching. +func (p *Provider) FetchPullRequests(ctx context.Context, refs []ports.SCMPRRef) ([]ports.SCMObservation, error) { + if len(refs) == 0 { + return nil, nil + } + if len(refs) > 25 { + return nil, fmt.Errorf("github scm: batch size %d exceeds 25", len(refs)) + } + query, aliases := buildSCMBatchQuery(refs) + data, err := p.client.doGraphQL(ctx, query, nil) + if err != nil { + return nil, err + } + out := make([]ports.SCMObservation, 0, len(refs)) + for i, ref := range refs { + repoData, _ := data[aliases[i]].(map[string]any) + pr, _ := repoData["pullRequest"].(map[string]any) + if pr == nil { + continue + } + if scmContextsPaginated(pr) { + if err := p.fetchRemainingCheckContexts(ctx, ref, pr); err != nil { + return nil, err + } + } + out = append(out, scmObservationFromGraphQL(ref, pr)) + } + return out, nil +} + +// FetchFailedCheckLogTail fetches and tails a failed GitHub Actions job log. +func (p *Provider) FetchFailedCheckLogTail(ctx context.Context, repo ports.SCMRepo, check ports.SCMCheckObservation) (string, error) { + if check.ProviderID == "" { + return "", nil + } + jobID, err := strconv.ParseInt(check.ProviderID, 10, 64) + if err != nil { + return "", fmt.Errorf("github scm: parse check provider id %q: %w", check.ProviderID, err) + } + if jobID <= 0 { + return "", nil + } + log, err := p.fetchJobLogTail(ctx, repo.Owner, repo.Name, jobID) + if err != nil { + return "", err + } + return tailLines(log, ciFailureLogTailLines), nil +} + +// FetchReviewThreads fetches review threads separately from the fast PR/CI path. +func (p *Provider) FetchReviewThreads(ctx context.Context, ref ports.SCMPRRef) (ports.SCMReviewObservation, error) { + latest, decision, pi, err := p.fetchReviewThreadPage(ctx, ref, "") + if err != nil { + return ports.SCMReviewObservation{}, err + } + if !boolv(pi["hasPreviousPage"]) { + return ports.SCMReviewObservation{Decision: decision, Threads: latest}, nil + } + out := latest + startCursor := str(pi["startCursor"]) + // GitHub returns nodes in connection order even when selecting last:N, so + // latest[0] is the oldest thread in the latest window. If that boundary + // thread is still unresolved, fetch one older window to avoid hiding older + // active review feedback behind the normal 50-thread cost cap. + oldestLatestUnresolved := len(latest) == 0 || !latest[0].Resolved + if oldestLatestUnresolved { + if startCursor == "" { + p.logger.Warn("github scm: review thread page is partial but missing start cursor", + "repo", repoFullName(ref.Repo), "pr", ref.Number) + } else { + older, _, olderPI, err := p.fetchReviewThreadPage(ctx, ref, startCursor) + if err != nil { + return ports.SCMReviewObservation{}, err + } + combined := make([]ports.SCMReviewThreadObservation, 0, len(older)+len(latest)) + combined = append(combined, older...) + combined = append(combined, latest...) + out = combined + if boolv(olderPI["hasPreviousPage"]) { + p.logger.Warn("github scm: review thread page limit reached", + "repo", repoFullName(ref.Repo), "pr", ref.Number, + "max_pages", githubReviewThreadMaxPages) + } + } + } + return ports.SCMReviewObservation{Decision: decision, Threads: out, Partial: true}, nil +} + +type restListPull struct { + URL string `json:"url"` + HTMLURL string `json:"html_url"` + Number int `json:"number"` + State string `json:"state"` + Draft bool `json:"draft"` + Title string `json:"title"` + Head struct { + Ref string `json:"ref"` + SHA string `json:"sha"` + } `json:"head"` + Base struct { + Ref string `json:"ref"` + SHA string `json:"sha"` + } `json:"base"` + User struct { + Login string `json:"login"` + } `json:"user"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` +} + +func restListPullToSCM(pull restListPull) ports.SCMPRObservation { + closed := strings.EqualFold(pull.State, "closed") + return ports.SCMPRObservation{ + URL: firstNonEmpty(pull.HTMLURL, pull.URL), + Number: pull.Number, + State: normalizePRState(pull.Draft, false, closed), + Draft: pull.Draft, + Closed: closed, + SourceBranch: pull.Head.Ref, + TargetBranch: pull.Base.Ref, + HeadSHA: pull.Head.SHA, + Title: pull.Title, + Author: pull.User.Login, + BaseSHA: pull.Base.SHA, + ProviderState: pull.State, + HTMLURL: pull.HTMLURL, + CreatedAtProvider: parseGitHubTime(pull.CreatedAt), + UpdatedAtProvider: parseGitHubTime(pull.UpdatedAt), + } +} + +func buildSCMBatchQuery(refs []ports.SCMPRRef) (string, []string) { + aliases := make([]string, len(refs)) + var b strings.Builder + b.WriteString("query{\n") + for i, ref := range refs { + alias := fmt.Sprintf("pr%d", i) + aliases[i] = alias + _, _ = fmt.Fprintf(&b, "%s: repository(owner:%s,name:%s){ pullRequest(number:%d){ %s } }\n", + alias, graphQLString(ref.Repo.Owner), graphQLString(ref.Repo.Name), ref.Number, scmPRFields()) + } + b.WriteString("}") + return b.String(), aliases +} + +func scmPRFields() string { + return strings.ReplaceAll(` +number url state isDraft merged closed title additions deletions changedFiles +mergeable mergeStateStatus reviewDecision headRefName headRefOid baseRefName baseRefOid +createdAt updatedAt mergedAt closedAt +author{ login } +mergeCommit{ oid } +commits(last:1){ nodes{ commit{ oid statusCheckRollup{ state contexts(first:CONTEXT_LIMIT){ nodes{ + __typename + ... on CheckRun { name status conclusion detailsUrl url databaseId } + ... on StatusContext { context state targetUrl } +} pageInfo{ hasNextPage endCursor } } } } } } +`, "CONTEXT_LIMIT", strconv.Itoa(scmBatchCheckContextLimit)) +} + +func (p *Provider) fetchRemainingCheckContexts(ctx context.Context, ref ports.SCMPRRef, pr map[string]any) error { + contexts := statusContexts(pr) + if contexts == nil { + return nil + } + cursor := pageInfoEndCursor(contexts) + if cursor == "" { + return fmt.Errorf("github scm: paginated check contexts for %s#%d missing end cursor", repoFullName(ref.Repo), ref.Number) + } + for { + query := buildCheckContextsQuery(ref, cursor) + data, err := p.client.doGraphQL(ctx, query, nil) + if err != nil { + return fmt.Errorf("github scm: fetch remaining check contexts for %s#%d: %w", repoFullName(ref.Repo), ref.Number, err) + } + repoData, _ := data["repo"].(map[string]any) + pagePR, _ := repoData["pullRequest"].(map[string]any) + if pagePR == nil { + return fmt.Errorf("%w: pull request not found in check context response", ErrNotFound) + } + pageContexts := statusContexts(pagePR) + if pageContexts == nil { + return fmt.Errorf("github scm: check context fallback for %s#%d returned no contexts", repoFullName(ref.Repo), ref.Number) + } + appendStatusContextNodes(contexts, pageContexts) + if !pageInfoHasMore(pageContexts) { + break + } + cursor = pageInfoEndCursor(pageContexts) + if cursor == "" { + return fmt.Errorf("github scm: paginated check context page for %s#%d missing end cursor", repoFullName(ref.Repo), ref.Number) + } + } + return nil +} + +func buildCheckContextsQuery(ref ports.SCMPRRef, cursor string) string { + return fmt.Sprintf(`query{ +repo: repository(owner:%s,name:%s){ pullRequest(number:%d){ + commits(last:1){ nodes{ commit{ statusCheckRollup{ contexts(first:%d, after:%s){ nodes{ + __typename + ... on CheckRun { name status conclusion detailsUrl url databaseId } + ... on StatusContext { context state targetUrl } + } pageInfo{ hasNextPage endCursor } } } } } } +} } +}`, graphQLString(ref.Repo.Owner), graphQLString(ref.Repo.Name), ref.Number, scmBatchCheckContextLimit, graphQLString(cursor)) +} + +func statusContexts(pr map[string]any) map[string]any { + roll := statusRollup(pr) + if roll == nil { + return nil + } + contexts, _ := roll["contexts"].(map[string]any) + return contexts +} + +func appendStatusContextNodes(dst, src map[string]any) { + if dst == nil || src == nil { + return + } + merged, _ := dst["nodes"].([]any) + for _, n := range nodes(src["nodes"]) { + merged = append(merged, n) + } + dst["nodes"] = merged + dst["pageInfo"] = src["pageInfo"] +} + +func pageInfoEndCursor(connection map[string]any) string { + pi, _ := connection["pageInfo"].(map[string]any) + return str(pi["endCursor"]) +} + +func scmObservationFromGraphQL(ref ports.SCMPRRef, pr map[string]any) ports.SCMObservation { + checks := scmChecksFromGraphQL(pr) + failed := failedSCMChecks(checks) + ci := string(ciSummaryFromRollupState(pr)) + prURL := firstNonEmpty(str(pr["url"]), ref.URL) + review := string(reviewDecisionFromGraphQL(pr)) + providerMergeable := str(pr["mergeable"]) + providerMergeState := str(pr["mergeStateStatus"]) + merged := boolv(pr["merged"]) + closed := boolv(pr["closed"]) && !merged + draft := boolv(pr["isDraft"]) + obs := ports.SCMObservation{ + Fetched: true, + Provider: ref.Repo.Provider, + Host: ref.Repo.Host, + Repo: repoFullName(ref.Repo), + PR: ports.SCMPRObservation{ + URL: prURL, + Number: int(num(pr["number"])), + State: normalizePRState(draft, merged, closed), + Draft: draft, + Merged: merged, + Closed: closed, + SourceBranch: str(pr["headRefName"]), + TargetBranch: str(pr["baseRefName"]), + HeadSHA: firstNonEmpty(str(pr["headRefOid"]), latestCommitOID(pr)), + Title: str(pr["title"]), + Additions: int(num(pr["additions"])), + Deletions: int(num(pr["deletions"])), + ChangedFiles: int(num(pr["changedFiles"])), + Author: authorLogin(pr["author"]), + BaseSHA: str(pr["baseRefOid"]), + MergeCommitSHA: mergeCommitOID(pr), + ProviderState: str(pr["state"]), + ProviderMergeable: providerMergeable, + ProviderMergeStateStatus: providerMergeState, + HTMLURL: str(pr["url"]), + CreatedAtProvider: parseGitHubTime(str(pr["createdAt"])), + UpdatedAtProvider: parseGitHubTime(str(pr["updatedAt"])), + MergedAtProvider: parseGitHubTime(str(pr["mergedAt"])), + ClosedAtProvider: parseGitHubTime(str(pr["closedAt"])), + }, + CI: ports.SCMCIObservation{ + Summary: ci, + HeadSHA: firstNonEmpty(str(pr["headRefOid"]), latestCommitOID(pr)), + Checks: checks, + FailedChecks: failed, + FailedFingerprint: githubFailedFingerprint(firstNonEmpty(str(pr["headRefOid"]), latestCommitOID(pr)), failed), + }, + Review: ports.SCMReviewObservation{Decision: review}, + } + obs.Mergeability = mergeabilityObservation(providerMergeable, providerMergeState, ci, review, draft) + return obs +} + +func ciSummaryFromRollupState(pr map[string]any) domain.CIState { + roll := statusRollup(pr) + if roll == nil { + return domain.CIUnknown + } + return mapRollupState(str(roll["state"])) +} + +func scmContextsPaginated(pr map[string]any) bool { + return pageInfoHasMore(statusContexts(pr)) +} + +func scmChecksFromGraphQL(pr map[string]any) []ports.SCMCheckObservation { + roll := statusRollup(pr) + contexts, _ := roll["contexts"].(map[string]any) + rawNodes := nodes(contexts["nodes"]) + out := make([]ports.SCMCheckObservation, 0, len(rawNodes)) + for _, n := range rawNodes { + typ := str(n["__typename"]) + var ch ports.SCMCheckObservation + switch typ { + case "CheckRun": + ch.Name = str(n["name"]) + ch.Status = string(checkStatusFromGraphQL(n)) + ch.Conclusion = strings.ToLower(str(n["conclusion"])) + ch.URL = firstNonEmpty(str(n["detailsUrl"]), str(n["url"])) + if id := int64(num(n["databaseId"])); id > 0 { + ch.ProviderID = strconv.FormatInt(id, 10) + } + case "StatusContext": + ch.Name = str(n["context"]) + ch.Status = string(checkStatusFromGraphQL(n)) + ch.Conclusion = strings.ToLower(str(n["state"])) + ch.URL = str(n["targetUrl"]) + default: + continue + } + if ch.Name == "" { + continue + } + out = append(out, ch) + } + return out +} + +func failedSCMChecks(checks []ports.SCMCheckObservation) []ports.SCMCheckObservation { + out := make([]ports.SCMCheckObservation, 0, len(checks)) + for _, ch := range checks { + status := domain.PRCheckStatus(ch.Status) + if status == domain.PRCheckFailed || status == domain.PRCheckCancelled { + out = append(out, ch) + } + } + return out +} + +func githubFailedFingerprint(head string, checks []ports.SCMCheckObservation) string { + if len(checks) == 0 { + return "" + } + parts := make([]string, 0, len(checks)) + for _, ch := range checks { + parts = append(parts, strings.Join([]string{head, ch.Name, ch.Status, ch.Conclusion, ch.URL, ch.ProviderID}, "\x00")) + } + sort.Strings(parts) + sum := sha256.Sum256([]byte(strings.Join(parts, "\x1e"))) + return hex.EncodeToString(sum[:]) +} + +func mergeabilityObservation(providerMergeable, providerMergeState, ci, review string, draft bool) ports.SCMMergeabilityObservation { + state := strings.ToUpper(strings.TrimSpace(providerMergeState)) + mergeable := strings.ToUpper(strings.TrimSpace(providerMergeable)) + out := ports.SCMMergeabilityObservation{State: string(domain.MergeUnknown)} + addBlocker := func(b string) { out.Blockers = append(out.Blockers, b) } + if state == "DIRTY" || mergeable == "CONFLICTING" { + out.State = string(domain.MergeConflicting) + out.Conflict = true + addBlocker("conflicts") + return out + } + if state == "BEHIND" || state == "BEHIND_BASE" { + out.BehindBase = true + addBlocker("behind_base") + } + if state == "BLOCKED" { + out.State = string(domain.MergeBlocked) + addBlocker("blocked_by_provider") + } + if draft { + out.State = string(domain.MergeBlocked) + addBlocker("draft") + } + if ci == string(domain.CIFailing) { + out.State = string(domain.MergeBlocked) + addBlocker("ci_failing") + } + switch review { + case string(domain.ReviewChangesRequest): + out.State = string(domain.MergeBlocked) + addBlocker("changes_requested") + case string(domain.ReviewRequired): + out.State = string(domain.MergeBlocked) + addBlocker("review_required") + } + if out.State == string(domain.MergeBlocked) { + return out + } + if state == "UNSTABLE" { + out.State = string(domain.MergeUnstable) + return out + } + if mergeable == "MERGEABLE" && (state == "CLEAN" || state == "HAS_HOOKS" || state == "") && + (review == "" || review == string(domain.ReviewApproved) || review == string(domain.ReviewNone)) && !draft { + out.State = string(domain.MergeMergeable) + out.Mergeable = true + return out + } + return out +} + +func (p *Provider) fetchReviewThreadPage(ctx context.Context, ref ports.SCMPRRef, beforeCursor string) ([]ports.SCMReviewThreadObservation, string, map[string]any, error) { + query := buildReviewThreadsQuery(ref, beforeCursor) + data, err := p.client.doGraphQL(ctx, query, nil) + if err != nil { + return nil, "", nil, err + } + repoData, _ := data["repo"].(map[string]any) + pr, _ := repoData["pullRequest"].(map[string]any) + if pr == nil { + return nil, "", nil, fmt.Errorf("%w: pull request not found in review response", ErrNotFound) + } + decision := string(reviewDecisionFromGraphQL(pr)) + threads, _ := pr["reviewThreads"].(map[string]any) + out := make([]ports.SCMReviewThreadObservation, 0, len(nodes(threads["nodes"]))) + for _, th := range nodes(threads["nodes"]) { + out = append(out, scmThreadFromGraphQL(th)) + } + pi, _ := threads["pageInfo"].(map[string]any) + return out, decision, pi, nil +} + +func buildReviewThreadsQuery(ref ports.SCMPRRef, beforeCursor string) string { + before := "null" + if beforeCursor != "" { + before = graphQLString(beforeCursor) + } + return fmt.Sprintf(`query{ +repo: repository(owner:%s,name:%s){ pullRequest(number:%d){ reviewDecision reviewThreads(last:%d, before:%s){ nodes{ + id isResolved path line + comments(first:%d){ nodes{ id body url author{ login __typename } } } +} pageInfo{ hasPreviousPage startCursor } } } } +}`, graphQLString(ref.Repo.Owner), graphQLString(ref.Repo.Name), ref.Number, githubReviewThreadPageSize, before, githubReviewCommentLimitPerThread) +} + +func scmThreadFromGraphQL(th map[string]any) ports.SCMReviewThreadObservation { + out := ports.SCMReviewThreadObservation{ + ID: str(th["id"]), + Path: str(th["path"]), + Line: int(num(th["line"])), + Resolved: boolv(th["isResolved"]), + } + comments, _ := th["comments"].(map[string]any) + commentNodes := nodes(comments["nodes"]) + allCommentsBot := len(commentNodes) > 0 + for _, cn := range commentNodes { + author, _ := cn["author"].(map[string]any) + isBot := isBotAuthor(author) + if !isBot { + allCommentsBot = false + } + out.Comments = append(out.Comments, ports.SCMReviewCommentObservation{ + ID: str(cn["id"]), + Author: str(author["login"]), + Body: str(cn["body"]), + URL: str(cn["url"]), + IsBot: isBot, + }) + } + out.IsBot = allCommentsBot + return out +} + +func parseGitHubRepo(remote string) (ports.SCMRepo, bool) { + raw := strings.TrimSpace(remote) + if raw == "" { + return ports.SCMRepo{}, false + } + if strings.HasPrefix(raw, "git@") { + raw = strings.TrimPrefix(raw, "git@") + parts := strings.SplitN(raw, ":", 2) + if len(parts) != 2 { + return ports.SCMRepo{}, false + } + host := strings.ToLower(parts[0]) + owner, name, ok := splitOwnerRepo(parts[1]) + return makeGitHubRepo(host, owner, name), ok && isGitHubHost(host) + } + if !strings.Contains(raw, "://") && strings.Count(strings.Trim(raw, "/"), "/") == 1 { + owner, name, ok := splitOwnerRepo(raw) + return makeGitHubRepo("github.com", owner, name), ok + } + u, err := url.Parse(raw) + if err != nil { + return ports.SCMRepo{}, false + } + host := strings.ToLower(u.Host) + owner, name, ok := splitOwnerRepo(u.Path) + return makeGitHubRepo(host, owner, name), ok && isGitHubHost(host) +} + +func splitOwnerRepo(p string) (string, string, bool) { + parts := strings.Split(strings.Trim(p, "/"), "/") + if len(parts) < 2 { + return "", "", false + } + owner := parts[0] + name := strings.TrimSuffix(parts[1], ".git") + return owner, name, owner != "" && name != "" +} + +func makeGitHubRepo(host, owner, name string) ports.SCMRepo { + return ports.SCMRepo{Provider: "github", Host: host, Owner: owner, Name: name, Repo: owner + "/" + name} +} + +func isGitHubHost(host string) bool { + return host == "github.com" || host == "www.github.com" || host == "api.github.com" || strings.HasSuffix(host, ".github.com") || strings.HasSuffix(host, ".ghe.io") +} + +func graphQLString(s string) string { + b, err := json.Marshal(s) + if err != nil { + return `""` + } + return string(b) +} + +func repoFullName(repo ports.SCMRepo) string { + if repo.Repo != "" { + return repo.Repo + } + return repo.Owner + "/" + repo.Name +} + +func normalizePRState(draft, merged, closed bool) string { + switch { + case merged: + return string(domain.PRStateMerged) + case closed: + return string(domain.PRStateClosed) + case draft: + return string(domain.PRStateDraft) + default: + return string(domain.PRStateOpen) + } +} + +func parseGitHubTime(s string) time.Time { + if strings.TrimSpace(s) == "" { + return time.Time{} + } + if t, err := time.Parse(time.RFC3339, s); err == nil { + return t + } + return time.Time{} +} + +func authorLogin(v any) string { + author, _ := v.(map[string]any) + return str(author["login"]) +} + +func mergeCommitOID(pr map[string]any) string { + mc, _ := pr["mergeCommit"].(map[string]any) + return str(mc["oid"]) +} + +func latestCommitOID(pr map[string]any) string { + commits, _ := pr["commits"].(map[string]any) + for _, n := range nodes(commits["nodes"]) { + commit, _ := n["commit"].(map[string]any) + if oid := str(commit["oid"]); oid != "" { + return oid + } + } + return "" +} diff --git a/backend/internal/adapters/scm/github/provider.go b/backend/internal/adapters/scm/github/provider.go index b57babdb..f961cdaa 100644 --- a/backend/internal/adapters/scm/github/provider.go +++ b/backend/internal/adapters/scm/github/provider.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "net/http" "net/url" "path" @@ -26,9 +27,13 @@ type ProviderOptions struct { Client *Client HTTPClient *http.Client Token TokenSource - RESTBase string - GraphQLURL string - UserAgent string + // SkipTokenPreflight defers token validation until the first provider call. + // Daemon wiring uses this so gh-token shell-out never blocks readiness. + SkipTokenPreflight bool + RESTBase string + GraphQLURL string + UserAgent string + Logger *slog.Logger } // Provider observes one GitHub pull request and returns a normalized @@ -37,15 +42,17 @@ type ProviderOptions struct { // observation primitive that loop will call. type Provider struct { client *Client + logger *slog.Logger } // NewProvider returns a Provider. If opts.Client is supplied it is used // verbatim; otherwise a Client is built from the other options. When a // Token source is supplied it is exercised once so missing credentials -// surface at daemon startup rather than at first observation. Tests that -// want an unauthenticated fake pass opts.Client directly. +// surface at daemon startup rather than at first observation, unless +// SkipTokenPreflight is set. Tests that want an unauthenticated fake pass +// opts.Client directly. func NewProvider(opts ProviderOptions) (*Provider, error) { - if opts.Client == nil && opts.Token != nil { + if opts.Client == nil && opts.Token != nil && !opts.SkipTokenPreflight { if _, err := opts.Token.Token(context.Background()); err != nil { return nil, err } @@ -60,7 +67,28 @@ func NewProvider(opts ProviderOptions) (*Provider, error) { UserAgent: opts.UserAgent, }) } - return &Provider{client: c}, nil + logger := opts.Logger + if logger == nil { + logger = slog.Default() + } + return &Provider{client: c, logger: logger}, nil +} + +// SCMCredentialsAvailable checks whether this provider can obtain a token. The +// SCM observer calls it lazily during the first poll that has SCM subjects, so +// daemon readiness is not blocked by shelling out to gh auth token and idle +// daemons do not warn about missing credentials. +func (p *Provider) SCMCredentialsAvailable(ctx context.Context) (bool, error) { + if p.client == nil || p.client.tokens == nil { + return true, nil + } + if _, err := p.client.tokens.Token(ctx); err != nil { + if errors.Is(err, ErrNoToken) { + return false, nil + } + return false, err + } + return true, nil } // Observe fetches the current state of one PR by its github.com URL and @@ -362,7 +390,10 @@ func mergeabilityFromGraphQL(pr map[string]any, rest restPull, ci domain.CIState return domain.MergeConflicting } - if review == domain.ReviewChangesRequest { + if rest.Draft || boolv(pr["isDraft"]) { + return domain.MergeBlocked + } + if review == domain.ReviewChangesRequest || review == domain.ReviewRequired { return domain.MergeBlocked } if ci == domain.CIFailing { diff --git a/backend/internal/adapters/scm/github/provider_test.go b/backend/internal/adapters/scm/github/provider_test.go index eb407bdd..a4d58cc8 100644 --- a/backend/internal/adapters/scm/github/provider_test.go +++ b/backend/internal/adapters/scm/github/provider_test.go @@ -1120,3 +1120,313 @@ func TestObserve_AssertsPRObservationShape(t *testing.T) { o.Comments = nil _ = o } + +func TestSCMChecksFromGraphQL_StatusContextUsesState(t *testing.T) { + pr := map[string]any{ + "commits": map[string]any{"nodes": []any{ + map[string]any{"commit": map[string]any{"statusCheckRollup": map[string]any{ + "contexts": map[string]any{"nodes": []any{ + map[string]any{"__typename": "StatusContext", "context": "legacy", "state": "FAILURE", "targetUrl": "https://ci/legacy"}, + map[string]any{"__typename": "CheckRun", "name": "actions", "status": "COMPLETED", "conclusion": "SUCCESS", "detailsUrl": "https://ci/actions"}, + }}, + }}}, + }}, + } + checks := scmChecksFromGraphQL(pr) + if len(checks) != 2 { + t.Fatalf("checks = %d, want 2: %+v", len(checks), checks) + } + if checks[0].Name != "legacy" || checks[0].Status != string(domain.PRCheckFailed) || checks[0].Conclusion != "failure" { + t.Fatalf("legacy StatusContext not normalized from state: %+v", checks[0]) + } + if checks[1].Name != "actions" || checks[1].Status != string(domain.PRCheckPassed) || checks[1].Conclusion != "success" { + t.Fatalf("CheckRun not normalized from conclusion: %+v", checks[1]) + } +} + +func TestSCMThreadFromGraphQLMarksThreadBotOnlyWhenAllCommentsAreBots(t *testing.T) { + mixed := scmThreadFromGraphQL(map[string]any{ + "id": "T-mixed", + "path": "main.go", + "line": float64(12), + "isResolved": false, + "comments": map[string]any{"nodes": []any{ + map[string]any{"id": "C-human", "body": "please fix", "author": map[string]any{"login": "alice", "__typename": "User"}}, + map[string]any{"id": "C-bot", "body": "automated note", "author": map[string]any{"login": "review-bot", "__typename": "Bot"}}, + }}, + }) + if mixed.IsBot { + t.Fatalf("mixed human+bot thread marked as bot: %+v", mixed) + } + if len(mixed.Comments) != 2 || mixed.Comments[0].IsBot || !mixed.Comments[1].IsBot { + t.Fatalf("comment bot flags not preserved on mixed thread: %+v", mixed.Comments) + } + + allBot := scmThreadFromGraphQL(map[string]any{ + "id": "T-bot", + "path": "main.go", + "line": float64(12), + "isResolved": false, + "comments": map[string]any{"nodes": []any{ + map[string]any{"id": "C-bot-1", "body": "automated note", "author": map[string]any{"login": "review-bot", "__typename": "Bot"}}, + map[string]any{"id": "C-bot-2", "body": "more automation", "author": map[string]any{"login": "other-bot", "__typename": "Bot"}}, + }}, + }) + if !allBot.IsBot { + t.Fatalf("all-bot thread not marked as bot: %+v", allBot) + } +} + +func TestSCMObservationUsesRollupStateWhenContextsPaginated(t *testing.T) { + fx := basePRFixture() + var pr map[string]any + fx.prData(func(m map[string]any) { + pr = m + commits := m["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + roll := commit["statusCheckRollup"].(map[string]any) + roll["state"] = "FAILURE" + ctxs := roll["contexts"].(map[string]any) + ctxs["nodes"] = []any{ + map[string]any{"__typename": "CheckRun", "name": "visible-pass", "status": "COMPLETED", "conclusion": "SUCCESS"}, + } + ctxs["pageInfo"] = map[string]any{"hasNextPage": true} + }) + obs := scmObservationFromGraphQL(ports.SCMPRRef{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "octocat", Name: "hello", Repo: "octocat/hello"}, Number: 42}, pr) + if obs.CI.Summary != string(domain.CIFailing) { + t.Fatalf("observer CI summary = %q, want failing from aggregate rollup state", obs.CI.Summary) + } +} + +func TestSCMMergeabilityBlocksReviewRequiredAndDraft(t *testing.T) { + cases := []struct { + name string + review string + draft bool + wantBlocker string + }{ + {name: "review required", review: string(domain.ReviewRequired), wantBlocker: "review_required"}, + {name: "draft", review: string(domain.ReviewApproved), draft: true, wantBlocker: "draft"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := mergeabilityObservation("MERGEABLE", "CLEAN", string(domain.CIPassing), tc.review, tc.draft) + if got.State != string(domain.MergeBlocked) || got.Mergeable { + t.Fatalf("mergeability = %+v, want blocked and not mergeable", got) + } + if !contains(got.Blockers, tc.wantBlocker) { + t.Fatalf("blockers = %v, want %q", got.Blockers, tc.wantBlocker) + } + }) + } +} + +func TestFetchPullRequestsDoesNotFallbackWhenContextPageComplete(t *testing.T) { + fake := newFakeGH(t) + fx := basePRFixture() + var pr map[string]any + fx.prData(func(m map[string]any) { pr = m }) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + if !strings.Contains(string(body), "contexts(first:20)") { + t.Fatalf("batch query should request 20 contexts, body=%s", body) + } + if !strings.Contains(string(body), "pageInfo{ hasNextPage endCursor }") { + t.Fatalf("batch query should request endCursor for fallback, body=%s", body) + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"pr0": map[string]any{"pullRequest": pr}}, + }) + }) + p := newProviderForTest(t, fake) + obs, err := p.FetchPullRequests(ctx(), []ports.SCMPRRef{{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "octocat", Name: "hello", Repo: "octocat/hello"}, Number: 42}}) + if err != nil { + t.Fatalf("FetchPullRequests: %v", err) + } + if got := fake.callsTo(http.MethodPost, "/graphql"); got != 1 { + t.Fatalf("graphql calls = %d, want no fallback", got) + } + if len(obs) != 1 || len(obs[0].CI.Checks) != 1 || obs[0].CI.Summary != string(domain.CIPassing) { + t.Fatalf("observation = %#v", obs) + } +} + +func TestFetchPullRequestsFetchesRemainingCheckContexts(t *testing.T) { + fake := newFakeGH(t) + fx := basePRFixture() + var pr map[string]any + fx.prData(func(m map[string]any) { + pr = m + commits := m["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + roll := commit["statusCheckRollup"].(map[string]any) + roll["state"] = "FAILURE" + ctxs := roll["contexts"].(map[string]any) + ctxs["nodes"] = []any{ + map[string]any{"__typename": "CheckRun", "name": "visible-pass", "status": "COMPLETED", "conclusion": "SUCCESS"}, + } + ctxs["pageInfo"] = map[string]any{"hasNextPage": true, "endCursor": "cursor-1"} + }) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + call := fake.callsTo(http.MethodPost, "/graphql") + body, _ := io.ReadAll(r.Body) + w.Header().Set("Content-Type", "application/json") + switch call { + case 1: + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"pr0": map[string]any{"pullRequest": pr}}, + }) + case 2: + if !strings.Contains(string(body), `after:\"cursor-1\"`) && !strings.Contains(string(body), `after:"cursor-1"`) { + t.Fatalf("fallback query missing cursor, body=%s", body) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"repo": map[string]any{"pullRequest": map[string]any{ + "commits": map[string]any{"nodes": []any{map[string]any{"commit": map[string]any{"statusCheckRollup": map[string]any{ + "contexts": map[string]any{ + "nodes": []any{ + map[string]any{"__typename": "CheckRun", "name": "hidden-fail", "status": "COMPLETED", "conclusion": "FAILURE"}, + }, + "pageInfo": map[string]any{"hasNextPage": false, "endCursor": nil}, + }, + }}}}}, + }}}, + }) + default: + t.Fatalf("unexpected graphql call %d", call) + } + }) + p := newProviderForTest(t, fake) + obs, err := p.FetchPullRequests(ctx(), []ports.SCMPRRef{{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "octocat", Name: "hello", Repo: "octocat/hello"}, Number: 42}}) + if err != nil { + t.Fatalf("FetchPullRequests: %v", err) + } + if got := fake.callsTo(http.MethodPost, "/graphql"); got != 2 { + t.Fatalf("graphql calls = %d, want batch + fallback", got) + } + if len(obs) != 1 { + t.Fatalf("observations = %#v", obs) + } + if obs[0].CI.Summary != string(domain.CIFailing) { + t.Fatalf("CI summary = %q, want aggregate failing", obs[0].CI.Summary) + } + if len(obs[0].CI.Checks) != 2 || len(obs[0].CI.FailedChecks) != 1 || obs[0].CI.FailedChecks[0].Name != "hidden-fail" { + t.Fatalf("checks not completed from fallback: %#v failed=%#v", obs[0].CI.Checks, obs[0].CI.FailedChecks) + } +} + +func TestFetchPullRequestsFailsWhenCheckContextFallbackFails(t *testing.T) { + fake := newFakeGH(t) + fx := basePRFixture() + var pr map[string]any + fx.prData(func(m map[string]any) { + pr = m + commits := m["commits"].(map[string]any)["nodes"].([]any)[0].(map[string]any) + commit := commits["commit"].(map[string]any) + ctxs := commit["statusCheckRollup"].(map[string]any)["contexts"].(map[string]any) + ctxs["pageInfo"] = map[string]any{"hasNextPage": true, "endCursor": "cursor-1"} + }) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + call := fake.callsTo(http.MethodPost, "/graphql") + if call == 1 { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"pr0": map[string]any{"pullRequest": pr}}, + }) + return + } + http.Error(w, `{"message":"graphql down"}`, http.StatusInternalServerError) + }) + p := newProviderForTest(t, fake) + if _, err := p.FetchPullRequests(ctx(), []ports.SCMPRRef{{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "octocat", Name: "hello", Repo: "octocat/hello"}, Number: 42}}); err == nil { + t.Fatal("FetchPullRequests error = nil, want fallback failure") + } +} + +func TestFetchReviewThreadsUsesLatestWindowWithoutFallbackWhenOldestResolved(t *testing.T) { + fake := newFakeGH(t) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + if !strings.Contains(string(body), "reviewThreads(last:50, before:null)") { + t.Fatalf("review query should fetch latest 50, body=%s", body) + } + if !strings.Contains(string(body), "comments(first:5)") { + t.Fatalf("review query should cap comments per thread, body=%s", body) + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"repo": map[string]any{"pullRequest": map[string]any{ + "reviewDecision": "CHANGES_REQUESTED", + "reviewThreads": map[string]any{ + "nodes": []any{map[string]any{"id": "latest-resolved", "path": "main.go", "line": 1, "isResolved": true, "comments": map[string]any{"nodes": []any{}}}}, + "pageInfo": map[string]any{"hasPreviousPage": true, "startCursor": "latest-start"}, + }, + }}}, + }) + }) + p := newProviderForTest(t, fake) + review, err := p.FetchReviewThreads(ctx(), ports.SCMPRRef{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "o", Name: "r", Repo: "o/r"}, Number: 1}) + if err != nil { + t.Fatalf("FetchReviewThreads: %v", err) + } + if got := fake.callsTo(http.MethodPost, "/graphql"); got != 1 { + t.Fatalf("graphql calls = %d, want no fallback when oldest latest thread is resolved", got) + } + if !review.Partial { + t.Fatalf("review Partial = false, want true because older pages exist") + } + if len(review.Threads) != 1 || review.Threads[0].ID != "latest-resolved" { + t.Fatalf("threads = %#v", review.Threads) + } +} + +func TestFetchReviewThreadsFetchesOneOlderPageWhenOldestUnresolved(t *testing.T) { + fake := newFakeGH(t) + fake.on(http.MethodPost, "/graphql", func(w http.ResponseWriter, r *http.Request) { + call := fake.callsTo(http.MethodPost, "/graphql") + body, _ := io.ReadAll(r.Body) + w.Header().Set("Content-Type", "application/json") + switch call { + case 1: + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"repo": map[string]any{"pullRequest": map[string]any{ + "reviewDecision": "CHANGES_REQUESTED", + "reviewThreads": map[string]any{ + "nodes": []any{map[string]any{"id": "latest-unresolved", "path": "main.go", "line": 2, "isResolved": false, "comments": map[string]any{"nodes": []any{}}}}, + "pageInfo": map[string]any{"hasPreviousPage": true, "startCursor": "latest-start"}, + }, + }}}, + }) + case 2: + if !strings.Contains(string(body), `before:\"latest-start\"`) && !strings.Contains(string(body), `before:"latest-start"`) { + t.Fatalf("older review query missing before cursor, body=%s", body) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"repo": map[string]any{"pullRequest": map[string]any{ + "reviewDecision": "CHANGES_REQUESTED", + "reviewThreads": map[string]any{ + "nodes": []any{map[string]any{"id": "older", "path": "old.go", "line": 1, "isResolved": false, "comments": map[string]any{"nodes": []any{}}}}, + "pageInfo": map[string]any{"hasPreviousPage": true, "startCursor": "older-start"}, + }, + }}}, + }) + default: + t.Fatalf("unexpected graphql call %d", call) + } + }) + p := newProviderForTest(t, fake) + review, err := p.FetchReviewThreads(ctx(), ports.SCMPRRef{Repo: ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "o", Name: "r", Repo: "o/r"}, Number: 1}) + if err != nil { + t.Fatalf("FetchReviewThreads: %v", err) + } + if got := fake.callsTo(http.MethodPost, "/graphql"); got != githubReviewThreadMaxPages { + t.Fatalf("graphql calls = %d, want capped at %d", got, githubReviewThreadMaxPages) + } + if !review.Partial { + t.Fatalf("review Partial = false, want true because pagination remains bounded") + } + if len(review.Threads) != 2 || review.Threads[0].ID != "older" || review.Threads[1].ID != "latest-unresolved" { + t.Fatalf("threads order = %#v", review.Threads) + } +} diff --git a/backend/internal/cdc/event.go b/backend/internal/cdc/event.go index 571ede2d..32c651b4 100644 --- a/backend/internal/cdc/event.go +++ b/backend/internal/cdc/event.go @@ -20,11 +20,13 @@ type EventType string // Event types, one per row-change the DB triggers emit into change_log. const ( - EventSessionCreated EventType = "session_created" - EventSessionUpdated EventType = "session_updated" - EventPRCreated EventType = "pr_created" - EventPRUpdated EventType = "pr_updated" - EventPRCheckRecorded EventType = "pr_check_recorded" + EventSessionCreated EventType = "session_created" + EventSessionUpdated EventType = "session_updated" + EventPRCreated EventType = "pr_created" + EventPRUpdated EventType = "pr_updated" + EventPRCheckRecorded EventType = "pr_check_recorded" + EventPRReviewThreadAdded EventType = "pr_review_thread_added" + EventPRReviewThreadResolved EventType = "pr_review_thread_resolved" ) // Event is one CDC change read from change_log. Seq is the monotonic ordering + diff --git a/backend/internal/daemon/daemon.go b/backend/internal/daemon/daemon.go index 3b75e4fb..e514addf 100644 --- a/backend/internal/daemon/daemon.go +++ b/backend/internal/daemon/daemon.go @@ -70,6 +70,7 @@ func Run() error { // lifecycle write path live (reducer write -> store -> DB trigger -> // change_log -> poller -> broadcaster) and gives startSession the shared LCM. lcStack := startLifecycle(ctx, store, runtimeAdapter, log) + lcStack.scmDone = startSCMObserver(ctx, store, lcStack.LCM, log) // Wire the controller-facing session service over the same store + LCM, the // zellij runtime, a gitworktree workspace, and the per-session agent resolver diff --git a/backend/internal/daemon/lifecycle_wiring.go b/backend/internal/daemon/lifecycle_wiring.go index 34685670..80bbcb20 100644 --- a/backend/internal/daemon/lifecycle_wiring.go +++ b/backend/internal/daemon/lifecycle_wiring.go @@ -28,6 +28,7 @@ type lifecycleStack struct { // standing up a second store+LCM pair that would diverge under writes. LCM *lifecycle.Manager reaperDone <-chan struct{} + scmDone <-chan struct{} } // startLifecycle constructs the Lifecycle Manager over the store and starts the @@ -40,7 +41,12 @@ func startLifecycle(ctx context.Context, store *sqlite.Store, runtime ports.Runt // Stop waits for the reaper goroutine to exit. The caller must cancel the ctx // passed to startLifecycle before calling Stop. -func (l *lifecycleStack) Stop() { <-l.reaperDone } +func (l *lifecycleStack) Stop() { + <-l.reaperDone + if l.scmDone != nil { + <-l.scmDone + } +} // noopMessenger is a stub ports.AgentMessenger: durable writes and notifications // work without it; only live agent nudges are absent until the runtime/agent diff --git a/backend/internal/daemon/scm_wiring.go b/backend/internal/daemon/scm_wiring.go new file mode 100644 index 00000000..772bbd52 --- /dev/null +++ b/backend/internal/daemon/scm_wiring.go @@ -0,0 +1,48 @@ +package daemon + +// This file wires the provider-neutral SCM observer into daemon startup using +// the GitHub provider for v1. It keeps provider setup non-blocking for readiness +// by resolving tokens lazily inside the background observer path. + +import ( + "context" + "errors" + "log/slog" + + scmgithub "github.com/aoagents/agent-orchestrator/backend/internal/adapters/scm/github" + "github.com/aoagents/agent-orchestrator/backend/internal/lifecycle" + scmobserve "github.com/aoagents/agent-orchestrator/backend/internal/observe/scm" + "github.com/aoagents/agent-orchestrator/backend/internal/storage/sqlite" +) + +// startSCMObserver wires the provider-neutral SCM observer with the GitHub +// provider used by v1. Missing credentials do not fail daemon startup; the +// observer performs a lazy credential check in its background goroutine, logs +// one warning, and disables itself before any provider API calls. +func startSCMObserver(ctx context.Context, store *sqlite.Store, lcm *lifecycle.Manager, logger *slog.Logger) <-chan struct{} { + tokens := scmgithub.FallbackTokenSource{ + scmgithub.EnvTokenSource{EnvVars: []string{"AO_GITHUB_TOKEN"}}, + &scmgithub.GHTokenSource{}, + } + // Avoid token preflight on daemon startup. GHTokenSource may shell out to `gh`, + // which is too slow/flaky for the startup readiness path (especially on + // Windows CI). The provider will resolve credentials lazily in its background + // observer goroutine before it makes any GitHub API call. + provider, err := scmgithub.NewProvider(scmgithub.ProviderOptions{Token: tokens, SkipTokenPreflight: true}) + if err != nil { + if errors.Is(err, scmgithub.ErrNoToken) || errors.Is(err, scmgithub.ErrAuthFailed) { + logger.Warn("scm observer disabled: no usable GitHub token", "err", err) + } else { + logger.Warn("scm observer disabled: GitHub provider setup failed", "err", err) + } + return closedDone() + } + observer := scmobserve.New(provider, store, lcm, scmobserve.Config{Logger: logger}) + return observer.Start(ctx) +} + +func closedDone() <-chan struct{} { + done := make(chan struct{}) + close(done) + return done +} diff --git a/backend/internal/domain/pr.go b/backend/internal/domain/pr.go index 8d1c2451..6195dc3c 100644 --- a/backend/internal/domain/pr.go +++ b/backend/internal/domain/pr.go @@ -32,6 +32,39 @@ type PullRequest struct { Review ReviewDecision Mergeability Mergeability UpdatedAt time.Time + + Provider string + Host string + Repo string + + SourceBranch string + TargetBranch string + HeadSHA string + Title string + Additions int + Deletions int + ChangedFiles int + Author string + BaseSHA string + MergeCommitSHA string + + ProviderState string + ProviderMergeable string + ProviderMergeStateStatus string + HTMLURL string + + CreatedAtProvider time.Time + UpdatedAtProvider time.Time + MergedAtProvider time.Time + ClosedAtProvider time.Time + + MetadataHash string + CIHash string + ReviewHash string + + ObservedAt time.Time + CIObservedAt time.Time + ReviewObservedAt time.Time } // PullRequestCheck is one normalized CI check run for a pull request. @@ -39,22 +72,38 @@ type PullRequestCheck struct { Name string CommitHash string Status PRCheckStatus + Conclusion string URL string + Details string LogTail string CreatedAt time.Time } // PullRequestComment is one normalized review comment for a pull request. type PullRequestComment struct { + ThreadID string ID string Author string File string Line int Body string + URL string Resolved bool + IsBot bool CreatedAt time.Time } +// PullRequestReviewThread is one normalized review thread for a pull request. +type PullRequestReviewThread struct { + ThreadID string + Path string + Line int + Resolved bool + IsBot bool + SemanticHash string + UpdatedAt time.Time +} + // CIState is the aggregate CI status of a PR. type CIState string diff --git a/backend/internal/lifecycle/manager_test.go b/backend/internal/lifecycle/manager_test.go index 3a075085..24fb3c6c 100644 --- a/backend/internal/lifecycle/manager_test.go +++ b/backend/internal/lifecycle/manager_test.go @@ -142,6 +142,53 @@ func TestPRObservation_ReviewCommentsNudgeAgent(t *testing.T) { } } +func TestSCMObservationProjectsToExistingPRReactions(t *testing.T) { + m, st, msg := newManager() + st.sessions["mer-1"] = working("mer-1") + o := ports.SCMObservation{ + Fetched: true, + PR: ports.SCMPRObservation{URL: "pr1", Number: 1}, + CI: ports.SCMCIObservation{ + Summary: string(domain.CIFailing), + HeadSHA: "c1", + FailedChecks: []ports.SCMCheckObservation{{ + Name: "build", Status: string(domain.PRCheckFailed), LogTail: "boom", + }}, + }, + } + if err := m.ApplySCMObservation(ctx, "mer-1", o); err != nil { + t.Fatal(err) + } + if len(msg.msgs) != 1 || !strings.Contains(msg.msgs[0], "boom") { + t.Fatalf("want SCM CI nudge with log tail, got %v", msg.msgs) + } +} + +func TestSCMObservationUsesPRHeadWhenCIHeadMissing(t *testing.T) { + m, st, msg := newManager() + st.sessions["mer-1"] = working("mer-1") + o := ports.SCMObservation{ + Fetched: true, + PR: ports.SCMPRObservation{URL: "pr1", HeadSHA: "c1"}, + CI: ports.SCMCIObservation{ + Summary: string(domain.CIFailing), + FailedChecks: []ports.SCMCheckObservation{{ + Name: "build", Status: string(domain.PRCheckFailed), + }}, + }, + } + if err := m.ApplySCMObservation(ctx, "mer-1", o); err != nil { + t.Fatal(err) + } + o.PR.HeadSHA = "c2" + if err := m.ApplySCMObservation(ctx, "mer-1", o); err != nil { + t.Fatal(err) + } + if len(msg.msgs) != 2 { + t.Fatalf("want separate CI nudges for distinct PR heads when CI head is absent, got %d: %v", len(msg.msgs), msg.msgs) + } +} + func TestPRObservation_MergeConflictNudgesAgent(t *testing.T) { m, st, msg := newManager() st.sessions["mer-1"] = working("mer-1") diff --git a/backend/internal/lifecycle/reactions.go b/backend/internal/lifecycle/reactions.go index 024bda21..abf65891 100644 --- a/backend/internal/lifecycle/reactions.go +++ b/backend/internal/lifecycle/reactions.go @@ -70,6 +70,84 @@ func (m *Manager) ApplyPRObservation(ctx context.Context, id domain.SessionID, o return nil } +// ApplySCMObservation is the provider-neutral lifecycle entrypoint used by the +// SCM observer. The existing reaction logic still operates on PRObservation, so +// lifecycle performs the compatibility projection internally instead of leaking +// the old PR DTO back into the observer/provider boundary. +func (m *Manager) ApplySCMObservation(ctx context.Context, id domain.SessionID, o ports.SCMObservation) error { + if !o.Fetched { + return nil + } + return m.ApplyPRObservation(ctx, id, scmToPRObservation(o)) +} + +func scmToPRObservation(o ports.SCMObservation) ports.PRObservation { + pr := ports.PRObservation{ + Fetched: o.Fetched, + URL: firstSCMNonEmpty(o.PR.URL, o.PR.HTMLURL), + Number: o.PR.Number, + Draft: o.PR.Draft, + Merged: o.PR.Merged, + Closed: o.PR.Closed, + CI: domain.CIState(o.CI.Summary), + Review: domain.ReviewDecision(o.Review.Decision), + Mergeability: domain.Mergeability(o.Mergeability.State), + } + if pr.CI == "" { + pr.CI = domain.CIUnknown + } + if pr.Review == "" { + pr.Review = domain.ReviewNone + } + if pr.Mergeability == "" { + pr.Mergeability = domain.MergeUnknown + } + checkCommit := firstSCMNonEmpty(o.CI.HeadSHA, o.PR.HeadSHA) + for _, ch := range o.CI.FailedChecks { + status := domain.PRCheckStatus(ch.Status) + if status == "" { + status = domain.PRCheckFailed + } + logTail := ch.LogTail + if logTail == "" { + logTail = o.CI.FailureLogTail + } + pr.Checks = append(pr.Checks, ports.PRCheckObservation{ + Name: ch.Name, + CommitHash: checkCommit, + Status: status, + URL: ch.URL, + LogTail: logTail, + }) + } + for _, th := range o.Review.Threads { + if th.Resolved || th.IsBot { + continue + } + for _, c := range th.Comments { + if c.IsBot { + continue + } + pr.Comments = append(pr.Comments, ports.PRCommentObservation{ + ID: c.ID, + Author: c.Author, + File: th.Path, + Line: th.Line, + Body: c.Body, + Resolved: th.Resolved, + }) + } + } + return pr +} + +func firstSCMNonEmpty(a, b string) string { + if strings.TrimSpace(a) != "" { + return a + } + return b +} + func hasUnresolvedComments(comments []ports.PRCommentObservation) bool { for _, c := range comments { if !c.Resolved { @@ -80,8 +158,8 @@ func hasUnresolvedComments(comments []ports.PRCommentObservation) bool { } func reviewContent(comments []ports.PRCommentObservation) (string, string) { - var bodies []string - var ids []string + bodies := make([]string, 0, len(comments)) + ids := make([]string, 0, len(comments)) for _, c := range comments { if c.Resolved { continue diff --git a/backend/internal/observe/scm/observer.go b/backend/internal/observe/scm/observer.go new file mode 100644 index 00000000..a1ebd8f7 --- /dev/null +++ b/backend/internal/observe/scm/observer.go @@ -0,0 +1,1185 @@ +// Package scm implements the provider-neutral SCM polling observer. It owns the +// polling loop, ETag/cache checks, semantic diffing, DB persistence, and +// lifecycle notification; provider adapters only normalize provider-specific +// APIs into ports.SCMObservation values. +package scm + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "log/slog" + "strings" + "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +const ( + // DefaultTickInterval is the SCM observer's PR/CI polling cadence. + DefaultTickInterval = 30 * time.Second + // DefaultReviewInterval is the minimum interval between review-thread polls + // for a PR whose review state warrants thread refresh. + DefaultReviewInterval = 2 * time.Minute + // DefaultCacheMax bounds each in-memory ETag/review cache map. + DefaultCacheMax = 512 + // BatchSize is the maximum number of PRs in one provider batch fetch. + BatchSize = 25 +) + +// Provider is the normalized SCM provider contract used by the observer. +type Provider interface { + ParseRepository(remote string) (ports.SCMRepo, bool) + RepoPRListGuard(ctx context.Context, repo ports.SCMRepo, etag string) (ports.SCMGuardResult, error) + DetectPRByBranch(ctx context.Context, repo ports.SCMRepo, branch string) (ports.SCMPRObservation, error) + CommitChecksGuard(ctx context.Context, repo ports.SCMRepo, headSHA, etag string) (ports.SCMGuardResult, error) + FetchPullRequests(ctx context.Context, refs []ports.SCMPRRef) ([]ports.SCMObservation, error) + FetchFailedCheckLogTail(ctx context.Context, repo ports.SCMRepo, check ports.SCMCheckObservation) (string, error) + FetchReviewThreads(ctx context.Context, ref ports.SCMPRRef) (ports.SCMReviewObservation, error) +} + +// Store is the persistence contract the observer needs for discovery, local +// hash reads, and transactional SCM writes. +type Store interface { + ListAllSessions(ctx context.Context) ([]domain.SessionRecord, error) + GetProject(ctx context.Context, id string) (domain.ProjectRecord, bool, error) + ListPRsBySession(ctx context.Context, sessionID domain.SessionID) ([]domain.PullRequest, error) + ListChecks(ctx context.Context, prURL string) ([]domain.PullRequestCheck, error) + WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, reviewMode ports.ReviewWriteMode) error +} + +// Lifecycle is the provider-neutral lifecycle notification sink. +type Lifecycle interface { + ApplySCMObservation(ctx context.Context, sessionID domain.SessionID, obs ports.SCMObservation) error +} + +type credentialChecker interface { + SCMCredentialsAvailable(ctx context.Context) (bool, error) +} + +// Config holds optional observer knobs. Zero values use production defaults. +type Config struct { + // Tick is the fast PR/CI polling interval. Zero uses DefaultTickInterval. + Tick time.Duration + // ReviewInterval is the slower review-thread refresh interval. + ReviewInterval time.Duration + // Clock supplies timestamps for observations and tests. Nil uses time.Now. + Clock func() time.Time + // Logger receives operational diagnostics for provider/store/lifecycle failures. + Logger *slog.Logger + // CacheMax bounds each in-memory ETag/review cache. Zero uses DefaultCacheMax. + CacheMax int +} + +// ObserverCache stores provider ETags and review polling timestamps in memory. +// It is intentionally non-persistent for v1; cold restarts simply revalidate. +type ObserverCache struct { + // RepoPRListETag maps repository keys to the last open-PR-list ETag. + RepoPRListETag map[string]string + // CommitChecksETag maps repo+commit keys to the last check-runs ETag. + CommitChecksETag map[string]string + // LastReviewPollAt maps PR keys to the last review-thread fetch timestamp. + LastReviewPollAt map[string]time.Time + // ReviewRefreshFailed marks PRs whose review-thread refresh failed; the + // next poll retries regardless of the normal review cadence/status rules. + ReviewRefreshFailed map[string]bool + // repoOrder tracks FIFO eviction order for RepoPRListETag. + repoOrder []string + // commitOrder tracks FIFO eviction order for CommitChecksETag. + commitOrder []string + // lastReviewPollOrder tracks FIFO eviction order for LastReviewPollAt. + lastReviewPollOrder []string + // reviewFailedOrder tracks FIFO eviction order for ReviewRefreshFailed. + reviewFailedOrder []string + // max is the maximum number of entries each cache map retains. + max int +} + +func newCache(maxEntries int) ObserverCache { + if maxEntries <= 0 { + maxEntries = DefaultCacheMax + } + return ObserverCache{ + RepoPRListETag: map[string]string{}, + CommitChecksETag: map[string]string{}, + LastReviewPollAt: map[string]time.Time{}, + ReviewRefreshFailed: map[string]bool{}, + max: maxEntries, + } +} + +// Observer coordinates provider polling, semantic diffing, persistence, and +// lifecycle notifications for SCM observations. +type Observer struct { + // provider is the SCM adapter used for all provider/network operations. + provider Provider + // store supplies sessions/projects/local PR state and receives transactional writes. + store Store + // lifecycle is notified after successful persistence of meaningful changes. + lifecycle Lifecycle + // tick is the active PR/CI polling cadence. + tick time.Duration + // reviewInterval is the minimum duration between review-thread fetches per PR. + reviewInterval time.Duration + // clock supplies observation timestamps. + clock func() time.Time + // logger receives non-fatal operational failures. + logger *slog.Logger + // credentialsChecked records whether an optional provider credential gate ran. + credentialsChecked bool + // disabled is set after the credential gate reports unavailable credentials. + disabled bool + // Cache holds bounded in-memory provider ETags and review poll timestamps. + Cache ObserverCache +} + +// New constructs an Observer with default cadence/cache settings for zero +// values in cfg. +func New(provider Provider, store Store, lifecycle Lifecycle, cfg Config) *Observer { + o := &Observer{provider: provider, store: store, lifecycle: lifecycle, tick: cfg.Tick, reviewInterval: cfg.ReviewInterval, clock: cfg.Clock, logger: cfg.Logger, Cache: newCache(cfg.CacheMax)} + if o.tick <= 0 { + o.tick = DefaultTickInterval + } + if o.reviewInterval <= 0 { + o.reviewInterval = DefaultReviewInterval + } + if o.clock == nil { + o.clock = time.Now + } + if o.logger == nil { + o.logger = slog.Default() + } + return o +} + +// Start launches the observer loop. The first Poll runs immediately inside the +// goroutine so daemon startup is not blocked; subsequent polls run on the tick. +func (o *Observer) Start(ctx context.Context) <-chan struct{} { + done := make(chan struct{}) + go o.loop(ctx, done) + return done +} + +func (o *Observer) loop(ctx context.Context, done chan<- struct{}) { + defer close(done) + if err := o.Poll(ctx); err != nil && !errors.Is(err, context.Canceled) { + o.logger.Error("scm observer: initial poll failed", "err", err) + } + t := time.NewTicker(o.tick) + defer t.Stop() + for { + select { + case <-ctx.Done(): + return + case <-t.C: + if err := o.Poll(ctx); err != nil && !errors.Is(err, context.Canceled) { + o.logger.Error("scm observer: poll failed", "err", err) + } + } + } +} + +type subject struct { + session domain.SessionRecord + repo ports.SCMRepo + branch string + known domain.PullRequest + hasPR bool +} + +type repoGuardState struct { + result ports.SCMGuardResult + hadETag bool + err error +} + +type pendingCacheString struct { + key string + value string +} + +type refreshSelection struct { + refs []ports.SCMPRRef + subjectsByPR map[string]*subject + commitETags map[string]pendingCacheString + candidateKeys map[string]bool +} + +type persistenceOptions struct { + reviewFetched bool + preserveLocalMetadataHash bool + preserveLocalCIHash bool + preserveLocalReviewHash bool + preserveLocalReviewDecision bool +} + +// Poll runs one synchronous SCM observation cycle. +func (o *Observer) Poll(ctx context.Context) error { + now := o.clock().UTC() + if err := ctx.Err(); err != nil { + return err + } + if o.disabled { + return nil + } + subjects, err := o.discoverSubjects(ctx) + if err != nil { + return err + } + if len(subjects) == 0 { + return nil + } + proceed, err := o.checkCredentials(ctx) + if err != nil { + return err + } + if !proceed || o.disabled { + return nil + } + + repoGuards := o.guardRepos(ctx, subjects) + repoRefreshOK := pendingRepoRefreshes(repoGuards) + markRepoRefreshFailed := func(repo ports.SCMRepo) { + key := prKey(repo, 0) + if _, ok := repoRefreshOK[key]; ok { + repoRefreshOK[key] = false + } + } + if err := ctx.Err(); err != nil { + return err + } + o.detectMissingPRs(ctx, subjects, repoGuards, now, markRepoRefreshFailed) + if err := ctx.Err(); err != nil { + return err + } + + selection := o.selectRefreshCandidates(ctx, subjects, repoGuards, markRepoRefreshFailed) + observations := map[string]ports.SCMObservation{} + prRefreshOK := map[string]bool{} + for key := range selection.candidateKeys { + prRefreshOK[key] = false + } + for _, chunk := range chunks(selection.refs, BatchSize) { + if err := ctx.Err(); err != nil { + return err + } + batch, err := o.provider.FetchPullRequests(ctx, chunk) + if err != nil { + o.logger.Error("scm observer: GraphQL PR batch failed", "err", err) + for _, ref := range chunk { + markRepoRefreshFailed(ref.Repo) + } + continue + } + chunkSeen := map[string]bool{} + for _, obs := range batch { + obs.ObservedAt = now + key := prKeyFromObs(obs) + if key == "" { + continue + } + observations[key] = obs + chunkSeen[key] = true + } + for _, ref := range chunk { + key := prKey(ref.Repo, ref.Number) + if !chunkSeen[key] { + markRepoRefreshFailed(ref.Repo) + } + } + } + + for key, subj := range selection.subjectsByPR { + if err := ctx.Err(); err != nil { + return err + } + obs, ok := observations[key] + if !ok { + continue + } + local := subj.known + o.enrichFailureLogs(ctx, &obs, local) + observations[key] = obs + } + + reviewModes := map[string]ports.ReviewWriteMode{} + localOnlyObservations := map[string]bool{} + reviewStale := map[string]bool{} + o.refreshReviews(ctx, subjects, observations, selection.subjectsByPR, reviewModes, localOnlyObservations, reviewStale, now) + if err := ctx.Err(); err != nil { + return err + } + + for key, obs := range observations { + if err := ctx.Err(); err != nil { + return err + } + subj, ok := selection.subjectsByPR[key] + if !ok { + continue + } + local := subj.known + reviewMode := reviewModes[key] + opts := persistenceOptions{ + reviewFetched: reviewMode != ports.ReviewWritePreserve, + preserveLocalMetadataHash: localOnlyObservations[key], + preserveLocalCIHash: localOnlyObservations[key], + preserveLocalReviewHash: reviewStale[key], + preserveLocalReviewDecision: reviewStale[key], + } + prepared := o.prepareForPersistence(obs, local, opts, now) + if !prepared.Changed.Metadata && !prepared.Changed.CI && !prepared.Changed.Review { + prRefreshOK[key] = true + continue + } + finalPR, finalChecks, finalThreads, finalComments := domainFromObservation(subj.session.ID, prepared, local, opts, now) + pr, checks, threads, comments := finalPR, finalChecks, finalThreads, finalComments + // Lifecycle is allowed to run only after the observed facts are durable, + // but semantic hashes are the observer's acknowledgement cursor. Keep + // changed hashes at their local values until lifecycle succeeds; if the + // daemon restarts after a lifecycle failure, the stale hashes force the + // same observation to be fetched and delivered again. + if o.lifecycle != nil { + pendingOpts := opts + if prepared.Changed.Metadata { + pendingOpts.preserveLocalMetadataHash = true + } + if prepared.Changed.CI { + pendingOpts.preserveLocalCIHash = true + } + if prepared.Changed.Review { + pendingOpts.preserveLocalReviewHash = true + } + pr, checks, threads, comments = domainFromObservation(subj.session.ID, prepared, local, pendingOpts, now) + } + if err := o.store.WriteSCMObservation(ctx, pr, checks, threads, comments, reviewMode); err != nil { + o.logger.Error("scm observer: DB write failed", "session", subj.session.ID, "pr", pr.URL, "err", err) + markRepoRefreshFailed(subj.repo) + continue + } + if o.lifecycle != nil { + if err := o.lifecycle.ApplySCMObservation(ctx, subj.session.ID, prepared); err != nil { + o.logger.Error("scm observer: lifecycle notification failed", "session", subj.session.ID, "pr", firstNonEmpty(prepared.PR.URL, prepared.PR.HTMLURL, local.URL), "err", err) + markRepoRefreshFailed(subj.repo) + continue + } + if err := o.store.WriteSCMObservation(ctx, finalPR, finalChecks, nil, nil, ports.ReviewWritePreserve); err != nil { + o.logger.Error("scm observer: DB lifecycle acknowledgement failed", "session", subj.session.ID, "pr", finalPR.URL, "err", err) + markRepoRefreshFailed(subj.repo) + continue + } + } + prRefreshOK[key] = true + } + for key, ok := range prRefreshOK { + if !ok { + continue + } + if pending, found := selection.commitETags[key]; found { + o.cacheSetString(o.Cache.CommitChecksETag, &o.Cache.commitOrder, pending.key, pending.value) + } + if reviewModes[key] != ports.ReviewWritePreserve { + o.cacheSetTime(o.Cache.LastReviewPollAt, &o.Cache.lastReviewPollOrder, key, now) + } + } + for key, ok := range repoRefreshOK { + if !ok { + continue + } + if etag := repoGuards[key].result.ETag; etag != "" { + o.cacheSetString(o.Cache.RepoPRListETag, &o.Cache.repoOrder, key, etag) + } + } + return nil +} + +func (o *Observer) checkCredentials(ctx context.Context) (bool, error) { + if o.credentialsChecked { + return true, nil + } + if err := ctx.Err(); err != nil { + return false, err + } + checker, ok := o.provider.(credentialChecker) + if !ok { + o.credentialsChecked = true + return true, nil + } + available, err := checker.SCMCredentialsAvailable(ctx) + if err != nil { + o.logger.Warn("scm observer credentials check failed; will retry", "err", err) + return false, nil + } + o.credentialsChecked = true + if !available { + o.disabled = true + o.logger.Warn("scm observer disabled: provider credentials unavailable") + return false, nil + } + return true, nil +} + +func (o *Observer) discoverSubjects(ctx context.Context) (map[string]*subject, error) { + sessions, err := o.store.ListAllSessions(ctx) + if err != nil { + return nil, err + } + projects := map[domain.ProjectID]domain.ProjectRecord{} + out := map[string]*subject{} + for _, sess := range sessions { + if sess.IsTerminated { + continue + } + branch := strings.TrimSpace(sess.Metadata.Branch) + if branch == "" { + continue + } + proj, ok := projects[sess.ProjectID] + if !ok { + p, found, err := o.store.GetProject(ctx, string(sess.ProjectID)) + if err != nil { + return nil, err + } + if !found || !p.ArchivedAt.IsZero() { + continue + } + projects[sess.ProjectID] = p + proj = p + } + repo, ok := o.provider.ParseRepository(proj.RepoOriginURL) + if !ok { + o.logger.Debug("scm observer: project has no supported SCM origin", "project", proj.ID, "origin", proj.RepoOriginURL) + continue + } + prs, err := o.store.ListPRsBySession(ctx, sess.ID) + if err != nil { + return nil, err + } + known, hasPR := chooseKnownPR(prs) + s := &subject{session: sess, repo: repo, branch: branch, known: known, hasPR: hasPR} + if hasPR && known.Number > 0 { + key := prKey(repo, known.Number) + if existing, ok := out[key]; ok { + o.logger.Warn("scm observer: duplicate tracked PR ownership skipped", "pr", key, "kept_session", existing.session.ID, "skipped_session", sess.ID) + continue + } + out[key] = s + } else { + out["session:"+string(sess.ID)] = s + } + } + return out, nil +} + +func chooseKnownPR(prs []domain.PullRequest) (domain.PullRequest, bool) { + if len(prs) == 0 { + return domain.PullRequest{}, false + } + for _, pr := range prs { + if pr.Number > 0 && !pr.Merged && !pr.Closed { + return pr, true + } + } + for _, pr := range prs { + if pr.Number > 0 { + return pr, true + } + } + return domain.PullRequest{}, false +} + +func (o *Observer) guardRepos(ctx context.Context, subjects map[string]*subject) map[string]repoGuardState { + repos := map[string]ports.SCMRepo{} + for _, s := range subjects { + repos[prKey(s.repo, 0)] = s.repo + } + out := map[string]repoGuardState{} + for key, repo := range repos { + prev, had := o.Cache.RepoPRListETag[key] + res, err := o.provider.RepoPRListGuard(ctx, repo, prev) + if err != nil { + o.logger.Error("scm observer: repo PR-list guard failed", "repo", repoFullName(repo), "err", err) + out[key] = repoGuardState{hadETag: had, err: err} + continue + } + out[key] = repoGuardState{result: res, hadETag: had} + } + return out +} + +func pendingRepoRefreshes(guards map[string]repoGuardState) map[string]bool { + out := map[string]bool{} + for key, g := range guards { + if g.err == nil && g.result.ETag != "" { + out[key] = true + } + } + return out +} + +func (o *Observer) detectMissingPRs(ctx context.Context, subjects map[string]*subject, guards map[string]repoGuardState, now time.Time, markRepoFailed func(ports.SCMRepo)) { + for oldKey, s := range subjects { + if s.hasPR { + continue + } + g := guards[prKey(s.repo, 0)] + if g.err != nil { + continue + } + if g.result.NotModified && g.hadETag { + continue + } + pr, err := o.provider.DetectPRByBranch(ctx, s.repo, s.branch) + if err != nil { + o.logger.Debug("scm observer: no PR detected for branch", "session", s.session.ID, "branch", s.branch, "err", err) + if markRepoFailed != nil && !errors.Is(err, ports.ErrSCMNotFound) { + markRepoFailed(s.repo) + } + continue + } + if pr.Number <= 0 { + continue + } + newKey := prKey(s.repo, pr.Number) + if existing, ok := subjects[newKey]; ok && existing != s { + o.logger.Warn("scm observer: detected PR is already tracked by another session", "pr", newKey, "kept_session", existing.session.ID, "skipped_session", s.session.ID) + continue + } + s.known = domain.PullRequest{URL: pr.URL, SessionID: s.session.ID, Number: pr.Number, SourceBranch: pr.SourceBranch, TargetBranch: pr.TargetBranch, HeadSHA: pr.HeadSHA, Provider: s.repo.Provider, Host: s.repo.Host, Repo: repoFullName(s.repo), UpdatedAt: now} + s.hasPR = true + delete(subjects, oldKey) + subjects[newKey] = s + } +} + +func (o *Observer) selectRefreshCandidates(ctx context.Context, subjects map[string]*subject, guards map[string]repoGuardState, markRepoFailed func(ports.SCMRepo)) refreshSelection { + selection := refreshSelection{ + subjectsByPR: map[string]*subject{}, + commitETags: map[string]pendingCacheString{}, + candidateKeys: map[string]bool{}, + } + for _, s := range subjects { + if !s.hasPR || s.known.Number <= 0 { + continue + } + key := prKey(s.repo, s.known.Number) + selection.subjectsByPR[key] = s + candidate := missingLocalState(s.known) + g := guards[prKey(s.repo, 0)] + if g.err == nil && !g.result.NotModified { + candidate = true + } + if s.known.HeadSHA != "" { + commitKey := commitKey(s.repo, s.known.HeadSHA) + prev := o.Cache.CommitChecksETag[commitKey] + res, err := o.provider.CommitChecksGuard(ctx, s.repo, s.known.HeadSHA, prev) + if err != nil { + o.logger.Error("scm observer: commit check-runs guard failed", "pr", s.known.URL, "sha", s.known.HeadSHA, "err", err) + if markRepoFailed != nil { + markRepoFailed(s.repo) + } + } else if !res.NotModified { + candidate = true + if res.ETag != "" { + selection.commitETags[key] = pendingCacheString{key: commitKey, value: res.ETag} + } + } + } + if candidate { + selection.refs = append(selection.refs, ports.SCMPRRef{Repo: s.repo, Number: s.known.Number, URL: s.known.URL}) + selection.candidateKeys[key] = true + } + } + return selection +} + +func missingLocalState(pr domain.PullRequest) bool { + return pr.URL == "" || pr.HeadSHA == "" || pr.MetadataHash == "" || pr.CIHash == "" +} + +func (o *Observer) enrichFailureLogs(ctx context.Context, obs *ports.SCMObservation, local domain.PullRequest) { + if obs.CI.Summary != string(domain.CIFailing) || obs.CI.FailedFingerprint == "" { + return + } + if strings.HasPrefix(local.CIHash, obs.CI.FailedFingerprint+":") { + checks, err := o.store.ListChecks(ctx, local.URL) + if err == nil && applyStoredFailedLogTails(obs, checks) { + return + } + } + tails := make([]string, 0, len(obs.CI.FailedChecks)) + checksByProviderID := make(map[string][]int, len(obs.CI.Checks)) + for i := range obs.CI.Checks { + key := checkProviderKey(obs.CI.Checks[i]) + checksByProviderID[key] = append(checksByProviderID[key], i) + } + for i := range obs.CI.FailedChecks { + tail := obs.CI.FailedChecks[i].LogTail + if tail == "" && obs.CI.FailedChecks[i].ProviderID != "" { + var err error + tail, err = o.provider.FetchFailedCheckLogTail(ctx, ports.SCMRepo{Provider: obs.Provider, Host: obs.Host, Repo: obs.Repo, Owner: ownerOf(obs.Repo), Name: nameOf(obs.Repo)}, obs.CI.FailedChecks[i]) + if err != nil { + tail = "" + } + } + obs.CI.FailedChecks[i].LogTail = tail + if tail != "" { + tails = append(tails, tail) + } + for _, j := range checksByProviderID[checkProviderKey(obs.CI.FailedChecks[i])] { + obs.CI.Checks[j].LogTail = tail + } + } + obs.CI.FailureLogTail = strings.Join(tails, "\n---\n") +} + +func checkProviderKey(ch ports.SCMCheckObservation) string { + return ch.Name + "\x00" + ch.ProviderID +} + +func applyStoredFailedLogTails(obs *ports.SCMObservation, checks []domain.PullRequestCheck) bool { + tailsByName := map[string]string{} + for _, ch := range checks { + if obs.CI.HeadSHA != "" && ch.CommitHash != "" && ch.CommitHash != obs.CI.HeadSHA { + continue + } + if ch.LogTail != "" && (ch.Status == domain.PRCheckFailed || ch.Status == domain.PRCheckCancelled) { + tailsByName[ch.Name] = ch.LogTail + } + } + if len(tailsByName) == 0 { + return false + } + tails := make([]string, 0, len(obs.CI.FailedChecks)) + for i := range obs.CI.FailedChecks { + tail := tailsByName[obs.CI.FailedChecks[i].Name] + if tail == "" { + return false + } + obs.CI.FailedChecks[i].LogTail = tail + tails = append(tails, tail) + } + for i := range obs.CI.Checks { + if tail := tailsByName[obs.CI.Checks[i].Name]; tail != "" { + obs.CI.Checks[i].LogTail = tail + } + } + obs.CI.FailureLogTail = strings.Join(tails, "\n---\n") + return true +} + +func (o *Observer) refreshReviews(ctx context.Context, subjects map[string]*subject, observations map[string]ports.SCMObservation, subjectsByPR map[string]*subject, reviewModes map[string]ports.ReviewWriteMode, localOnlyObservations, reviewStale map[string]bool, now time.Time) { + for _, s := range subjects { + if !s.hasPR || s.known.Number <= 0 { + continue + } + pkey := prKey(s.repo, s.known.Number) + obs, hasObs := observations[pkey] + decision := string(s.known.Review) + if hasObs && obs.Review.Decision != "" { + decision = obs.Review.Decision + } + if !o.needsReviewRefresh(pkey, s.known, decision, hasObs, now) { + continue + } + review, err := o.provider.FetchReviewThreads(ctx, ports.SCMPRRef{Repo: s.repo, Number: s.known.Number, URL: s.known.URL}) + if err != nil { + o.logger.Error("scm observer: review refresh failed", "pr", s.known.URL, "err", err) + o.cacheSetBool(o.Cache.ReviewRefreshFailed, &o.Cache.reviewFailedOrder, pkey, true) + if hasObs { + obs.Review.Decision = string(s.known.Review) + obs.Review.Threads = nil + observations[pkey] = obs + subjectsByPR[pkey] = s + reviewStale[pkey] = true + } + continue + } + if !hasObs { + checks, err := o.store.ListChecks(ctx, s.known.URL) + if err != nil { + o.logger.Error("scm observer: list local checks for review-only refresh failed", "pr", s.known.URL, "err", err) + } + obs = observationFromLocal(s.repo, s.known, checks) + localOnlyObservations[pkey] = true + } + if review.Decision != "" { + obs.Review.Decision = review.Decision + } + obs.Review.Threads = review.Threads + obs.Review.Partial = review.Partial + obs.ObservedAt = now + observations[pkey] = obs + subjectsByPR[pkey] = s + if review.Partial { + reviewModes[pkey] = ports.ReviewWriteMerge + } else { + reviewModes[pkey] = ports.ReviewWriteReplace + } + cacheDelete(o.Cache.ReviewRefreshFailed, &o.Cache.reviewFailedOrder, pkey) + } +} + +func (o *Observer) needsReviewRefresh(key string, local domain.PullRequest, decision string, hasObs bool, now time.Time) bool { + if o.Cache.ReviewRefreshFailed[key] { + return true + } + if local.ReviewHash == "" { + return true + } + if decision == string(domain.ReviewChangesRequest) { + last := o.Cache.LastReviewPollAt[key] + return last.IsZero() || now.Sub(last) >= o.reviewInterval + } + if hasObs && decision != string(local.Review) { + return true + } + if local.ReviewHash != "" && string(local.Review) == string(domain.ReviewChangesRequest) && decision != string(domain.ReviewChangesRequest) { + return true + } + return false +} + +func (o *Observer) prepareForPersistence(obs ports.SCMObservation, local domain.PullRequest, opts persistenceOptions, now time.Time) ports.SCMObservation { + metadataHash := metadataSemanticHash(obs) + if opts.preserveLocalMetadataHash { + metadataHash = local.MetadataHash + } + ciHash := ciSemanticHash(obs.CI) + if opts.preserveLocalCIHash { + ciHash = local.CIHash + } + reviewHash := local.ReviewHash + if !opts.preserveLocalReviewHash && (opts.reviewFetched || local.ReviewHash == "" || obs.Review.Decision != string(local.Review)) { + reviewHash = reviewSemanticHash(obs.Review) + } + obs.Changed = ports.SCMChanged{ + Metadata: metadataHash != local.MetadataHash, + CI: ciHash != local.CIHash, + Review: reviewHash != local.ReviewHash, + } + obs.PR.State = firstNonEmpty(obs.PR.State, normalizePRState(obs.PR.Draft, obs.PR.Merged, obs.PR.Closed)) + obs.ObservedAt = firstTime(obs.ObservedAt, now) + return obs +} + +func domainFromObservation(sessionID domain.SessionID, obs ports.SCMObservation, local domain.PullRequest, opts persistenceOptions, now time.Time) (domain.PullRequest, []domain.PullRequestCheck, []domain.PullRequestReviewThread, []domain.PullRequestComment) { + metadataHash := metadataSemanticHash(obs) + if opts.preserveLocalMetadataHash { + metadataHash = local.MetadataHash + } + ciHash := ciSemanticHash(obs.CI) + if opts.preserveLocalCIHash { + ciHash = local.CIHash + } + reviewHash := reviewSemanticHash(obs.Review) + reviewDecision := domain.ReviewDecision(firstNonEmpty(obs.Review.Decision, string(domain.ReviewNone))) + if opts.preserveLocalReviewDecision { + reviewDecision = local.Review + } + if opts.preserveLocalReviewHash { + reviewHash = local.ReviewHash + } else if !opts.reviewFetched && local.ReviewHash != "" && reviewDecision == local.Review { + reviewHash = local.ReviewHash + } + observedAt := obs.ObservedAt + if !obs.Changed.Metadata && !obs.Changed.CI && !local.ObservedAt.IsZero() { + observedAt = local.ObservedAt + } + ciObservedAt := local.CIObservedAt + if obs.Changed.CI || ciObservedAt.IsZero() { + ciObservedAt = obs.ObservedAt + } + reviewObservedAt := local.ReviewObservedAt + if opts.reviewFetched || reviewObservedAt.IsZero() { + reviewObservedAt = obs.ObservedAt + } + pr := domain.PullRequest{ + URL: firstNonEmpty(obs.PR.URL, obs.PR.HTMLURL), + SessionID: sessionID, + Number: obs.PR.Number, + Draft: obs.PR.Draft, + Merged: obs.PR.Merged, + Closed: obs.PR.Closed, + CI: domain.CIState(firstNonEmpty(obs.CI.Summary, string(domain.CIUnknown))), + Review: reviewDecision, + Mergeability: domain.Mergeability(firstNonEmpty(obs.Mergeability.State, string(domain.MergeUnknown))), + UpdatedAt: now, + Provider: obs.Provider, + Host: obs.Host, + Repo: obs.Repo, + SourceBranch: obs.PR.SourceBranch, + TargetBranch: obs.PR.TargetBranch, + HeadSHA: obs.PR.HeadSHA, + Title: obs.PR.Title, + Additions: obs.PR.Additions, + Deletions: obs.PR.Deletions, + ChangedFiles: obs.PR.ChangedFiles, + Author: obs.PR.Author, + BaseSHA: obs.PR.BaseSHA, + MergeCommitSHA: obs.PR.MergeCommitSHA, + ProviderState: obs.PR.ProviderState, + ProviderMergeable: obs.PR.ProviderMergeable, + ProviderMergeStateStatus: obs.PR.ProviderMergeStateStatus, + HTMLURL: obs.PR.HTMLURL, + CreatedAtProvider: obs.PR.CreatedAtProvider, + UpdatedAtProvider: obs.PR.UpdatedAtProvider, + MergedAtProvider: obs.PR.MergedAtProvider, + ClosedAtProvider: obs.PR.ClosedAtProvider, + MetadataHash: metadataHash, + CIHash: ciHash, + ReviewHash: reviewHash, + ObservedAt: observedAt, + CIObservedAt: ciObservedAt, + ReviewObservedAt: reviewObservedAt, + } + checks := make([]domain.PullRequestCheck, 0, len(obs.CI.Checks)) + for _, ch := range obs.CI.Checks { + checks = append(checks, domain.PullRequestCheck{Name: ch.Name, CommitHash: obs.CI.HeadSHA, Status: domain.PRCheckStatus(ch.Status), Conclusion: ch.Conclusion, URL: ch.URL, Details: ch.ProviderID, LogTail: ch.LogTail, CreatedAt: now}) + } + threads := make([]domain.PullRequestReviewThread, 0, len(obs.Review.Threads)) + commentCount := 0 + for _, th := range obs.Review.Threads { + commentCount += len(th.Comments) + } + comments := make([]domain.PullRequestComment, 0, commentCount) + for _, th := range obs.Review.Threads { + threads = append(threads, domain.PullRequestReviewThread{ThreadID: th.ID, Path: th.Path, Line: th.Line, Resolved: th.Resolved, IsBot: th.IsBot, SemanticHash: threadSemanticHash(th), UpdatedAt: now}) + for _, c := range th.Comments { + comments = append(comments, domain.PullRequestComment{ThreadID: th.ID, ID: c.ID, Author: c.Author, File: th.Path, Line: th.Line, Body: c.Body, URL: c.URL, Resolved: th.Resolved, IsBot: c.IsBot || th.IsBot, CreatedAt: now}) + } + } + return pr, checks, threads, comments +} + +func observationFromLocal(repo ports.SCMRepo, pr domain.PullRequest, checks []domain.PullRequestCheck) ports.SCMObservation { + return ports.SCMObservation{ + Fetched: true, + Provider: firstNonEmpty(pr.Provider, repo.Provider), + Host: firstNonEmpty(pr.Host, repo.Host), + Repo: firstNonEmpty(pr.Repo, repoFullName(repo)), + PR: ports.SCMPRObservation{URL: pr.URL, Number: pr.Number, State: normalizePRState(pr.Draft, pr.Merged, pr.Closed), Draft: pr.Draft, Merged: pr.Merged, Closed: pr.Closed, SourceBranch: pr.SourceBranch, TargetBranch: pr.TargetBranch, HeadSHA: pr.HeadSHA, Title: pr.Title, Additions: pr.Additions, Deletions: pr.Deletions, ChangedFiles: pr.ChangedFiles, Author: pr.Author, BaseSHA: pr.BaseSHA, MergeCommitSHA: pr.MergeCommitSHA, ProviderState: pr.ProviderState, ProviderMergeable: pr.ProviderMergeable, ProviderMergeStateStatus: pr.ProviderMergeStateStatus, HTMLURL: pr.HTMLURL, CreatedAtProvider: pr.CreatedAtProvider, UpdatedAtProvider: pr.UpdatedAtProvider, MergedAtProvider: pr.MergedAtProvider, ClosedAtProvider: pr.ClosedAtProvider}, + CI: ciObservationFromLocal(pr, checks), + Review: ports.SCMReviewObservation{Decision: string(pr.Review)}, + Mergeability: mergeabilityObservationFromLocal(pr), + } +} + +func ciObservationFromLocal(pr domain.PullRequest, checks []domain.PullRequestCheck) ports.SCMCIObservation { + ci := ports.SCMCIObservation{ + Summary: firstNonEmpty(string(pr.CI), string(domain.CIUnknown)), + HeadSHA: pr.HeadSHA, + FailedFingerprint: failedFingerprintFromCIHash(pr.CIHash), + } + tails := []string{} + for _, ch := range checks { + if pr.HeadSHA != "" && ch.CommitHash != "" && ch.CommitHash != pr.HeadSHA { + continue + } + if ci.HeadSHA == "" { + ci.HeadSHA = ch.CommitHash + } + check := ports.SCMCheckObservation{ + Name: ch.Name, + Status: string(ch.Status), + Conclusion: ch.Conclusion, + URL: ch.URL, + LogTail: ch.LogTail, + ProviderID: ch.Details, + } + ci.Checks = append(ci.Checks, check) + if ch.Status == domain.PRCheckFailed || ch.Status == domain.PRCheckCancelled { + ci.FailedChecks = append(ci.FailedChecks, check) + if ch.LogTail != "" { + tails = append(tails, ch.LogTail) + } + } + } + ci.FailureLogTail = strings.Join(tails, "\n---\n") + return ci +} + +func failedFingerprintFromCIHash(hash string) string { + before, _, ok := strings.Cut(hash, ":") + if !ok { + return "" + } + return before +} + +func mergeabilityObservationFromLocal(pr domain.PullRequest) ports.SCMMergeabilityObservation { + out := mergeabilityFromProviderFacts(pr.ProviderMergeable, pr.ProviderMergeStateStatus, string(pr.CI), string(pr.Review), pr.Draft) + if pr.Mergeability != "" && out.State != string(pr.Mergeability) { + out = ports.SCMMergeabilityObservation{State: string(pr.Mergeability)} + } else if pr.Mergeability != "" { + out.State = string(pr.Mergeability) + } + switch domain.Mergeability(out.State) { + case domain.MergeMergeable: + out.Mergeable = true + case domain.MergeConflicting: + out.Conflict = true + if len(out.Blockers) == 0 { + out.Blockers = append(out.Blockers, "conflicts") + } + case domain.MergeBlocked: + if len(out.Blockers) == 0 { + out.Blockers = mergeBlockersFromLocal(pr) + } + } + return out +} + +func mergeBlockersFromLocal(pr domain.PullRequest) []string { + blockers := []string{} + if pr.Draft { + blockers = append(blockers, "draft") + } + if pr.CI == domain.CIFailing { + blockers = append(blockers, "ci_failing") + } + switch pr.Review { + case domain.ReviewChangesRequest: + blockers = append(blockers, "changes_requested") + case domain.ReviewRequired: + blockers = append(blockers, "review_required") + } + if len(blockers) == 0 { + blockers = append(blockers, "blocked_by_provider") + } + return blockers +} + +func mergeabilityFromProviderFacts(providerMergeable, providerMergeState, ci, review string, draft bool) ports.SCMMergeabilityObservation { + state := strings.ToUpper(strings.TrimSpace(providerMergeState)) + mergeable := strings.ToUpper(strings.TrimSpace(providerMergeable)) + out := ports.SCMMergeabilityObservation{State: string(domain.MergeUnknown)} + addBlocker := func(b string) { out.Blockers = append(out.Blockers, b) } + if state == "DIRTY" || mergeable == "CONFLICTING" { + out.State = string(domain.MergeConflicting) + out.Conflict = true + addBlocker("conflicts") + return out + } + if state == "BEHIND" || state == "BEHIND_BASE" { + out.BehindBase = true + addBlocker("behind_base") + } + if state == "BLOCKED" { + out.State = string(domain.MergeBlocked) + addBlocker("blocked_by_provider") + } + if draft { + out.State = string(domain.MergeBlocked) + addBlocker("draft") + } + if ci == string(domain.CIFailing) { + out.State = string(domain.MergeBlocked) + addBlocker("ci_failing") + } + switch review { + case string(domain.ReviewChangesRequest): + out.State = string(domain.MergeBlocked) + addBlocker("changes_requested") + case string(domain.ReviewRequired): + out.State = string(domain.MergeBlocked) + addBlocker("review_required") + } + if out.State == string(domain.MergeBlocked) { + return out + } + if state == "UNSTABLE" { + out.State = string(domain.MergeUnstable) + return out + } + if mergeable == "MERGEABLE" && (state == "CLEAN" || state == "HAS_HOOKS" || state == "") && + (review == "" || review == string(domain.ReviewApproved) || review == string(domain.ReviewNone)) && !draft { + out.State = string(domain.MergeMergeable) + out.Mergeable = true + return out + } + return out +} + +func chunks[T any](in []T, n int) [][]T { + if n <= 0 || len(in) == 0 { + return nil + } + out := make([][]T, 0, (len(in)+n-1)/n) + for len(in) > 0 { + end := n + if len(in) < end { + end = len(in) + } + out = append(out, in[:end]) + in = in[end:] + } + return out +} + +func metadataSemanticHash(obs ports.SCMObservation) string { + return stableHash(map[string]any{"provider": obs.Provider, "host": obs.Host, "repo": obs.Repo, "pr": obs.PR, "mergeability": obs.Mergeability}) +} + +func ciSemanticHash(ci ports.SCMCIObservation) string { + h := stableHash(map[string]any{"summary": ci.Summary, "head": ci.HeadSHA, "checks": ci.Checks, "failed": ci.FailedChecks, "tail": ci.FailureLogTail}) + if ci.FailedFingerprint != "" { + return ci.FailedFingerprint + ":" + h + } + return h +} + +func reviewSemanticHash(review ports.SCMReviewObservation) string { + type reviewHashPayload struct { + Decision string + Threads []ports.SCMReviewThreadObservation + Partial bool `json:",omitempty"` + } + return stableHash(reviewHashPayload{Decision: review.Decision, Threads: review.Threads, Partial: review.Partial}) +} + +func threadSemanticHash(th ports.SCMReviewThreadObservation) string { + return stableHash(th) +} + +func stableHash(v any) string { + b, err := json.Marshal(v) + if err != nil { + b = []byte(fmt.Sprintf("%#v", v)) + } + sum := sha256.Sum256(b) + return hex.EncodeToString(sum[:]) +} + +func prKeyFromObs(obs ports.SCMObservation) string { + if obs.Repo == "" || obs.PR.Number <= 0 { + return "" + } + return obs.Provider + ":" + obs.Host + ":" + obs.Repo + "#" + fmt.Sprint(obs.PR.Number) +} + +func prKey(repo ports.SCMRepo, number int) string { + base := repo.Provider + ":" + repo.Host + ":" + repoFullName(repo) + if number <= 0 { + return base + } + return base + "#" + fmt.Sprint(number) +} + +func commitKey(repo ports.SCMRepo, sha string) string { return prKey(repo, 0) + "@" + sha } + +func repoFullName(repo ports.SCMRepo) string { + if repo.Repo != "" { + return repo.Repo + } + return repo.Owner + "/" + repo.Name +} + +func ownerOf(full string) string { + parts := strings.SplitN(full, "/", 2) + if len(parts) == 2 { + return parts[0] + } + return "" +} + +func nameOf(full string) string { + parts := strings.SplitN(full, "/", 2) + if len(parts) == 2 { + return parts[1] + } + return full +} + +func firstNonEmpty(vals ...string) string { + for _, v := range vals { + if strings.TrimSpace(v) != "" { + return v + } + } + return "" +} + +func firstTime(a, b time.Time) time.Time { + if !a.IsZero() { + return a + } + return b +} + +func normalizePRState(draft, merged, closed bool) string { + switch { + case merged: + return string(domain.PRStateMerged) + case closed: + return string(domain.PRStateClosed) + case draft: + return string(domain.PRStateDraft) + default: + return string(domain.PRStateOpen) + } +} + +func scrubLine(s string) string { + s = strings.ReplaceAll(s, "\n", " ") + s = strings.ReplaceAll(s, "\r", " ") + return strings.TrimSpace(s) +} + +func (o *Observer) cacheSetString(m map[string]string, order *[]string, key, value string) { + if _, ok := m[key]; !ok { + *order = append(*order, key) + } + m[key] = value + o.evictStrings(m, order) +} + +func (o *Observer) cacheSetTime(m map[string]time.Time, order *[]string, key string, value time.Time) { + if _, ok := m[key]; !ok { + *order = append(*order, key) + } + m[key] = value + for len(*order) > o.Cache.max { + evict := (*order)[0] + *order = (*order)[1:] + delete(m, evict) + } +} + +func (o *Observer) cacheSetBool(m map[string]bool, order *[]string, key string, value bool) { + if _, ok := m[key]; !ok { + *order = append(*order, key) + } + m[key] = value + for len(*order) > o.Cache.max { + evict := (*order)[0] + *order = (*order)[1:] + delete(m, evict) + } +} + +func cacheDelete[V any](m map[string]V, order *[]string, key string) { + if _, ok := m[key]; !ok { + return + } + delete(m, key) + dst := (*order)[:0] + for _, cachedKey := range *order { + if cachedKey != key { + dst = append(dst, cachedKey) + } + } + *order = dst +} + +func (o *Observer) evictStrings(m map[string]string, order *[]string) { + for len(*order) > o.Cache.max { + evict := (*order)[0] + *order = (*order)[1:] + delete(m, evict) + } +} diff --git a/backend/internal/observe/scm/observer_test.go b/backend/internal/observe/scm/observer_test.go new file mode 100644 index 00000000..55c61823 --- /dev/null +++ b/backend/internal/observe/scm/observer_test.go @@ -0,0 +1,833 @@ +package scm + +// This file tests the SCM observer orchestration contract with fake provider, +// store, and lifecycle collaborators so ETag decisions, batching, log fetching, +// review cadence, semantic hashes, and notification behavior stay provider-neutral. + +import ( + "context" + "errors" + "fmt" + "io" + "log/slog" + "sync" + "testing" + "time" + + "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" +) + +var testRepo = ports.SCMRepo{Provider: "github", Host: "github.com", Owner: "o", Name: "r", Repo: "o/r"} + +type fakeStore struct { + mu sync.Mutex + + sessions []domain.SessionRecord + projects map[string]domain.ProjectRecord + prs map[domain.SessionID][]domain.PullRequest + checks map[string][]domain.PullRequestCheck + writeErr error + + writes []fakeWrite + + listEntered chan struct{} + listRelease chan struct{} +} + +type fakeWrite struct { + pr domain.PullRequest + checks []domain.PullRequestCheck + comments []domain.PullRequestComment + reviewMode ports.ReviewWriteMode +} + +func (s *fakeStore) ListAllSessions(ctx context.Context) ([]domain.SessionRecord, error) { + if s.listEntered != nil { + select { + case <-s.listEntered: + default: + close(s.listEntered) + } + } + if s.listRelease != nil { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-s.listRelease: + } + } + s.mu.Lock() + defer s.mu.Unlock() + return append([]domain.SessionRecord(nil), s.sessions...), nil +} + +func (s *fakeStore) GetProject(_ context.Context, id string) (domain.ProjectRecord, bool, error) { + s.mu.Lock() + defer s.mu.Unlock() + p, ok := s.projects[id] + return p, ok, nil +} + +func (s *fakeStore) ListPRsBySession(_ context.Context, id domain.SessionID) ([]domain.PullRequest, error) { + s.mu.Lock() + defer s.mu.Unlock() + return append([]domain.PullRequest(nil), s.prs[id]...), nil +} + +func (s *fakeStore) ListChecks(_ context.Context, prURL string) ([]domain.PullRequestCheck, error) { + s.mu.Lock() + defer s.mu.Unlock() + return append([]domain.PullRequestCheck(nil), s.checks[prURL]...), nil +} + +func (s *fakeStore) WriteSCMObservation(_ context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, reviewMode ports.ReviewWriteMode) error { + s.mu.Lock() + defer s.mu.Unlock() + if s.writeErr != nil { + return s.writeErr + } + s.writes = append(s.writes, fakeWrite{pr: pr, checks: append([]domain.PullRequestCheck(nil), checks...), comments: append([]domain.PullRequestComment(nil), comments...), reviewMode: reviewMode}) + return nil +} + +type fakeProvider struct { + mu sync.Mutex + repoGuards map[string]ports.SCMGuardResult + checkGuards map[string]ports.SCMGuardResult + detected map[string]ports.SCMPRObservation + observations map[string]ports.SCMObservation + reviews map[string]ports.SCMReviewObservation + logTails map[string]string + fetchErr error + reviewErr error + + credentialGate bool + credentialOK bool + credentialErr error + credentialChecks int + repoGuardCalls int + detectCalls int + fetchBatches [][]ports.SCMPRRef + logCalls int + reviewCalls int +} + +func (p *fakeProvider) SCMCredentialsAvailable(context.Context) (bool, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.credentialChecks++ + if !p.credentialGate { + return true, nil + } + return p.credentialOK, p.credentialErr +} + +func (p *fakeProvider) ParseRepository(remote string) (ports.SCMRepo, bool) { + return testRepo, remote != "" +} +func (p *fakeProvider) RepoPRListGuard(_ context.Context, repo ports.SCMRepo, _ string) (ports.SCMGuardResult, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.repoGuardCalls++ + return p.repoGuards[prKey(repo, 0)], nil +} +func (p *fakeProvider) DetectPRByBranch(_ context.Context, _ ports.SCMRepo, branch string) (ports.SCMPRObservation, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.detectCalls++ + pr, ok := p.detected[branch] + if !ok { + return ports.SCMPRObservation{}, ports.ErrSCMNotFound + } + return pr, nil +} +func (p *fakeProvider) CommitChecksGuard(_ context.Context, repo ports.SCMRepo, sha, _ string) (ports.SCMGuardResult, error) { + return p.checkGuards[commitKey(repo, sha)], nil +} +func (p *fakeProvider) FetchPullRequests(_ context.Context, refs []ports.SCMPRRef) ([]ports.SCMObservation, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.fetchBatches = append(p.fetchBatches, append([]ports.SCMPRRef(nil), refs...)) + if p.fetchErr != nil { + return nil, p.fetchErr + } + out := make([]ports.SCMObservation, 0, len(refs)) + for _, ref := range refs { + if obs, ok := p.observations[prKey(ref.Repo, ref.Number)]; ok { + out = append(out, obs) + } + } + return out, nil +} +func (p *fakeProvider) FetchFailedCheckLogTail(_ context.Context, _ ports.SCMRepo, check ports.SCMCheckObservation) (string, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.logCalls++ + return p.logTails[check.Name], nil +} +func (p *fakeProvider) FetchReviewThreads(_ context.Context, ref ports.SCMPRRef) (ports.SCMReviewObservation, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.reviewCalls++ + if p.reviewErr != nil { + return ports.SCMReviewObservation{}, p.reviewErr + } + return p.reviews[prKey(ref.Repo, ref.Number)], nil +} + +type fakeLifecycle struct { + observed []ports.SCMObservation + err error +} + +func (l *fakeLifecycle) ApplySCMObservation(_ context.Context, _ domain.SessionID, obs ports.SCMObservation) error { + if l.err != nil { + return l.err + } + l.observed = append(l.observed, obs) + return nil +} + +func newTestObserver(store *fakeStore, provider *fakeProvider, lc Lifecycle, now time.Time) *Observer { + return New(provider, store, lc, Config{Clock: func() time.Time { return now }, Tick: time.Hour, Logger: quietSlog(), CacheMax: 128}) +} + +func quietSlog() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) } + +func testStoreWithSession() *fakeStore { + return &fakeStore{ + sessions: []domain.SessionRecord{{ID: "p-1", ProjectID: "p", Metadata: domain.SessionMetadata{Branch: "feat"}}}, + projects: map[string]domain.ProjectRecord{"p": {ID: "p", RepoOriginURL: "https://github.com/o/r.git"}}, + prs: map[domain.SessionID][]domain.PullRequest{}, + checks: map[string][]domain.PullRequestCheck{}, + } +} + +func testObs(num int) ports.SCMObservation { + return ports.SCMObservation{ + Fetched: true, Provider: "github", Host: "github.com", Repo: "o/r", + PR: ports.SCMPRObservation{URL: "https://github.com/o/r/pull/" + fmt.Sprint(num), Number: num, State: "open", SourceBranch: "feat", TargetBranch: "main", HeadSHA: "sha" + fmt.Sprint(num), Title: "PR"}, + CI: ports.SCMCIObservation{Summary: string(domain.CIPassing), HeadSHA: "sha" + fmt.Sprint(num), Checks: []ports.SCMCheckObservation{{Name: "build", Status: string(domain.PRCheckPassed), Conclusion: "success", URL: "ci"}}}, + Review: ports.SCMReviewObservation{Decision: string(domain.ReviewNone)}, + Mergeability: ports.SCMMergeabilityObservation{State: string(domain.MergeMergeable), Mergeable: true}, + } +} + +func knownPR(num int) domain.PullRequest { + obs := testObs(num) + pr, _, _, _ := domainFromObservation("p-1", obs, domain.PullRequest{}, persistenceOptions{}, time.Unix(1, 0).UTC()) + return pr +} + +func TestStartAsyncPerformsImmediatePollAndStopsOnCancel(t *testing.T) { + store := testStoreWithSession() + store.listEntered = make(chan struct{}) + store.listRelease = make(chan struct{}) + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{}, observations: map[string]ports.SCMObservation{}} + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + ctx, cancel := context.WithCancel(context.Background()) + done := obs.Start(ctx) + select { + case <-store.listEntered: + case <-time.After(time.Second): + t.Fatal("initial poll did not start asynchronously") + } + cancel() + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("observer did not exit after context cancellation") + } +} + +func TestPoll_DisablesOnceWhenCredentialsUnavailable(t *testing.T) { + store := testStoreWithSession() + provider := &fakeProvider{ + credentialGate: true, + credentialOK: false, + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v1"}}, + observations: map[string]ports.SCMObservation{}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.credentialChecks != 1 { + t.Fatalf("credential checks = %d, want one lazy check", provider.credentialChecks) + } + if provider.repoGuardCalls != 0 || provider.detectCalls != 0 || len(provider.fetchBatches) != 0 { + t.Fatalf("provider API calls should be skipped without credentials: guards=%d detects=%d batches=%d", + provider.repoGuardCalls, provider.detectCalls, len(provider.fetchBatches)) + } +} + +func TestPoll_RetriesTransientCredentialErrors(t *testing.T) { + store := testStoreWithSession() + provider := &fakeProvider{ + credentialGate: true, + credentialErr: errors.New("gh auth token failed transiently"), + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v1"}}, + observations: map[string]ports.SCMObservation{}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if obs.credentialsChecked || obs.disabled { + t.Fatalf("transient credential error should not commit checked/disabled: checked=%v disabled=%v", obs.credentialsChecked, obs.disabled) + } + if provider.credentialChecks != 1 || provider.repoGuardCalls != 0 { + t.Fatalf("first poll should check credentials only: checks=%d repoGuards=%d", provider.credentialChecks, provider.repoGuardCalls) + } + + provider.mu.Lock() + provider.credentialErr = nil + provider.credentialOK = true + provider.mu.Unlock() + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if !obs.credentialsChecked || obs.disabled { + t.Fatalf("successful retry should commit checked without disabling: checked=%v disabled=%v", obs.credentialsChecked, obs.disabled) + } + if provider.credentialChecks != 2 || provider.repoGuardCalls != 1 { + t.Fatalf("second poll should retry credentials and continue: checks=%d repoGuards=%d", provider.credentialChecks, provider.repoGuardCalls) + } +} + +func TestPoll_RepoETag304SkipsDetectPR(t *testing.T) { + store := testStoreWithSession() + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v1", NotModified: true}}, observations: map[string]ports.SCMObservation{}} + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "v1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.detectCalls != 0 { + t.Fatalf("detectPR called on 304: %d", provider.detectCalls) + } +} + +func TestPoll_DetectPRNotFoundCommitsRepoETag(t *testing.T) { + store := testStoreWithSession() + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v2"}}, + detected: map[string]ports.SCMPRObservation{}, + observations: map[string]ports.SCMObservation{}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "v1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.detectCalls != 1 { + t.Fatalf("detectPR calls = %d, want 1", provider.detectCalls) + } + if got := obs.Cache.RepoPRListETag[prKey(testRepo, 0)]; got != "v2" { + t.Fatalf("repo ETag after not-found detection = %q, want v2", got) + } + if len(provider.fetchBatches) != 0 { + t.Fatalf("not-found branch should not fetch PR batch: %#v", provider.fetchBatches) + } +} + +func TestPoll_RepoETag200DetectsPRAndRefreshesSamePoll(t *testing.T) { + store := testStoreWithSession() + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "v2"}}, + detected: map[string]ports.SCMPRObservation{"feat": {URL: "https://github.com/o/r/pull/1", Number: 1, SourceBranch: "feat", TargetBranch: "main", HeadSHA: "sha1"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): testObs(1)}, + } + lc := &fakeLifecycle{} + obs := newTestObserver(store, provider, lc, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.detectCalls != 1 { + t.Fatalf("detectPR calls = %d, want 1", provider.detectCalls) + } + if len(provider.fetchBatches) != 1 || len(provider.fetchBatches[0]) != 1 || provider.fetchBatches[0][0].Number != 1 { + t.Fatalf("new PR not refreshed in same poll: %#v", provider.fetchBatches) + } + if len(store.writes) < 1 || len(lc.observed) != 1 { + t.Fatalf("write/lifecycle missing: writes=%d lifecycle=%d", len(store.writes), len(lc.observed)) + } +} + +func TestPoll_CIETagChangeRefreshesWhenRepoUnchanged(t *testing.T) { + store := testStoreWithSession() + store.prs["p-1"] = []domain.PullRequest{knownPR(1)} + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, + checkGuards: map[string]ports.SCMGuardResult{commitKey(testRepo, "sha1"): {ETag: "ci2"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): testObs(1)}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(2, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")] = "ci1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(provider.fetchBatches) != 1 { + t.Fatalf("CI ETag 200 should trigger batch fetch, got %d", len(provider.fetchBatches)) + } +} + +func TestPoll_GraphQLBatchChunksAt25(t *testing.T) { + store := &fakeStore{projects: map[string]domain.ProjectRecord{"p": {ID: "p", RepoOriginURL: "https://github.com/o/r.git"}}, prs: map[domain.SessionID][]domain.PullRequest{}, checks: map[string][]domain.PullRequestCheck{}} + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo"}}, observations: map[string]ports.SCMObservation{}} + for i := 1; i <= 26; i++ { + id := domain.SessionID("p-" + fmt.Sprint(i)) + store.sessions = append(store.sessions, domain.SessionRecord{ID: id, ProjectID: "p", Metadata: domain.SessionMetadata{Branch: "b" + fmt.Sprint(i)}}) + pr := knownPR(i) + pr.SessionID = id + pr.MetadataHash = "" // force candidate + store.prs[id] = []domain.PullRequest{pr} + provider.observations[prKey(testRepo, i)] = testObs(i) + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(provider.fetchBatches) != 2 || len(provider.fetchBatches[0]) != 25 || len(provider.fetchBatches[1]) != 1 { + t.Fatalf("batch sizes = %#v", provider.fetchBatches) + } +} + +func TestPoll_FailingCIFetchesLogTailOnlyWhenFingerprintChanged(t *testing.T) { + failing := testObs(1) + failing.CI.Summary = string(domain.CIFailing) + failing.CI.Checks = []ports.SCMCheckObservation{{Name: "build", Status: string(domain.PRCheckFailed), Conclusion: "failure", ProviderID: "99"}} + failing.CI.FailedChecks = failing.CI.Checks + failing.CI.FailedFingerprint = "fp" + + store := testStoreWithSession() + local := knownPR(1) + local.CIHash = "old" + store.prs["p-1"] = []domain.PullRequest{local} + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo"}}, checkGuards: map[string]ports.SCMGuardResult{commitKey(testRepo, "sha1"): {ETag: "ci2"}}, observations: map[string]ports.SCMObservation{prKey(testRepo, 1): failing}, logTails: map[string]string{"build": "tail"}} + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.logCalls != 1 { + t.Fatalf("log calls = %d, want 1", provider.logCalls) + } + + provider.logCalls = 0 + store.writes = nil + withTail := failing + withTail.CI.Checks[0].LogTail = "tail" + withTail.CI.FailedChecks[0].LogTail = "tail" + withTail.CI.FailureLogTail = "tail" + local.CIHash = ciSemanticHash(withTail.CI) + store.prs["p-1"] = []domain.PullRequest{local} + store.checks[local.URL] = []domain.PullRequestCheck{{Name: "build", Status: domain.PRCheckFailed, LogTail: "tail"}} + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.logCalls != 0 { + t.Fatalf("unchanged fingerprint fetched logs again: %d", provider.logCalls) + } + if len(store.writes) != 0 { + t.Fatalf("unchanged failed fingerprint with stored log tail should not write, got %d writes", len(store.writes)) + } +} + +func TestEnrichFailureLogsDoesNotRefetchExistingTailOrMissingProviderID(t *testing.T) { + obsValue := testObs(1) + obsValue.CI.Summary = string(domain.CIFailing) + obsValue.CI.FailedFingerprint = "fp" + obsValue.CI.Checks = []ports.SCMCheckObservation{ + {Name: "build", Status: string(domain.PRCheckFailed), Conclusion: "failure", ProviderID: "99", LogTail: "provider supplied tail"}, + {Name: "lint", Status: string(domain.PRCheckFailed), Conclusion: "failure"}, + } + obsValue.CI.FailedChecks = append([]ports.SCMCheckObservation(nil), obsValue.CI.Checks...) + + provider := &fakeProvider{logTails: map[string]string{"build": "fetched tail", "lint": "should not fetch"}} + obs := newTestObserver(testStoreWithSession(), provider, &fakeLifecycle{}, time.Unix(1, 0).UTC()) + obs.enrichFailureLogs(context.Background(), &obsValue, domain.PullRequest{}) + + if provider.logCalls != 0 { + t.Fatalf("log calls = %d, want 0 when tail already exists or provider id is missing", provider.logCalls) + } + if got := obsValue.CI.FailedChecks[0].LogTail; got != "provider supplied tail" { + t.Fatalf("existing tail changed: got %q", got) + } + if got := obsValue.CI.FailedChecks[1].LogTail; got != "" { + t.Fatalf("tail without provider id = %q, want empty", got) + } + if got := obsValue.CI.FailureLogTail; got != "provider supplied tail" { + t.Fatalf("FailureLogTail = %q, want only existing tail", got) + } +} + +func TestPoll_ReviewPollingRespectsInterval(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.Review = domain.ReviewChangesRequest + local.ReviewHash = "old-review" + store.prs["p-1"] = []domain.PullRequest{local} + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, observations: map[string]ports.SCMObservation{}, reviews: map[string]ports.SCMReviewObservation{prKey(testRepo, 1): {Decision: string(domain.ReviewChangesRequest), Threads: []ports.SCMReviewThreadObservation{{ID: "t1", Path: "f.go", Line: 1, Comments: []ports.SCMReviewCommentObservation{{ID: "c1", Body: "fix"}}}}}}} + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(120, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + obs.Cache.LastReviewPollAt[prKey(testRepo, 1)] = time.Unix(90, 0).UTC() + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.reviewCalls != 0 { + t.Fatalf("review fetched before interval: %d", provider.reviewCalls) + } + obs.Cache.LastReviewPollAt[prKey(testRepo, 1)] = time.Unix(0, 0).UTC() + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.reviewCalls != 1 { + t.Fatalf("review not fetched after interval: %d", provider.reviewCalls) + } + if len(store.writes) == 0 || store.writes[0].reviewMode != ports.ReviewWriteReplace { + t.Fatalf("review refresh not persisted with replace mode: %#v", store.writes) + } +} + +func TestPoll_UnchangedHashesDoNotWriteOrNotify(t *testing.T) { + store := testStoreWithSession() + obsValue := testObs(1) + local := knownPR(1) + local.MetadataHash = metadataSemanticHash(obsValue) + local.CIHash = ciSemanticHash(obsValue.CI) + local.ReviewHash = reviewSemanticHash(obsValue.Review) + store.prs["p-1"] = []domain.PullRequest{local} + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo"}}, observations: map[string]ports.SCMObservation{prKey(testRepo, 1): obsValue}} + lc := &fakeLifecycle{} + obs := newTestObserver(store, provider, lc, time.Unix(1, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) != 0 || len(lc.observed) != 0 { + t.Fatalf("unchanged hashes wrote/notified: writes=%d observed=%d", len(store.writes), len(lc.observed)) + } +} + +func TestPoll_ReviewHashDrivesPersistenceAndLifecycle(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.ReviewHash = "old" + local.Review = domain.ReviewChangesRequest + store.prs["p-1"] = []domain.PullRequest{local} + review := ports.SCMReviewObservation{Decision: string(domain.ReviewChangesRequest), Threads: []ports.SCMReviewThreadObservation{{ID: "t1", Path: "f.go", Line: 2, Comments: []ports.SCMReviewCommentObservation{{ID: "c1", Author: "ann", Body: "fix this"}}}}} + provider := &fakeProvider{repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, observations: map[string]ports.SCMObservation{}, reviews: map[string]ports.SCMReviewObservation{prKey(testRepo, 1): review}} + lc := &fakeLifecycle{} + obs := newTestObserver(store, provider, lc, time.Unix(200, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) == 0 || len(store.writes[0].comments) != 1 { + t.Fatalf("review change not persisted: %#v", store.writes) + } + if len(store.writes) != 2 { + t.Fatalf("review change with lifecycle should write held-back facts then acknowledgement, got %d writes", len(store.writes)) + } + if store.writes[0].reviewMode != ports.ReviewWriteReplace { + t.Fatalf("initial review write mode = %v, want replace", store.writes[0].reviewMode) + } + if store.writes[1].reviewMode != ports.ReviewWritePreserve || len(store.writes[1].comments) != 0 { + t.Fatalf("lifecycle acknowledgement should preserve review rows, got mode=%v comments=%d", store.writes[1].reviewMode, len(store.writes[1].comments)) + } + if len(lc.observed) != 1 || !lc.observed[0].Changed.Review { + t.Fatalf("review change not notified: %#v", lc.observed) + } +} + +func TestPoll_PartialReviewRefreshUsesMergeMode(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.ReviewHash = "old" + local.Review = domain.ReviewChangesRequest + store.prs["p-1"] = []domain.PullRequest{local} + review := ports.SCMReviewObservation{ + Decision: string(domain.ReviewChangesRequest), + Partial: true, + Threads: []ports.SCMReviewThreadObservation{{ID: "t1", Path: "f.go", Line: 2, Comments: []ports.SCMReviewCommentObservation{{ID: "c1", Author: "ann", Body: "fix this"}}}}, + } + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, + reviews: map[string]ports.SCMReviewObservation{prKey(testRepo, 1): review}, + } + obs := newTestObserver(store, provider, nil, time.Unix(210, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) != 1 { + t.Fatalf("writes = %#v, want one partial review merge", store.writes) + } + if store.writes[0].reviewMode != ports.ReviewWriteMerge { + t.Fatalf("review mode = %v, want merge", store.writes[0].reviewMode) + } + if store.writes[0].pr.ReviewHash != reviewSemanticHash(review) { + t.Fatalf("review hash = %q, want partial hash %q", store.writes[0].pr.ReviewHash, reviewSemanticHash(review)) + } +} + +func TestPoll_ReviewOnlyRefreshPreservesLocalCIAndMetadata(t *testing.T) { + store := testStoreWithSession() + localObs := testObs(1) + local := knownPR(1) + local.CI = domain.CIPassing + local.Review = domain.ReviewChangesRequest + local.ReviewHash = "old-review" + local.MetadataHash = metadataSemanticHash(localObs) + local.CIHash = ciSemanticHash(localObs.CI) + local.ObservedAt = time.Unix(10, 0).UTC() + local.CIObservedAt = time.Unix(11, 0).UTC() + local.ReviewObservedAt = time.Unix(12, 0).UTC() + store.prs["p-1"] = []domain.PullRequest{local} + store.checks[local.URL] = []domain.PullRequestCheck{ + {Name: "build", CommitHash: "sha1", Status: domain.PRCheckPassed, Conclusion: "success", URL: "ci"}, + {Name: "stale", CommitHash: "old-sha", Status: domain.PRCheckFailed, Conclusion: "failure", URL: "old-ci", LogTail: "old tail"}, + } + review := ports.SCMReviewObservation{ + Decision: string(domain.ReviewChangesRequest), + Threads: []ports.SCMReviewThreadObservation{{ID: "t1", Path: "f.go", Line: 2, Comments: []ports.SCMReviewCommentObservation{{ID: "c1", Author: "ann", Body: "fix"}}}}, + } + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, + reviews: map[string]ports.SCMReviewObservation{prKey(testRepo, 1): review}, + } + now := time.Unix(200, 0).UTC() + obs := newTestObserver(store, provider, &fakeLifecycle{}, now) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) == 0 { + t.Fatalf("writes = %d, want review-only write", len(store.writes)) + } + write := store.writes[len(store.writes)-1] + if write.pr.MetadataHash != local.MetadataHash { + t.Fatalf("metadata hash changed on review-only refresh: got %q want %q", write.pr.MetadataHash, local.MetadataHash) + } + if write.pr.CIHash != local.CIHash { + t.Fatalf("CI hash changed on review-only refresh: got %q want %q", write.pr.CIHash, local.CIHash) + } + if !write.pr.ObservedAt.Equal(local.ObservedAt) { + t.Fatalf("ObservedAt changed on review-only refresh: got %s want %s", write.pr.ObservedAt, local.ObservedAt) + } + if !write.pr.CIObservedAt.Equal(local.CIObservedAt) { + t.Fatalf("CIObservedAt changed on review-only refresh: got %s want %s", write.pr.CIObservedAt, local.CIObservedAt) + } + if !write.pr.ReviewObservedAt.Equal(now) { + t.Fatalf("ReviewObservedAt = %s, want %s", write.pr.ReviewObservedAt, now) + } + if len(write.checks) != 1 || write.checks[0].Name != "build" { + t.Fatalf("review-only local reconstruction should include current-head checks only: %#v", write.checks) + } +} + +func TestPoll_ReviewFetchFailureDoesNotUpdateReviewDecision(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.Review = domain.ReviewChangesRequest + local.ReviewHash = reviewSemanticHash(ports.SCMReviewObservation{Decision: string(domain.ReviewChangesRequest), Threads: []ports.SCMReviewThreadObservation{{ID: "old", Comments: []ports.SCMReviewCommentObservation{{ID: "c-old", Body: "old"}}}}}) + obsValue := testObs(1) + obsValue.Review.Decision = string(domain.ReviewApproved) + local.MetadataHash = metadataSemanticHash(obsValue) + local.CIHash = ciSemanticHash(obsValue.CI) + store.prs["p-1"] = []domain.PullRequest{local} + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo2"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): obsValue}, + reviewErr: errors.New("review API down"), + } + lc := &fakeLifecycle{} + obs := newTestObserver(store, provider, lc, time.Unix(300, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if provider.reviewCalls != 1 { + t.Fatalf("reviewCalls = %d, want 1", provider.reviewCalls) + } + if len(store.writes) != 0 || len(lc.observed) != 0 { + t.Fatalf("review fetch failure should not persist/notify stale review data: writes=%#v lifecycle=%#v", store.writes, lc.observed) + } + if !obs.Cache.ReviewRefreshFailed[prKey(testRepo, 1)] { + t.Fatalf("review fetch failure was not marked for retry") + } +} + +func TestPoll_SuccessfulReviewRefreshClearsRetryCacheSlot(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.Review = domain.ReviewChangesRequest + local.ReviewHash = "old-review" + store.prs["p-1"] = []domain.PullRequest{local} + review := ports.SCMReviewObservation{ + Decision: string(domain.ReviewChangesRequest), + Threads: []ports.SCMReviewThreadObservation{{ID: "t1", Path: "f.go", Line: 2, Comments: []ports.SCMReviewCommentObservation{{ID: "c1", Body: "fix"}}}}, + } + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, + reviews: map[string]ports.SCMReviewObservation{prKey(testRepo, 1): review}, + } + obs := newTestObserver(store, provider, nil, time.Unix(350, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + obs.cacheSetBool(obs.Cache.ReviewRefreshFailed, &obs.Cache.reviewFailedOrder, prKey(testRepo, 1), true) + + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if _, ok := obs.Cache.ReviewRefreshFailed[prKey(testRepo, 1)]; ok { + t.Fatalf("successful review refresh should delete retry map entry, got %#v", obs.Cache.ReviewRefreshFailed) + } + for _, key := range obs.Cache.reviewFailedOrder { + if key == prKey(testRepo, 1) { + t.Fatalf("successful review refresh should remove retry order slot, got %#v", obs.Cache.reviewFailedOrder) + } + } +} + +func TestPoll_DoesNotCommitCommitETagWhenFetchFails(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + store.prs["p-1"] = []domain.PullRequest{local} + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo", NotModified: true}}, + checkGuards: map[string]ports.SCMGuardResult{commitKey(testRepo, "sha1"): {ETag: "ci2"}}, + fetchErr: errors.New("graphql down"), + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(400, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo" + obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")] = "ci1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if got := obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")]; got != "ci1" { + t.Fatalf("commit ETag advanced after failed fetch: got %q want ci1", got) + } +} + +func TestPoll_LifecycleFailureHoldsBackHashesForDurableRetry(t *testing.T) { + store := testStoreWithSession() + local := knownPR(1) + local.MetadataHash = "old-metadata" + local.CIHash = "old-ci" + store.prs["p-1"] = []domain.PullRequest{local} + changed := testObs(1) + changed.PR.Title = "changed title" + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo2"}}, + checkGuards: map[string]ports.SCMGuardResult{commitKey(testRepo, "sha1"): {ETag: "ci2"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): changed}, + } + lc := &fakeLifecycle{err: errors.New("lifecycle down")} + obs := newTestObserver(store, provider, lc, time.Unix(500, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo1" + obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")] = "ci1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) != 1 { + t.Fatalf("DB write should succeed before lifecycle retry is queued, got writes=%#v", store.writes) + } + if store.writes[0].pr.Title != "changed title" { + t.Fatalf("DB write did not persist the observed PR state: %#v", store.writes[0].pr) + } + if store.writes[0].pr.MetadataHash != local.MetadataHash { + t.Fatalf("metadata hash advanced before lifecycle acknowledgement: got %q want %q", store.writes[0].pr.MetadataHash, local.MetadataHash) + } + if store.writes[0].pr.CIHash != local.CIHash { + t.Fatalf("CI hash advanced before lifecycle acknowledgement: got %q want %q", store.writes[0].pr.CIHash, local.CIHash) + } + if got := obs.Cache.RepoPRListETag[prKey(testRepo, 0)]; got != "repo1" { + t.Fatalf("repo ETag advanced after lifecycle failure: got %q want repo1", got) + } + if got := obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")]; got != "ci1" { + t.Fatalf("commit ETag advanced after lifecycle failure: got %q want ci1", got) + } + + lc.err = nil + store.prs["p-1"] = []domain.PullRequest{store.writes[0].pr} + restarted := newTestObserver(store, provider, lc, time.Unix(501, 0).UTC()) + if err := restarted.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(lc.observed) != 1 { + t.Fatalf("held-back semantic hashes did not force lifecycle retry after restart: %#v", lc.observed) + } + if len(store.writes) != 3 { + t.Fatalf("retry should write held-back facts then acknowledge hashes, got writes=%d", len(store.writes)) + } + last := store.writes[len(store.writes)-1].pr + if last.MetadataHash != metadataSemanticHash(changed) { + t.Fatalf("metadata hash not acknowledged after lifecycle success: got %q want %q", last.MetadataHash, metadataSemanticHash(changed)) + } + if last.CIHash != ciSemanticHash(changed.CI) { + t.Fatalf("CI hash not acknowledged after lifecycle success: got %q want %q", last.CIHash, ciSemanticHash(changed.CI)) + } +} + +func TestPoll_WriteFailureDoesNotAdvanceGuardETags(t *testing.T) { + store := testStoreWithSession() + store.writeErr = errors.New("db down") + local := knownPR(1) + local.MetadataHash = "old-metadata" + local.CIHash = "old-ci" + store.prs["p-1"] = []domain.PullRequest{local} + changed := testObs(1) + changed.PR.Title = "changed title" + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo2"}}, + checkGuards: map[string]ports.SCMGuardResult{commitKey(testRepo, "sha1"): {ETag: "ci2"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): changed}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(550, 0).UTC()) + obs.Cache.RepoPRListETag[prKey(testRepo, 0)] = "repo1" + obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")] = "ci1" + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if got := obs.Cache.RepoPRListETag[prKey(testRepo, 0)]; got != "repo1" { + t.Fatalf("repo ETag advanced after write failure: got %q want repo1", got) + } + if got := obs.Cache.CommitChecksETag[commitKey(testRepo, "sha1")]; got != "ci1" { + t.Fatalf("commit ETag advanced after write failure: got %q want ci1", got) + } +} + +func TestPoll_DuplicateTrackedPRKeepsFirstSession(t *testing.T) { + store := &fakeStore{ + sessions: []domain.SessionRecord{ + {ID: "p-1", ProjectID: "p", Metadata: domain.SessionMetadata{Branch: "feat"}}, + {ID: "p-2", ProjectID: "p", Metadata: domain.SessionMetadata{Branch: "feat"}}, + }, + projects: map[string]domain.ProjectRecord{"p": {ID: "p", RepoOriginURL: "https://github.com/o/r.git"}}, + prs: map[domain.SessionID][]domain.PullRequest{}, + checks: map[string][]domain.PullRequestCheck{}, + } + pr1 := knownPR(1) + pr1.MetadataHash = "old-1" + pr2 := pr1 + pr2.SessionID = "p-2" + store.prs["p-1"] = []domain.PullRequest{pr1} + store.prs["p-2"] = []domain.PullRequest{pr2} + provider := &fakeProvider{ + repoGuards: map[string]ports.SCMGuardResult{prKey(testRepo, 0): {ETag: "repo2"}}, + observations: map[string]ports.SCMObservation{prKey(testRepo, 1): testObs(1)}, + } + obs := newTestObserver(store, provider, &fakeLifecycle{}, time.Unix(600, 0).UTC()) + if err := obs.Poll(context.Background()); err != nil { + t.Fatal(err) + } + if len(store.writes) == 0 { + t.Fatalf("writes = %d, want exactly one owner write", len(store.writes)) + } + if store.writes[0].pr.SessionID != "p-1" { + t.Fatalf("duplicate owner overwrote first session: wrote session %s", store.writes[0].pr.SessionID) + } +} diff --git a/backend/internal/ports/outbound.go b/backend/internal/ports/outbound.go index 2614f5bb..efab1ea7 100644 --- a/backend/internal/ports/outbound.go +++ b/backend/internal/ports/outbound.go @@ -15,6 +15,29 @@ type PRWriter interface { WritePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, comments []domain.PullRequestComment) error } +// ReviewWriteMode describes how an SCM observation should update normalized +// review-thread/comment rows. +type ReviewWriteMode int + +const ( + // ReviewWritePreserve leaves stored review rows untouched. Metadata/CI-only + // refreshes and failed review fetches use this mode. + ReviewWritePreserve ReviewWriteMode = iota + // ReviewWriteReplace treats the fetched review rows as a complete snapshot + // and replaces all stored review rows for the PR. + ReviewWriteReplace + // ReviewWriteMerge treats the fetched review rows as a partial window: + // fetched threads/comments are updated while older unseen rows are preserved. + ReviewWriteMerge +) + +// SCMWriter records provider-neutral SCM observations. reviewMode decides +// whether review facts are preserved, replaced with a complete snapshot, or +// merged as a bounded partial window. +type SCMWriter interface { + WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, reviewMode ReviewWriteMode) error +} + // AgentMessenger injects a message into a running agent. type AgentMessenger interface { Send(ctx context.Context, id domain.SessionID, message string) error diff --git a/backend/internal/ports/scm_observations.go b/backend/internal/ports/scm_observations.go new file mode 100644 index 00000000..86a44670 --- /dev/null +++ b/backend/internal/ports/scm_observations.go @@ -0,0 +1,233 @@ +// This file defines provider-neutral SCM DTOs used at the boundary between the +// SCM observer, persistence layer, and lifecycle manager. Provider adapters fill +// these structs with normalized facts so downstream code does not depend on raw +// GitHub payloads or GitHub-specific enum names. +package ports + +import ( + "errors" + "time" +) + +// ErrSCMNotFound is the provider-neutral sentinel for successful SCM lookups +// that found no matching resource, such as a branch with no open pull request. +var ErrSCMNotFound = errors.New("scm: not found") + +// SCMRepo identifies a repository without assuming a provider-specific URL +// shape. Repo is conventionally "owner/name" for providers that expose an +// owner namespace, while Owner/Name are kept split for provider calls. +type SCMRepo struct { + // Provider is the normalized SCM provider name, e.g. "github". + Provider string + // Host is the SCM host, e.g. "github.com" or a GitHub Enterprise host. + Host string + // Owner is the provider-specific namespace/organization/user. + Owner string + // Name is the repository name without the owner namespace. + Name string + // Repo is the display/stable full repository name, usually "owner/name". + Repo string +} + +// SCMPRRef identifies a pull request within a provider-neutral repository. +type SCMPRRef struct { + // Repo is the normalized repository that owns the pull request. + Repo SCMRepo + // Number is the provider's pull request number within the repository. + Number int + // URL is the canonical browser URL when already known locally. + URL string +} + +// SCMGuardResult is an ETag-style cache guard result. NotModified maps to HTTP +// 304 for providers that support it. +type SCMGuardResult struct { + // ETag is the latest provider cache validator for this guard endpoint. + ETag string + // NotModified is true when the provider reported no change since the ETag. + NotModified bool +} + +// SCMObservation is the provider-neutral pull-request observation emitted by +// the SCM observer and consumed by lifecycle. Provider adapters normalize their +// SCM-specific payloads into this DTO before the observer persists/notifies. +type SCMObservation struct { + // Fetched is true only when the provider refresh succeeded and the nested + // facts are authoritative for this poll. + Fetched bool + // ObservedAt is the observer timestamp for this normalized snapshot. + ObservedAt time.Time + + // Provider is the normalized SCM provider name, e.g. "github". + Provider string + // Host is the SCM host that served this observation. + Host string + // Repo is the full repository name shown to AO users, usually "owner/name". + Repo string + + // PR contains pull-request metadata such as branches, title, state, and diff stats. + PR SCMPRObservation + // CI contains the rolled-up CI state, checks, failing fingerprint, and log tail. + CI SCMCIObservation + // Review contains review decision plus normalized review threads/comments. + Review SCMReviewObservation + // Mergeability contains AO's mergeability verdict and blockers. + Mergeability SCMMergeabilityObservation + + // Changed marks which semantic buckets changed compared with the DB snapshot. + Changed SCMChanged +} + +// SCMChanged marks which semantic state buckets changed in the successful poll. +type SCMChanged struct { + // Metadata is true when PR metadata or mergeability facts changed. + Metadata bool + // CI is true when check/CI facts or failure logs changed. + CI bool + // Review is true when review decision, threads, or comments changed. + Review bool +} + +// SCMPRObservation carries provider-neutral PR metadata. +type SCMPRObservation struct { + // URL is the canonical PR URL used as the persistence key. + URL string + // Number is the provider's PR number in the repository. + Number int + // State is AO's normalized PR state: draft, open, merged, or closed. + State string + // Draft is true when the PR is marked draft/work-in-progress. + Draft bool + // Merged is true when the PR has been merged. + Merged bool + // Closed is true when the PR is closed without being merged. + Closed bool + // SourceBranch is the PR head/source branch name. + SourceBranch string + // TargetBranch is the PR base/target branch name. + TargetBranch string + // HeadSHA is the current head commit SHA for the PR. + HeadSHA string + // Title is the provider PR title. + Title string + // Additions is the provider-reported added line count. + Additions int + // Deletions is the provider-reported deleted line count. + Deletions int + // ChangedFiles is the provider-reported changed file count. + ChangedFiles int + // Author is the provider login/name of the PR author. + Author string + // BaseSHA is the current base branch SHA when the provider supplies it. + BaseSHA string + // MergeCommitSHA is the merge commit SHA when the PR has one. + MergeCommitSHA string + + // ProviderState preserves the provider's raw PR state enum/string. + ProviderState string + // ProviderMergeable preserves the provider's raw mergeable enum/string. + ProviderMergeable string + // ProviderMergeStateStatus preserves provider-specific merge status detail. + ProviderMergeStateStatus string + // HTMLURL is the provider browser URL; it usually matches URL. + HTMLURL string + + // CreatedAtProvider is the provider's PR creation timestamp. + CreatedAtProvider time.Time + // UpdatedAtProvider is the provider's last PR update timestamp. + UpdatedAtProvider time.Time + // MergedAtProvider is the provider's merge timestamp when merged. + MergedAtProvider time.Time + // ClosedAtProvider is the provider's close timestamp when closed. + ClosedAtProvider time.Time +} + +// SCMCIObservation carries aggregate CI state plus failing-check details. +type SCMCIObservation struct { + // Summary is AO's normalized aggregate CI state: unknown, pending, passing, or failing. + Summary string + // HeadSHA is the commit SHA that the check data applies to. + HeadSHA string + // FailedFingerprint is a stable semantic signature of current failing checks. + FailedFingerprint string + // Checks contains all normalized visible check/status contexts. + Checks []SCMCheckObservation + // FailedChecks contains only failing/cancelled checks that may need agent action. + FailedChecks []SCMCheckObservation + // FailureLogTail is the combined tail of newly fetched failed-check logs. + FailureLogTail string +} + +// SCMCheckObservation is one normalized check/status context. ProviderID is an +// optional provider-owned identifier (for GitHub, Actions job/check-run id) used +// by the provider to fetch logs; consumers should not attach meaning to it. +type SCMCheckObservation struct { + // Name is the check run name or commit status context name. + Name string + // Status is AO's normalized check status. + Status string + // Conclusion is the provider conclusion/state string preserved for detail. + Conclusion string + // URL is a provider link to the check/status details. + URL string + // LogTail is the last 20 lines of a failed job log when fetched. + LogTail string + // ProviderID is an opaque provider id used for follow-up provider calls. + ProviderID string +} + +// SCMReviewObservation carries normalized review-decision and review-thread facts. +type SCMReviewObservation struct { + // Decision is AO's normalized review decision. + Decision string + // Threads contains normalized review threads fetched on the slower review cadence. + Threads []SCMReviewThreadObservation + // Partial is true when the provider intentionally fetched and persisted a + // bounded review-thread window instead of a complete PR-lifetime snapshot. + // Consumers should treat Threads as a merge/update set in that case. + Partial bool +} + +// SCMReviewThreadObservation is a normalized review thread with comments. +type SCMReviewThreadObservation struct { + // ID is the provider's stable review thread identifier. + ID string + // Path is the file path the thread is anchored to. + Path string + // Line is the line number the thread is anchored to when supplied. + Line int + // Resolved is true when the provider marks the thread resolved. + Resolved bool + // IsBot is true when the thread's comments are all/primarily bot-authored. + IsBot bool + // Comments contains normalized comments in this review thread. + Comments []SCMReviewCommentObservation +} + +// SCMReviewCommentObservation is one normalized review comment. +type SCMReviewCommentObservation struct { + // ID is the provider's stable review comment identifier. + ID string + // Author is the provider login/name of the commenter. + Author string + // Body is the review comment text. + Body string + // URL is a provider link to the comment. + URL string + // IsBot is true when the provider identifies the author as a bot. + IsBot bool +} + +// SCMMergeabilityObservation is the normalized mergeability verdict. +type SCMMergeabilityObservation struct { + // State is AO's normalized mergeability state. + State string + // Mergeable is true when the PR is currently mergeable. + Mergeable bool + // Conflict is true when merge conflicts block merging. + Conflict bool + // BehindBase is true when the head branch is behind the base branch. + BehindBase bool + // Blockers lists normalized reasons preventing merge. + Blockers []string +} diff --git a/backend/internal/session_manager/manager.go b/backend/internal/session_manager/manager.go index d1cc1007..8648e7a4 100644 --- a/backend/internal/session_manager/manager.go +++ b/backend/internal/session_manager/manager.go @@ -263,7 +263,7 @@ func (m *Manager) Cleanup(ctx context.Context, project domain.ProjectID) ([]doma if err != nil { return nil, fmt.Errorf("cleanup %s: %w", project, err) } - var cleaned []domain.SessionID + cleaned := make([]domain.SessionID, 0, len(recs)) for _, rec := range recs { if !rec.IsTerminated { continue diff --git a/backend/internal/storage/sqlite/gen/models.go b/backend/internal/storage/sqlite/gen/models.go index e65add74..b76bd2b7 100644 --- a/backend/internal/storage/sqlite/gen/models.go +++ b/backend/internal/storage/sqlite/gen/models.go @@ -22,14 +22,44 @@ type ChangeLog struct { } type PR struct { - URL string - SessionID domain.SessionID - Number int64 - PRState domain.PRState - ReviewDecision domain.ReviewDecision - CIState domain.CIState - Mergeability domain.Mergeability - UpdatedAt time.Time + URL string + SessionID domain.SessionID + Number int64 + PRState domain.PRState + ReviewDecision domain.ReviewDecision + CIState domain.CIState + Mergeability domain.Mergeability + UpdatedAt time.Time + Provider string + Host string + Repo string + SourceBranch string + TargetBranch string + HeadSha string + Title string + Additions int64 + Deletions int64 + ChangedFiles int64 + Author string + BaseSha string + MergeCommitSha string + IsDraft int64 + IsMerged int64 + IsClosed int64 + ProviderState string + ProviderMergeable string + ProviderMergeStateStatus string + HtmlURL string + CreatedAtProvider sql.NullTime + UpdatedAtProvider sql.NullTime + MergedAtProvider sql.NullTime + ClosedAtProvider sql.NullTime + MetadataHash string + CIHash string + ReviewHash string + ObservedAt sql.NullTime + CIObservedAt sql.NullTime + ReviewObservedAt sql.NullTime } type PRCheck struct { @@ -40,6 +70,8 @@ type PRCheck struct { URL string LogTail string CreatedAt time.Time + Conclusion string + Details string } type PRComment struct { @@ -51,6 +83,20 @@ type PRComment struct { Body string Resolved bool CreatedAt time.Time + ThreadID string + URL string + IsBot int64 +} + +type PRReviewThread struct { + PRURL string + ThreadID string + Path string + Line int64 + Resolved int64 + IsBot int64 + SemanticHash string + UpdatedAt time.Time } type Project struct { diff --git a/backend/internal/storage/sqlite/gen/pr.sql.go b/backend/internal/storage/sqlite/gen/pr.sql.go index 154885cf..4edc7cd5 100644 --- a/backend/internal/storage/sqlite/gen/pr.sql.go +++ b/backend/internal/storage/sqlite/gen/pr.sql.go @@ -7,6 +7,7 @@ package gen import ( "context" + "database/sql" "time" "github.com/aoagents/agent-orchestrator/backend/internal/domain" @@ -25,6 +26,7 @@ SELECT FROM pr_comment WHERE pr_comment.pr_url = pr.url AND pr_comment.resolved = 0 + AND pr_comment.is_bot = 0 ) AS review_comments FROM pr WHERE pr.session_id = ? @@ -60,7 +62,14 @@ func (q *Queries) GetDisplayPRFactsBySession(ctx context.Context, sessionID doma } const getPR = `-- name: GetPR :one -SELECT url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at +SELECT + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at FROM pr WHERE url = ? ` @@ -77,12 +86,49 @@ func (q *Queries) GetPR(ctx context.Context, url string) (PR, error) { &i.CIState, &i.Mergeability, &i.UpdatedAt, + &i.Provider, + &i.Host, + &i.Repo, + &i.SourceBranch, + &i.TargetBranch, + &i.HeadSha, + &i.Title, + &i.Additions, + &i.Deletions, + &i.ChangedFiles, + &i.Author, + &i.BaseSha, + &i.MergeCommitSha, + &i.IsDraft, + &i.IsMerged, + &i.IsClosed, + &i.ProviderState, + &i.ProviderMergeable, + &i.ProviderMergeStateStatus, + &i.HtmlURL, + &i.CreatedAtProvider, + &i.UpdatedAtProvider, + &i.MergedAtProvider, + &i.ClosedAtProvider, + &i.MetadataHash, + &i.CIHash, + &i.ReviewHash, + &i.ObservedAt, + &i.CIObservedAt, + &i.ReviewObservedAt, ) return i, err } const listPRsBySession = `-- name: ListPRsBySession :many -SELECT url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at +SELECT + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at FROM pr WHERE session_id = ? ORDER BY updated_at DESC @@ -106,6 +152,36 @@ func (q *Queries) ListPRsBySession(ctx context.Context, sessionID domain.Session &i.CIState, &i.Mergeability, &i.UpdatedAt, + &i.Provider, + &i.Host, + &i.Repo, + &i.SourceBranch, + &i.TargetBranch, + &i.HeadSha, + &i.Title, + &i.Additions, + &i.Deletions, + &i.ChangedFiles, + &i.Author, + &i.BaseSha, + &i.MergeCommitSha, + &i.IsDraft, + &i.IsMerged, + &i.IsClosed, + &i.ProviderState, + &i.ProviderMergeable, + &i.ProviderMergeStateStatus, + &i.HtmlURL, + &i.CreatedAtProvider, + &i.UpdatedAtProvider, + &i.MergedAtProvider, + &i.ClosedAtProvider, + &i.MetadataHash, + &i.CIHash, + &i.ReviewHash, + &i.ObservedAt, + &i.CIObservedAt, + &i.ReviewObservedAt, ); err != nil { return nil, err } @@ -120,19 +196,25 @@ func (q *Queries) ListPRsBySession(ctx context.Context, sessionID domain.Session return items, nil } -const upsertPR = `-- name: UpsertPR :exec -INSERT INTO pr (url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at) -VALUES (?, ?, ?, ?, ?, ?, ?, ?) +const upsertLegacyPR = `-- name: UpsertLegacyPR :exec +INSERT INTO pr ( + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + is_draft, is_merged, is_closed +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT (url) DO UPDATE SET number = excluded.number, pr_state = excluded.pr_state, review_decision = excluded.review_decision, ci_state = excluded.ci_state, mergeability = excluded.mergeability, - updated_at = excluded.updated_at + updated_at = excluded.updated_at, + is_draft = excluded.is_draft, + is_merged = excluded.is_merged, + is_closed = excluded.is_closed ` -type UpsertPRParams struct { +type UpsertLegacyPRParams struct { URL string SessionID domain.SessionID Number int64 @@ -141,6 +223,117 @@ type UpsertPRParams struct { CIState domain.CIState Mergeability domain.Mergeability UpdatedAt time.Time + IsDraft int64 + IsMerged int64 + IsClosed int64 +} + +func (q *Queries) UpsertLegacyPR(ctx context.Context, arg UpsertLegacyPRParams) error { + _, err := q.db.ExecContext(ctx, upsertLegacyPR, + arg.URL, + arg.SessionID, + arg.Number, + arg.PRState, + arg.ReviewDecision, + arg.CIState, + arg.Mergeability, + arg.UpdatedAt, + arg.IsDraft, + arg.IsMerged, + arg.IsClosed, + ) + return err +} + +const upsertPR = `-- name: UpsertPR :exec +INSERT INTO pr ( + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +ON CONFLICT (url) DO UPDATE SET + number = excluded.number, + pr_state = excluded.pr_state, + review_decision = excluded.review_decision, + ci_state = excluded.ci_state, + mergeability = excluded.mergeability, + updated_at = excluded.updated_at, + provider = excluded.provider, + host = excluded.host, + repo = excluded.repo, + source_branch = excluded.source_branch, + target_branch = excluded.target_branch, + head_sha = excluded.head_sha, + title = excluded.title, + additions = excluded.additions, + deletions = excluded.deletions, + changed_files = excluded.changed_files, + author = excluded.author, + base_sha = excluded.base_sha, + merge_commit_sha = excluded.merge_commit_sha, + is_draft = excluded.is_draft, + is_merged = excluded.is_merged, + is_closed = excluded.is_closed, + provider_state = excluded.provider_state, + provider_mergeable = excluded.provider_mergeable, + provider_merge_state_status = excluded.provider_merge_state_status, + html_url = excluded.html_url, + created_at_provider = excluded.created_at_provider, + updated_at_provider = excluded.updated_at_provider, + merged_at_provider = excluded.merged_at_provider, + closed_at_provider = excluded.closed_at_provider, + metadata_hash = excluded.metadata_hash, + ci_hash = excluded.ci_hash, + review_hash = excluded.review_hash, + observed_at = excluded.observed_at, + ci_observed_at = excluded.ci_observed_at, + review_observed_at = excluded.review_observed_at +` + +type UpsertPRParams struct { + URL string + SessionID domain.SessionID + Number int64 + PRState domain.PRState + ReviewDecision domain.ReviewDecision + CIState domain.CIState + Mergeability domain.Mergeability + UpdatedAt time.Time + Provider string + Host string + Repo string + SourceBranch string + TargetBranch string + HeadSha string + Title string + Additions int64 + Deletions int64 + ChangedFiles int64 + Author string + BaseSha string + MergeCommitSha string + IsDraft int64 + IsMerged int64 + IsClosed int64 + ProviderState string + ProviderMergeable string + ProviderMergeStateStatus string + HtmlURL string + CreatedAtProvider sql.NullTime + UpdatedAtProvider sql.NullTime + MergedAtProvider sql.NullTime + ClosedAtProvider sql.NullTime + MetadataHash string + CIHash string + ReviewHash string + ObservedAt sql.NullTime + CIObservedAt sql.NullTime + ReviewObservedAt sql.NullTime } func (q *Queries) UpsertPR(ctx context.Context, arg UpsertPRParams) error { @@ -153,6 +346,36 @@ func (q *Queries) UpsertPR(ctx context.Context, arg UpsertPRParams) error { arg.CIState, arg.Mergeability, arg.UpdatedAt, + arg.Provider, + arg.Host, + arg.Repo, + arg.SourceBranch, + arg.TargetBranch, + arg.HeadSha, + arg.Title, + arg.Additions, + arg.Deletions, + arg.ChangedFiles, + arg.Author, + arg.BaseSha, + arg.MergeCommitSha, + arg.IsDraft, + arg.IsMerged, + arg.IsClosed, + arg.ProviderState, + arg.ProviderMergeable, + arg.ProviderMergeStateStatus, + arg.HtmlURL, + arg.CreatedAtProvider, + arg.UpdatedAtProvider, + arg.MergedAtProvider, + arg.ClosedAtProvider, + arg.MetadataHash, + arg.CIHash, + arg.ReviewHash, + arg.ObservedAt, + arg.CIObservedAt, + arg.ReviewObservedAt, ) return err } diff --git a/backend/internal/storage/sqlite/gen/pr_checks.sql.go b/backend/internal/storage/sqlite/gen/pr_checks.sql.go index fde21e67..1ee0df2c 100644 --- a/backend/internal/storage/sqlite/gen/pr_checks.sql.go +++ b/backend/internal/storage/sqlite/gen/pr_checks.sql.go @@ -13,7 +13,7 @@ import ( ) const listChecksByPR = `-- name: ListChecksByPR :many -SELECT pr_url, name, commit_hash, status, url, log_tail, created_at +SELECT pr_url, name, commit_hash, status, url, log_tail, created_at, conclusion, details FROM pr_checks WHERE pr_url = ? ORDER BY name, created_at ` @@ -34,6 +34,8 @@ func (q *Queries) ListChecksByPR(ctx context.Context, prUrl string) ([]PRCheck, &i.URL, &i.LogTail, &i.CreatedAt, + &i.Conclusion, + &i.Details, ); err != nil { return nil, err } @@ -49,12 +51,14 @@ func (q *Queries) ListChecksByPR(ctx context.Context, prUrl string) ([]PRCheck, } const upsertPRCheck = `-- name: UpsertPRCheck :exec -INSERT INTO pr_checks (pr_url, name, commit_hash, status, url, log_tail, created_at) -VALUES (?, ?, ?, ?, ?, ?, ?) +INSERT INTO pr_checks (pr_url, name, commit_hash, status, url, log_tail, created_at, conclusion, details) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT (pr_url, name, commit_hash) DO UPDATE SET status = excluded.status, url = excluded.url, - log_tail = excluded.log_tail + log_tail = excluded.log_tail, + conclusion = excluded.conclusion, + details = excluded.details ` type UpsertPRCheckParams struct { @@ -65,6 +69,8 @@ type UpsertPRCheckParams struct { URL string LogTail string CreatedAt time.Time + Conclusion string + Details string } func (q *Queries) UpsertPRCheck(ctx context.Context, arg UpsertPRCheckParams) error { @@ -76,6 +82,8 @@ func (q *Queries) UpsertPRCheck(ctx context.Context, arg UpsertPRCheckParams) er arg.URL, arg.LogTail, arg.CreatedAt, + arg.Conclusion, + arg.Details, ) return err } diff --git a/backend/internal/storage/sqlite/gen/pr_comment.sql.go b/backend/internal/storage/sqlite/gen/pr_comment.sql.go index f69cc17b..a5dba653 100644 --- a/backend/internal/storage/sqlite/gen/pr_comment.sql.go +++ b/backend/internal/storage/sqlite/gen/pr_comment.sql.go @@ -10,6 +10,15 @@ import ( "time" ) +const deleteLegacyPRComments = `-- name: DeleteLegacyPRComments :exec +DELETE FROM pr_comment WHERE pr_url = ? AND thread_id = '' +` + +func (q *Queries) DeleteLegacyPRComments(ctx context.Context, prUrl string) error { + _, err := q.db.ExecContext(ctx, deleteLegacyPRComments, prUrl) + return err +} + const deletePRComments = `-- name: DeletePRComments :exec DELETE FROM pr_comment WHERE pr_url = ? ` @@ -19,9 +28,59 @@ func (q *Queries) DeletePRComments(ctx context.Context, prUrl string) error { return err } +const deletePRCommentsByThread = `-- name: DeletePRCommentsByThread :exec +DELETE FROM pr_comment WHERE pr_url = ? AND thread_id = ? +` + +type DeletePRCommentsByThreadParams struct { + PRURL string + ThreadID string +} + +func (q *Queries) DeletePRCommentsByThread(ctx context.Context, arg DeletePRCommentsByThreadParams) error { + _, err := q.db.ExecContext(ctx, deletePRCommentsByThread, arg.PRURL, arg.ThreadID) + return err +} + +const insertLegacyPRComment = `-- name: InsertLegacyPRComment :exec +INSERT OR IGNORE INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +` + +type InsertLegacyPRCommentParams struct { + PRURL string + CommentID string + Author string + File string + Line int64 + Body string + Resolved bool + CreatedAt time.Time + ThreadID string + URL string + IsBot int64 +} + +func (q *Queries) InsertLegacyPRComment(ctx context.Context, arg InsertLegacyPRCommentParams) error { + _, err := q.db.ExecContext(ctx, insertLegacyPRComment, + arg.PRURL, + arg.CommentID, + arg.Author, + arg.File, + arg.Line, + arg.Body, + arg.Resolved, + arg.CreatedAt, + arg.ThreadID, + arg.URL, + arg.IsBot, + ) + return err +} + const insertPRComment = `-- name: InsertPRComment :exec -INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at) -VALUES (?, ?, ?, ?, ?, ?, ?, ?) +INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ` type InsertPRCommentParams struct { @@ -33,6 +92,9 @@ type InsertPRCommentParams struct { Body string Resolved bool CreatedAt time.Time + ThreadID string + URL string + IsBot int64 } func (q *Queries) InsertPRComment(ctx context.Context, arg InsertPRCommentParams) error { @@ -45,12 +107,15 @@ func (q *Queries) InsertPRComment(ctx context.Context, arg InsertPRCommentParams arg.Body, arg.Resolved, arg.CreatedAt, + arg.ThreadID, + arg.URL, + arg.IsBot, ) return err } const listPRComments = `-- name: ListPRComments :many -SELECT pr_url, comment_id, author, file, line, body, resolved, created_at +SELECT pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot FROM pr_comment WHERE pr_url = ? ORDER BY created_at, comment_id ` @@ -72,6 +137,9 @@ func (q *Queries) ListPRComments(ctx context.Context, prUrl string) ([]PRComment &i.Body, &i.Resolved, &i.CreatedAt, + &i.ThreadID, + &i.URL, + &i.IsBot, ); err != nil { return nil, err } diff --git a/backend/internal/storage/sqlite/gen/pr_review_threads.sql.go b/backend/internal/storage/sqlite/gen/pr_review_threads.sql.go new file mode 100644 index 00000000..0f2eda25 --- /dev/null +++ b/backend/internal/storage/sqlite/gen/pr_review_threads.sql.go @@ -0,0 +1,95 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.31.1 +// source: pr_review_threads.sql + +package gen + +import ( + "context" + "time" +) + +const deletePRReviewThreads = `-- name: DeletePRReviewThreads :exec +DELETE FROM pr_review_threads WHERE pr_url = ? +` + +func (q *Queries) DeletePRReviewThreads(ctx context.Context, prUrl string) error { + _, err := q.db.ExecContext(ctx, deletePRReviewThreads, prUrl) + return err +} + +const listPRReviewThreads = `-- name: ListPRReviewThreads :many +SELECT pr_url, thread_id, path, line, resolved, is_bot, semantic_hash, updated_at +FROM pr_review_threads WHERE pr_url = ? ORDER BY updated_at, thread_id +` + +func (q *Queries) ListPRReviewThreads(ctx context.Context, prUrl string) ([]PRReviewThread, error) { + rows, err := q.db.QueryContext(ctx, listPRReviewThreads, prUrl) + if err != nil { + return nil, err + } + defer rows.Close() + items := []PRReviewThread{} + for rows.Next() { + var i PRReviewThread + if err := rows.Scan( + &i.PRURL, + &i.ThreadID, + &i.Path, + &i.Line, + &i.Resolved, + &i.IsBot, + &i.SemanticHash, + &i.UpdatedAt, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const upsertPRReviewThread = `-- name: UpsertPRReviewThread :exec +INSERT INTO pr_review_threads (pr_url, thread_id, path, line, resolved, is_bot, semantic_hash, updated_at) +VALUES (?, ?, ?, ?, ?, ?, ?, ?) +ON CONFLICT (pr_url, thread_id) DO UPDATE SET + path = excluded.path, + line = excluded.line, + resolved = excluded.resolved, + is_bot = excluded.is_bot, + semantic_hash = excluded.semantic_hash, + updated_at = excluded.updated_at +` + +type UpsertPRReviewThreadParams struct { + PRURL string + ThreadID string + Path string + Line int64 + Resolved int64 + IsBot int64 + SemanticHash string + UpdatedAt time.Time +} + +// Summary: SQLC queries for replacing and reading normalized PR review threads. +func (q *Queries) UpsertPRReviewThread(ctx context.Context, arg UpsertPRReviewThreadParams) error { + _, err := q.db.ExecContext(ctx, upsertPRReviewThread, + arg.PRURL, + arg.ThreadID, + arg.Path, + arg.Line, + arg.Resolved, + arg.IsBot, + arg.SemanticHash, + arg.UpdatedAt, + ) + return err +} diff --git a/backend/internal/storage/sqlite/gen/projects.sql.go b/backend/internal/storage/sqlite/gen/projects.sql.go index 89c99d1e..dea720c6 100644 --- a/backend/internal/storage/sqlite/gen/projects.sql.go +++ b/backend/internal/storage/sqlite/gen/projects.sql.go @@ -14,7 +14,7 @@ import ( ) const archiveProject = `-- name: ArchiveProject :execrows -UPDATE projects SET archived_at = ? WHERE id = ? +UPDATE projects SET archived_at = ? WHERE id = ? AND archived_at IS NULL ` type ArchiveProjectParams struct { diff --git a/backend/internal/storage/sqlite/migrations/0004_scm_observer_schema.sql b/backend/internal/storage/sqlite/migrations/0004_scm_observer_schema.sql new file mode 100644 index 00000000..d29dc683 --- /dev/null +++ b/backend/internal/storage/sqlite/migrations/0004_scm_observer_schema.sql @@ -0,0 +1,365 @@ +-- Summary: extend PR persistence for provider-neutral SCM observations, CI/check detail, +-- review-thread storage, and semantic hashes used by the SCM observer. +-- +goose Up +-- +goose StatementBegin +ALTER TABLE pr ADD COLUMN provider TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN host TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN repo TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN source_branch TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN target_branch TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN head_sha TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN title TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN additions INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN deletions INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN changed_files INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN author TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN base_sha TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN merge_commit_sha TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN is_draft INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN is_merged INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN is_closed INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pr ADD COLUMN provider_state TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN provider_mergeable TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN provider_merge_state_status TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN html_url TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN created_at_provider TIMESTAMP; +ALTER TABLE pr ADD COLUMN updated_at_provider TIMESTAMP; +ALTER TABLE pr ADD COLUMN merged_at_provider TIMESTAMP; +ALTER TABLE pr ADD COLUMN closed_at_provider TIMESTAMP; +ALTER TABLE pr ADD COLUMN metadata_hash TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN ci_hash TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN review_hash TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr ADD COLUMN observed_at TIMESTAMP; +ALTER TABLE pr ADD COLUMN ci_observed_at TIMESTAMP; +ALTER TABLE pr ADD COLUMN review_observed_at TIMESTAMP; + +ALTER TABLE pr_checks ADD COLUMN conclusion TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr_checks ADD COLUMN details TEXT NOT NULL DEFAULT ''; + +ALTER TABLE pr_comment ADD COLUMN thread_id TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr_comment ADD COLUMN url TEXT NOT NULL DEFAULT ''; +ALTER TABLE pr_comment ADD COLUMN is_bot INTEGER NOT NULL DEFAULT 0; + +-- Widen change_log.event_type CHECK to include the new pr_review_thread_* events. +-- SQLite cannot ALTER an in-place CHECK constraint. Drop CDC triggers before +-- rebuilding change_log; otherwise dropping the old table invalidates triggers +-- that still reference it. +DROP TRIGGER IF EXISTS sessions_cdc_insert; +DROP TRIGGER IF EXISTS sessions_cdc_update; +DROP TRIGGER IF EXISTS pr_cdc_insert; +DROP TRIGGER IF EXISTS pr_cdc_update; +DROP TRIGGER IF EXISTS pr_checks_cdc_insert; +DROP TRIGGER IF EXISTS pr_checks_cdc_update; + +CREATE TABLE change_log_new ( + seq INTEGER PRIMARY KEY AUTOINCREMENT, + project_id TEXT NOT NULL REFERENCES projects (id), + session_id TEXT REFERENCES sessions (id), + event_type TEXT NOT NULL + CHECK (event_type IN ( + 'session_created', + 'session_updated', + 'pr_created', + 'pr_updated', + 'pr_check_recorded', + 'pr_review_thread_added', + 'pr_review_thread_resolved' + )), + payload TEXT NOT NULL CHECK (json_valid(payload)), + created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')) +); +INSERT INTO change_log_new (seq, project_id, session_id, event_type, payload, created_at) +SELECT seq, project_id, session_id, event_type, payload, created_at FROM change_log; +DROP INDEX IF EXISTS idx_change_log_project; +DROP TABLE change_log; +ALTER TABLE change_log_new RENAME TO change_log; +CREATE INDEX idx_change_log_project ON change_log (project_id, seq); + +CREATE TABLE pr_review_threads ( + pr_url TEXT NOT NULL REFERENCES pr (url) ON DELETE CASCADE, + thread_id TEXT NOT NULL, + path TEXT NOT NULL DEFAULT '', + line INTEGER NOT NULL DEFAULT 0, + resolved INTEGER NOT NULL DEFAULT 0, + is_bot INTEGER NOT NULL DEFAULT 0, + semantic_hash TEXT NOT NULL DEFAULT '', + updated_at TIMESTAMP NOT NULL, + PRIMARY KEY (pr_url, thread_id) +); +CREATE INDEX idx_pr_review_threads_lookup ON pr_review_threads (pr_url, updated_at); +-- +goose StatementEnd + +-- +goose StatementBegin +-- Emit on every new review thread the SCM observer persists, so the broadcaster +-- can stream per-thread additions instead of waiting for a rolled-up review_decision flip. +CREATE TRIGGER pr_review_threads_cdc_insert +AFTER INSERT ON pr_review_threads +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_review_thread_added', + json_object( + 'pr', NEW.pr_url, + 'thread', NEW.thread_id, + 'path', NEW.path, + 'line', NEW.line, + 'resolved', json(CASE WHEN NEW.resolved THEN 'true' ELSE 'false' END), + 'isBot', json(CASE WHEN NEW.is_bot THEN 'true' ELSE 'false' END) + ), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +-- Emit only on resolved <-> unresolved transitions. Other thread mutations +-- (semantic_hash refresh, line shifts) are captured by the slower review-decision +-- rollup so we don't flood CDC with no-op semantic-hash updates. +CREATE TRIGGER pr_review_threads_cdc_update +AFTER UPDATE ON pr_review_threads +WHEN OLD.resolved <> NEW.resolved +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_review_thread_resolved', + json_object( + 'pr', NEW.pr_url, + 'thread', NEW.thread_id, + 'path', NEW.path, + 'line', NEW.line, + 'resolved', json(CASE WHEN NEW.resolved THEN 'true' ELSE 'false' END) + ), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER sessions_cdc_insert +AFTER INSERT ON sessions +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES (NEW.project_id, NEW.id, 'session_created', + json_object('id', NEW.id, 'activity', NEW.activity_state, 'isTerminated', json(CASE WHEN NEW.is_terminated THEN 'true' ELSE 'false' END)), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER sessions_cdc_update +AFTER UPDATE ON sessions +WHEN OLD.activity_state <> NEW.activity_state + OR OLD.is_terminated <> NEW.is_terminated +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES (NEW.project_id, NEW.id, 'session_updated', + json_object('id', NEW.id, 'activity', NEW.activity_state, 'isTerminated', json(CASE WHEN NEW.is_terminated THEN 'true' ELSE 'false' END)), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_cdc_insert +AFTER INSERT ON pr +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ((SELECT project_id FROM sessions WHERE id = NEW.session_id), NEW.session_id, 'pr_created', + json_object('url', NEW.url, 'session', NEW.session_id, 'state', NEW.pr_state, + 'ci', NEW.ci_state, 'review', NEW.review_decision, 'mergeability', NEW.mergeability), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_cdc_update +AFTER UPDATE ON pr +WHEN OLD.pr_state <> NEW.pr_state + OR OLD.ci_state <> NEW.ci_state + OR OLD.review_decision <> NEW.review_decision + OR OLD.mergeability <> NEW.mergeability +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ((SELECT project_id FROM sessions WHERE id = NEW.session_id), NEW.session_id, 'pr_updated', + json_object('url', NEW.url, 'session', NEW.session_id, 'state', NEW.pr_state, + 'ci', NEW.ci_state, 'review', NEW.review_decision, 'mergeability', NEW.mergeability), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_checks_cdc_insert +AFTER INSERT ON pr_checks +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_check_recorded', + json_object('pr', NEW.pr_url, 'name', NEW.name, 'commit', NEW.commit_hash, 'status', NEW.status), + NEW.created_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_checks_cdc_update +AFTER UPDATE ON pr_checks +WHEN OLD.status <> NEW.status +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_check_recorded', + json_object('pr', NEW.pr_url, 'name', NEW.name, 'commit', NEW.commit_hash, 'status', NEW.status), + datetime('now')); +END; +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +DROP TRIGGER IF EXISTS pr_review_threads_cdc_update; +DROP TRIGGER IF EXISTS pr_review_threads_cdc_insert; +DROP TRIGGER IF EXISTS sessions_cdc_insert; +DROP TRIGGER IF EXISTS sessions_cdc_update; +DROP TRIGGER IF EXISTS pr_cdc_insert; +DROP TRIGGER IF EXISTS pr_cdc_update; +DROP TRIGGER IF EXISTS pr_checks_cdc_insert; +DROP TRIGGER IF EXISTS pr_checks_cdc_update; + +CREATE TABLE change_log_old ( + seq INTEGER PRIMARY KEY AUTOINCREMENT, + project_id TEXT NOT NULL REFERENCES projects (id), + session_id TEXT REFERENCES sessions (id), + event_type TEXT NOT NULL + CHECK (event_type IN ('session_created', 'session_updated', 'pr_created', 'pr_updated', 'pr_check_recorded')), + payload TEXT NOT NULL CHECK (json_valid(payload)), + created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')) +); +INSERT INTO change_log_old (seq, project_id, session_id, event_type, payload, created_at) +SELECT seq, project_id, session_id, event_type, payload, created_at FROM change_log +WHERE event_type IN ('session_created', 'session_updated', 'pr_created', 'pr_updated', 'pr_check_recorded'); +DROP INDEX IF EXISTS idx_change_log_project; +DROP TABLE change_log; +ALTER TABLE change_log_old RENAME TO change_log; +CREATE INDEX idx_change_log_project ON change_log (project_id, seq); + +DROP TABLE pr_review_threads; +ALTER TABLE pr_comment DROP COLUMN is_bot; +ALTER TABLE pr_comment DROP COLUMN url; +ALTER TABLE pr_comment DROP COLUMN thread_id; +ALTER TABLE pr_checks DROP COLUMN details; +ALTER TABLE pr_checks DROP COLUMN conclusion; +ALTER TABLE pr DROP COLUMN review_observed_at; +ALTER TABLE pr DROP COLUMN ci_observed_at; +ALTER TABLE pr DROP COLUMN observed_at; +ALTER TABLE pr DROP COLUMN review_hash; +ALTER TABLE pr DROP COLUMN ci_hash; +ALTER TABLE pr DROP COLUMN metadata_hash; +ALTER TABLE pr DROP COLUMN closed_at_provider; +ALTER TABLE pr DROP COLUMN merged_at_provider; +ALTER TABLE pr DROP COLUMN updated_at_provider; +ALTER TABLE pr DROP COLUMN created_at_provider; +ALTER TABLE pr DROP COLUMN html_url; +ALTER TABLE pr DROP COLUMN provider_merge_state_status; +ALTER TABLE pr DROP COLUMN provider_mergeable; +ALTER TABLE pr DROP COLUMN provider_state; +ALTER TABLE pr DROP COLUMN is_closed; +ALTER TABLE pr DROP COLUMN is_merged; +ALTER TABLE pr DROP COLUMN is_draft; +ALTER TABLE pr DROP COLUMN merge_commit_sha; +ALTER TABLE pr DROP COLUMN base_sha; +ALTER TABLE pr DROP COLUMN author; +ALTER TABLE pr DROP COLUMN changed_files; +ALTER TABLE pr DROP COLUMN deletions; +ALTER TABLE pr DROP COLUMN additions; +ALTER TABLE pr DROP COLUMN title; +ALTER TABLE pr DROP COLUMN head_sha; +ALTER TABLE pr DROP COLUMN target_branch; +ALTER TABLE pr DROP COLUMN source_branch; +ALTER TABLE pr DROP COLUMN repo; +ALTER TABLE pr DROP COLUMN host; +ALTER TABLE pr DROP COLUMN provider; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER sessions_cdc_insert +AFTER INSERT ON sessions +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES (NEW.project_id, NEW.id, 'session_created', + json_object('id', NEW.id, 'activity', NEW.activity_state, 'isTerminated', json(CASE WHEN NEW.is_terminated THEN 'true' ELSE 'false' END)), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER sessions_cdc_update +AFTER UPDATE ON sessions +WHEN OLD.activity_state <> NEW.activity_state + OR OLD.is_terminated <> NEW.is_terminated +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES (NEW.project_id, NEW.id, 'session_updated', + json_object('id', NEW.id, 'activity', NEW.activity_state, 'isTerminated', json(CASE WHEN NEW.is_terminated THEN 'true' ELSE 'false' END)), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_cdc_insert +AFTER INSERT ON pr +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ((SELECT project_id FROM sessions WHERE id = NEW.session_id), NEW.session_id, 'pr_created', + json_object('url', NEW.url, 'session', NEW.session_id, 'state', NEW.pr_state, + 'ci', NEW.ci_state, 'review', NEW.review_decision, 'mergeability', NEW.mergeability), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_cdc_update +AFTER UPDATE ON pr +WHEN OLD.pr_state <> NEW.pr_state + OR OLD.ci_state <> NEW.ci_state + OR OLD.review_decision <> NEW.review_decision + OR OLD.mergeability <> NEW.mergeability +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ((SELECT project_id FROM sessions WHERE id = NEW.session_id), NEW.session_id, 'pr_updated', + json_object('url', NEW.url, 'session', NEW.session_id, 'state', NEW.pr_state, + 'ci', NEW.ci_state, 'review', NEW.review_decision, 'mergeability', NEW.mergeability), + NEW.updated_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_checks_cdc_insert +AFTER INSERT ON pr_checks +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_check_recorded', + json_object('pr', NEW.pr_url, 'name', NEW.name, 'commit', NEW.commit_hash, 'status', NEW.status), + NEW.created_at); +END; +-- +goose StatementEnd + +-- +goose StatementBegin +CREATE TRIGGER pr_checks_cdc_update +AFTER UPDATE ON pr_checks +WHEN OLD.status <> NEW.status +BEGIN + INSERT INTO change_log (project_id, session_id, event_type, payload, created_at) + VALUES ( + (SELECT s.project_id FROM pr p JOIN sessions s ON s.id = p.session_id WHERE p.url = NEW.pr_url), + (SELECT session_id FROM pr WHERE url = NEW.pr_url), + 'pr_check_recorded', + json_object('pr', NEW.pr_url, 'name', NEW.name, 'commit', NEW.commit_hash, 'status', NEW.status), + datetime('now')); +END; +-- +goose StatementEnd diff --git a/backend/internal/storage/sqlite/queries/pr.sql b/backend/internal/storage/sqlite/queries/pr.sql index 508eddd4..9eacbb3f 100644 --- a/backend/internal/storage/sqlite/queries/pr.sql +++ b/backend/internal/storage/sqlite/queries/pr.sql @@ -1,26 +1,94 @@ -- name: UpsertPR :exec -INSERT INTO pr (url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at) -VALUES (?, ?, ?, ?, ?, ?, ?, ?) +INSERT INTO pr ( + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT (url) DO UPDATE SET number = excluded.number, pr_state = excluded.pr_state, review_decision = excluded.review_decision, ci_state = excluded.ci_state, mergeability = excluded.mergeability, - updated_at = excluded.updated_at; + updated_at = excluded.updated_at, + provider = excluded.provider, + host = excluded.host, + repo = excluded.repo, + source_branch = excluded.source_branch, + target_branch = excluded.target_branch, + head_sha = excluded.head_sha, + title = excluded.title, + additions = excluded.additions, + deletions = excluded.deletions, + changed_files = excluded.changed_files, + author = excluded.author, + base_sha = excluded.base_sha, + merge_commit_sha = excluded.merge_commit_sha, + is_draft = excluded.is_draft, + is_merged = excluded.is_merged, + is_closed = excluded.is_closed, + provider_state = excluded.provider_state, + provider_mergeable = excluded.provider_mergeable, + provider_merge_state_status = excluded.provider_merge_state_status, + html_url = excluded.html_url, + created_at_provider = excluded.created_at_provider, + updated_at_provider = excluded.updated_at_provider, + merged_at_provider = excluded.merged_at_provider, + closed_at_provider = excluded.closed_at_provider, + metadata_hash = excluded.metadata_hash, + ci_hash = excluded.ci_hash, + review_hash = excluded.review_hash, + observed_at = excluded.observed_at, + ci_observed_at = excluded.ci_observed_at, + review_observed_at = excluded.review_observed_at; + +-- name: UpsertLegacyPR :exec +INSERT INTO pr ( + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + is_draft, is_merged, is_closed +) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +ON CONFLICT (url) DO UPDATE SET + number = excluded.number, + pr_state = excluded.pr_state, + review_decision = excluded.review_decision, + ci_state = excluded.ci_state, + mergeability = excluded.mergeability, + updated_at = excluded.updated_at, + is_draft = excluded.is_draft, + is_merged = excluded.is_merged, + is_closed = excluded.is_closed; -- name: GetPR :one -SELECT url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at +SELECT + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at FROM pr WHERE url = ?; -- name: ListPRsBySession :many -SELECT url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at +SELECT + url, session_id, number, pr_state, review_decision, ci_state, mergeability, updated_at, + provider, host, repo, source_branch, target_branch, head_sha, title, + additions, deletions, changed_files, author, base_sha, merge_commit_sha, + is_draft, is_merged, is_closed, + provider_state, provider_mergeable, provider_merge_state_status, html_url, + created_at_provider, updated_at_provider, merged_at_provider, closed_at_provider, + metadata_hash, ci_hash, review_hash, observed_at, ci_observed_at, review_observed_at FROM pr WHERE session_id = ? ORDER BY updated_at DESC; - -- name: GetDisplayPRFactsBySession :one SELECT pr.url, @@ -34,6 +102,7 @@ SELECT FROM pr_comment WHERE pr_comment.pr_url = pr.url AND pr_comment.resolved = 0 + AND pr_comment.is_bot = 0 ) AS review_comments FROM pr WHERE pr.session_id = ? diff --git a/backend/internal/storage/sqlite/queries/pr_checks.sql b/backend/internal/storage/sqlite/queries/pr_checks.sql index 2e223729..e0d42946 100644 --- a/backend/internal/storage/sqlite/queries/pr_checks.sql +++ b/backend/internal/storage/sqlite/queries/pr_checks.sql @@ -1,11 +1,13 @@ -- name: UpsertPRCheck :exec -INSERT INTO pr_checks (pr_url, name, commit_hash, status, url, log_tail, created_at) -VALUES (?, ?, ?, ?, ?, ?, ?) +INSERT INTO pr_checks (pr_url, name, commit_hash, status, url, log_tail, created_at, conclusion, details) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT (pr_url, name, commit_hash) DO UPDATE SET status = excluded.status, url = excluded.url, - log_tail = excluded.log_tail; + log_tail = excluded.log_tail, + conclusion = excluded.conclusion, + details = excluded.details; -- name: ListChecksByPR :many -SELECT pr_url, name, commit_hash, status, url, log_tail, created_at +SELECT pr_url, name, commit_hash, status, url, log_tail, created_at, conclusion, details FROM pr_checks WHERE pr_url = ? ORDER BY name, created_at; diff --git a/backend/internal/storage/sqlite/queries/pr_comment.sql b/backend/internal/storage/sqlite/queries/pr_comment.sql index 870a87d7..9a22cd1c 100644 --- a/backend/internal/storage/sqlite/queries/pr_comment.sql +++ b/backend/internal/storage/sqlite/queries/pr_comment.sql @@ -1,10 +1,20 @@ -- name: InsertPRComment :exec -INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at) -VALUES (?, ?, ?, ?, ?, ?, ?, ?); +INSERT INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); + +-- name: InsertLegacyPRComment :exec +INSERT OR IGNORE INTO pr_comment (pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); -- name: DeletePRComments :exec DELETE FROM pr_comment WHERE pr_url = ?; +-- name: DeleteLegacyPRComments :exec +DELETE FROM pr_comment WHERE pr_url = ? AND thread_id = ''; + +-- name: DeletePRCommentsByThread :exec +DELETE FROM pr_comment WHERE pr_url = ? AND thread_id = ?; + -- name: ListPRComments :many -SELECT pr_url, comment_id, author, file, line, body, resolved, created_at +SELECT pr_url, comment_id, author, file, line, body, resolved, created_at, thread_id, url, is_bot FROM pr_comment WHERE pr_url = ? ORDER BY created_at, comment_id; diff --git a/backend/internal/storage/sqlite/queries/pr_review_threads.sql b/backend/internal/storage/sqlite/queries/pr_review_threads.sql new file mode 100644 index 00000000..19f54481 --- /dev/null +++ b/backend/internal/storage/sqlite/queries/pr_review_threads.sql @@ -0,0 +1,18 @@ +-- Summary: SQLC queries for replacing and reading normalized PR review threads. +-- name: UpsertPRReviewThread :exec +INSERT INTO pr_review_threads (pr_url, thread_id, path, line, resolved, is_bot, semantic_hash, updated_at) +VALUES (?, ?, ?, ?, ?, ?, ?, ?) +ON CONFLICT (pr_url, thread_id) DO UPDATE SET + path = excluded.path, + line = excluded.line, + resolved = excluded.resolved, + is_bot = excluded.is_bot, + semantic_hash = excluded.semantic_hash, + updated_at = excluded.updated_at; + +-- name: DeletePRReviewThreads :exec +DELETE FROM pr_review_threads WHERE pr_url = ?; + +-- name: ListPRReviewThreads :many +SELECT pr_url, thread_id, path, line, resolved, is_bot, semantic_hash, updated_at +FROM pr_review_threads WHERE pr_url = ? ORDER BY updated_at, thread_id; diff --git a/backend/internal/storage/sqlite/store/pr_cdc_test.go b/backend/internal/storage/sqlite/store/pr_cdc_test.go index 82f53b75..971dc00a 100644 --- a/backend/internal/storage/sqlite/store/pr_cdc_test.go +++ b/backend/internal/storage/sqlite/store/pr_cdc_test.go @@ -2,12 +2,14 @@ package store_test import ( "context" + "encoding/json" "strings" "testing" "time" "github.com/aoagents/agent-orchestrator/backend/internal/cdc" "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" ) // A check can change status on the same commit (in_progress -> failed) via @@ -50,6 +52,69 @@ func TestPRChecksCDC_EmitsOnInsertAndStatusUpdate(t *testing.T) { } } +func TestPRReviewThreadsCDC_EmitsOnInsertAndResolvedTransition(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + rec, err := s.CreateSession(ctx, sampleRecord("mer")) + if err != nil { + t.Fatal(err) + } + now := time.Now().UTC().Truncate(time.Second) + pr := domain.PullRequest{URL: "https://example/pr/9", SessionID: rec.ID, Number: 9, UpdatedAt: now} + + if err := s.WriteSCMObservation(ctx, pr, nil, []domain.PullRequestReviewThread{{ + ThreadID: "t1", Path: "main.go", Line: 7, IsBot: true, SemanticHash: "v1", UpdatedAt: now, + }}, nil, ports.ReviewWriteReplace); err != nil { + t.Fatal(err) + } + if err := s.WriteSCMObservation(ctx, pr, nil, []domain.PullRequestReviewThread{{ + ThreadID: "t1", Path: "main.go", Line: 8, Resolved: true, IsBot: true, SemanticHash: "v2", UpdatedAt: now.Add(time.Second), + }}, nil, ports.ReviewWriteMerge); err != nil { + t.Fatal(err) + } + if err := s.WriteSCMObservation(ctx, pr, nil, []domain.PullRequestReviewThread{{ + ThreadID: "t1", Path: "main.go", Line: 9, Resolved: true, IsBot: true, SemanticHash: "v3", UpdatedAt: now.Add(2 * time.Second), + }}, nil, ports.ReviewWriteMerge); err != nil { + t.Fatal(err) + } + + rows, err := s.EventsAfter(ctx, 0, 100) + if err != nil { + t.Fatal(err) + } + var added, resolved []cdc.Event + for _, r := range rows { + switch r.Type { + case cdc.EventPRReviewThreadAdded: + added = append(added, r) + case cdc.EventPRReviewThreadResolved: + resolved = append(resolved, r) + } + } + if len(added) != 1 { + t.Fatalf("want 1 review-thread added CDC event, got %d", len(added)) + } + if len(resolved) != 1 { + t.Fatalf("want 1 review-thread resolved CDC event (resolved transition only), got %d", len(resolved)) + } + + var addPayload map[string]any + if err := json.Unmarshal(added[0].Payload, &addPayload); err != nil { + t.Fatalf("added payload JSON: %v", err) + } + if addPayload["thread"] != "t1" || addPayload["isBot"] != true || addPayload["resolved"] != false { + t.Fatalf("added payload = %#v", addPayload) + } + var resolvedPayload map[string]any + if err := json.Unmarshal(resolved[0].Payload, &resolvedPayload); err != nil { + t.Fatalf("resolved payload JSON: %v", err) + } + if resolvedPayload["thread"] != "t1" || resolvedPayload["line"] != float64(8) || resolvedPayload["resolved"] != true { + t.Fatalf("resolved payload = %#v", resolvedPayload) + } +} + // WritePR persists scalar facts, checks, and comments in one tx; all three // should be queryable afterward. func TestWritePR_PersistsScalarsChecksAndComments(t *testing.T) { diff --git a/backend/internal/storage/sqlite/store/pr_store.go b/backend/internal/storage/sqlite/store/pr_store.go index 0f609f7b..be664912 100644 --- a/backend/internal/storage/sqlite/store/pr_store.go +++ b/backend/internal/storage/sqlite/store/pr_store.go @@ -5,6 +5,7 @@ import ( "database/sql" "errors" "fmt" + "time" "github.com/aoagents/agent-orchestrator/backend/internal/domain" "github.com/aoagents/agent-orchestrator/backend/internal/ports" @@ -22,15 +23,31 @@ import ( // drift between either interface and this implementation fails here at the point // of definition rather than later at the call sites in lifecycle_wiring / tests. var ( - _ ports.PRWriter = (*Store)(nil) + _ ports.PRWriter = (*Store)(nil) + _ ports.SCMWriter = (*Store)(nil) ) -// WritePR persists a full PR observation — scalar facts, check runs, and the +// WritePR persists a legacy PR observation — scalar facts, check runs, and the // replacement comment set — in one write transaction, so the rows and the // change_log events their triggers emit are committed all-or-nothing. The scalar // PR upsert runs first so the checks'/comments' CDC triggers can resolve the -// session id from the pr row within the same transaction. +// session id from the pr row within the same transaction. It intentionally does +// not touch pr_review_threads: those rows are owned by WriteSCMObservation's +// slower review-thread refresh path. func (s *Store) WritePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, comments []domain.PullRequestComment) error { + return s.writePR(ctx, pr, checks, nil, comments, ports.ReviewWritePreserve, true) +} + +// WriteSCMObservation persists a provider-neutral SCM observation in one write +// transaction. It upserts the full PR metadata row and CI checks. Review threads +// and comments are preserved, replaced, or merged according to reviewMode +// because review polling runs at a slower and sometimes intentionally bounded +// cadence than metadata/CI polling. +func (s *Store) WriteSCMObservation(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, reviewMode ports.ReviewWriteMode) error { + return s.writePR(ctx, pr, checks, threads, comments, reviewMode, false) +} + +func (s *Store) writePR(ctx context.Context, pr domain.PullRequest, checks []domain.PullRequestCheck, threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment, reviewMode ports.ReviewWriteMode, replaceLegacyComments bool) error { s.writeMu.Lock() defer s.writeMu.Unlock() return s.inTx(ctx, "write pr observation", func(q *gen.Queries) error { @@ -41,26 +58,85 @@ func (s *Store) WritePR(ctx context.Context, pr domain.PullRequest, checks []dom if err == nil && existing.SessionID != pr.SessionID { return fmt.Errorf("pr %s already belongs to session %s", pr.URL, existing.SessionID) } - if err := q.UpsertPR(ctx, genPRParams(pr)); err != nil { - return err + if replaceLegacyComments { + if err := q.UpsertLegacyPR(ctx, genLegacyPRParams(pr)); err != nil { + return err + } + } else { + if err := q.UpsertPR(ctx, genPRParams(pr)); err != nil { + return err + } } for _, c := range checks { if err := q.UpsertPRCheck(ctx, genCheckParams(pr.URL, c)); err != nil { return err } } - if err := q.DeletePRComments(ctx, pr.URL); err != nil { - return err + if reviewMode == ports.ReviewWriteReplace { + if err := q.DeletePRReviewThreads(ctx, pr.URL); err != nil { + return err + } } - for _, c := range comments { - if err := q.InsertPRComment(ctx, genCommentParams(pr.URL, c)); err != nil { - return fmt.Errorf("comment %q: %w", c.ID, err) + if reviewMode == ports.ReviewWriteReplace { + if err := q.DeletePRComments(ctx, pr.URL); err != nil { + return err + } + } else if replaceLegacyComments { + if err := q.DeleteLegacyPRComments(ctx, pr.URL); err != nil { + return err + } + } + if reviewMode == ports.ReviewWriteReplace || reviewMode == ports.ReviewWriteMerge { + for _, th := range threads { + if err := q.UpsertPRReviewThread(ctx, genReviewThreadParams(pr.URL, th)); err != nil { + return fmt.Errorf("review thread %q: %w", th.ThreadID, err) + } + } + } + if reviewMode == ports.ReviewWriteMerge { + for _, threadID := range reviewThreadIDs(threads, comments) { + if err := q.DeletePRCommentsByThread(ctx, gen.DeletePRCommentsByThreadParams{PRURL: pr.URL, ThreadID: threadID}); err != nil { + return fmt.Errorf("delete comments for review thread %q: %w", threadID, err) + } + } + } + if reviewMode == ports.ReviewWriteReplace || reviewMode == ports.ReviewWriteMerge { + for _, c := range comments { + if err := q.InsertPRComment(ctx, genCommentParams(pr.URL, c)); err != nil { + return fmt.Errorf("comment %q: %w", c.ID, err) + } + } + } else if replaceLegacyComments { + for _, c := range comments { + if err := q.InsertLegacyPRComment(ctx, genLegacyCommentParams(pr.URL, c)); err != nil { + return fmt.Errorf("legacy comment %q: %w", c.ID, err) + } } } return nil }) } +func reviewThreadIDs(threads []domain.PullRequestReviewThread, comments []domain.PullRequestComment) []string { + seen := map[string]bool{} + out := make([]string, 0, len(threads)) + for _, th := range threads { + if th.ThreadID == "" || seen[th.ThreadID] { + continue + } + seen[th.ThreadID] = true + out = append(out, th.ThreadID) + } + for _, c := range comments { + if c.ThreadID == "" || seen[c.ThreadID] { + continue + } + seen[c.ThreadID] = true + out = append(out, c.ThreadID) + } + return out +} + // GetPR returns the PR facts for a URL, or ok=false if absent. func (s *Store) GetPR(ctx context.Context, url string) (domain.PullRequest, bool, error) { p, err := s.qr.GetPR(ctx, url) @@ -112,6 +188,19 @@ func (s *Store) ListPRComments(ctx context.Context, prURL string) ([]domain.Pull return out, nil } +// ListPRReviewThreads returns a PR's review threads, oldest first. +func (s *Store) ListPRReviewThreads(ctx context.Context, prURL string) ([]domain.PullRequestReviewThread, error) { + rows, err := s.qr.ListPRReviewThreads(ctx, prURL) + if err != nil { + return nil, fmt.Errorf("list pr review threads %s: %w", prURL, err) + } + out := make([]domain.PullRequestReviewThread, 0, len(rows)) + for _, th := range rows { + out = append(out, reviewThreadFromGen(th)) + } + return out, nil +} + // ---- domain <-> gen mapping ---- // prState collapses the PR's bools into the single pr.state column value. @@ -130,6 +219,49 @@ func prState(r domain.PullRequest) domain.PRState { func genPRParams(r domain.PullRequest) gen.UpsertPRParams { return gen.UpsertPRParams{ + URL: r.URL, + SessionID: r.SessionID, + Number: int64(r.Number), + PRState: prState(r), + ReviewDecision: reviewOrDefault(r.Review), + CIState: ciOrDefault(r.CI), + Mergeability: mergeabilityOrDefault(r.Mergeability), + UpdatedAt: r.UpdatedAt, + Provider: r.Provider, + Host: r.Host, + Repo: r.Repo, + SourceBranch: r.SourceBranch, + TargetBranch: r.TargetBranch, + HeadSha: r.HeadSHA, + Title: r.Title, + Additions: int64(r.Additions), + Deletions: int64(r.Deletions), + ChangedFiles: int64(r.ChangedFiles), + Author: r.Author, + BaseSha: r.BaseSHA, + MergeCommitSha: r.MergeCommitSHA, + IsDraft: boolInt(r.Draft), + IsMerged: boolInt(r.Merged), + IsClosed: boolInt(r.Closed), + ProviderState: r.ProviderState, + ProviderMergeable: r.ProviderMergeable, + ProviderMergeStateStatus: r.ProviderMergeStateStatus, + HtmlURL: r.HTMLURL, + CreatedAtProvider: nullTime(r.CreatedAtProvider), + UpdatedAtProvider: nullTime(r.UpdatedAtProvider), + MergedAtProvider: nullTime(r.MergedAtProvider), + ClosedAtProvider: nullTime(r.ClosedAtProvider), + MetadataHash: r.MetadataHash, + CIHash: r.CIHash, + ReviewHash: r.ReviewHash, + ObservedAt: nullTime(r.ObservedAt), + CIObservedAt: nullTime(r.CIObservedAt), + ReviewObservedAt: nullTime(r.ReviewObservedAt), + } +} + +func genLegacyPRParams(r domain.PullRequest) gen.UpsertLegacyPRParams { + return gen.UpsertLegacyPRParams{ URL: r.URL, SessionID: r.SessionID, Number: int64(r.Number), @@ -138,6 +270,9 @@ func genPRParams(r domain.PullRequest) gen.UpsertPRParams { CIState: ciOrDefault(r.CI), Mergeability: mergeabilityOrDefault(r.Mergeability), UpdatedAt: r.UpdatedAt, + IsDraft: boolInt(r.Draft), + IsMerged: boolInt(r.Merged), + IsClosed: boolInt(r.Closed), } } @@ -164,16 +299,43 @@ func mergeabilityOrDefault(v domain.Mergeability) domain.Mergeability { func prRowFromGen(p gen.PR) domain.PullRequest { return domain.PullRequest{ - URL: p.URL, - SessionID: p.SessionID, - Number: int(p.Number), - Draft: p.PRState == domain.PRStateDraft, - Merged: p.PRState == domain.PRStateMerged, - Closed: p.PRState == domain.PRStateClosed, - CI: p.CIState, - Review: p.ReviewDecision, - Mergeability: p.Mergeability, - UpdatedAt: p.UpdatedAt, + URL: p.URL, + SessionID: p.SessionID, + Number: int(p.Number), + Draft: p.PRState == domain.PRStateDraft || p.IsDraft != 0, + Merged: p.PRState == domain.PRStateMerged || p.IsMerged != 0, + Closed: p.PRState == domain.PRStateClosed || p.IsClosed != 0, + CI: p.CIState, + Review: p.ReviewDecision, + Mergeability: p.Mergeability, + UpdatedAt: p.UpdatedAt, + Provider: p.Provider, + Host: p.Host, + Repo: p.Repo, + SourceBranch: p.SourceBranch, + TargetBranch: p.TargetBranch, + HeadSHA: p.HeadSha, + Title: p.Title, + Additions: int(p.Additions), + Deletions: int(p.Deletions), + ChangedFiles: int(p.ChangedFiles), + Author: p.Author, + BaseSHA: p.BaseSha, + MergeCommitSHA: p.MergeCommitSha, + ProviderState: p.ProviderState, + ProviderMergeable: p.ProviderMergeable, + ProviderMergeStateStatus: p.ProviderMergeStateStatus, + HTMLURL: p.HtmlURL, + CreatedAtProvider: timeFromNull(p.CreatedAtProvider), + UpdatedAtProvider: timeFromNull(p.UpdatedAtProvider), + MergedAtProvider: timeFromNull(p.MergedAtProvider), + ClosedAtProvider: timeFromNull(p.ClosedAtProvider), + MetadataHash: p.MetadataHash, + CIHash: p.CIHash, + ReviewHash: p.ReviewHash, + ObservedAt: timeFromNull(p.ObservedAt), + CIObservedAt: timeFromNull(p.CIObservedAt), + ReviewObservedAt: timeFromNull(p.ReviewObservedAt), } } @@ -185,13 +347,15 @@ func genCheckParams(prURL string, c domain.PullRequestCheck) gen.UpsertPRCheckPa return gen.UpsertPRCheckParams{ PRURL: prURL, Name: c.Name, CommitHash: c.CommitHash, Status: status, URL: c.URL, LogTail: c.LogTail, CreatedAt: c.CreatedAt, + Conclusion: c.Conclusion, Details: c.Details, } } func checkRowFromGen(c gen.PRCheck) domain.PullRequestCheck { return domain.PullRequestCheck{ Name: c.Name, CommitHash: c.CommitHash, Status: c.Status, - URL: c.URL, LogTail: c.LogTail, CreatedAt: c.CreatedAt, + Conclusion: c.Conclusion, URL: c.URL, Details: c.Details, + LogTail: c.LogTail, CreatedAt: c.CreatedAt, } } @@ -199,12 +363,53 @@ func genCommentParams(prURL string, c domain.PullRequestComment) gen.InsertPRCom return gen.InsertPRCommentParams{ PRURL: prURL, CommentID: c.ID, Author: c.Author, File: c.File, Line: int64(c.Line), Body: c.Body, Resolved: c.Resolved, CreatedAt: c.CreatedAt, + ThreadID: c.ThreadID, URL: c.URL, IsBot: boolInt(c.IsBot), + } +} + +func genLegacyCommentParams(prURL string, c domain.PullRequestComment) gen.InsertLegacyPRCommentParams { + return gen.InsertLegacyPRCommentParams{ + PRURL: prURL, CommentID: c.ID, Author: c.Author, File: c.File, + Line: int64(c.Line), Body: c.Body, Resolved: c.Resolved, CreatedAt: c.CreatedAt, + ThreadID: "", URL: "", IsBot: 0, } } func commentFromGen(c gen.PRComment) domain.PullRequestComment { return domain.PullRequestComment{ - ID: c.CommentID, Author: c.Author, File: c.File, Line: int(c.Line), - Body: c.Body, Resolved: c.Resolved, CreatedAt: c.CreatedAt, + ThreadID: c.ThreadID, ID: c.CommentID, Author: c.Author, + File: c.File, Line: int(c.Line), Body: c.Body, URL: c.URL, + Resolved: c.Resolved, IsBot: c.IsBot != 0, CreatedAt: c.CreatedAt, + } +} + +func genReviewThreadParams(prURL string, th domain.PullRequestReviewThread) gen.UpsertPRReviewThreadParams { + return gen.UpsertPRReviewThreadParams{ + PRURL: prURL, ThreadID: th.ThreadID, Path: th.Path, + Line: int64(th.Line), Resolved: boolInt(th.Resolved), + IsBot: boolInt(th.IsBot), SemanticHash: th.SemanticHash, + UpdatedAt: th.UpdatedAt, + } +} + +func reviewThreadFromGen(th gen.PRReviewThread) domain.PullRequestReviewThread { + return domain.PullRequestReviewThread{ + ThreadID: th.ThreadID, Path: th.Path, Line: int(th.Line), + Resolved: th.Resolved != 0, IsBot: th.IsBot != 0, + SemanticHash: th.SemanticHash, UpdatedAt: th.UpdatedAt, + } +} + +func boolInt(v bool) int64 { + if v { + return 1 + } + return 0 +} + +func timeFromNull(t sql.NullTime) time.Time { + if !t.Valid { + return time.Time{} } + return t.Time } diff --git a/backend/internal/storage/sqlite/store/store_test.go b/backend/internal/storage/sqlite/store/store_test.go index 7731e5ca..40b74f16 100644 --- a/backend/internal/storage/sqlite/store/store_test.go +++ b/backend/internal/storage/sqlite/store/store_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/aoagents/agent-orchestrator/backend/internal/domain" + "github.com/aoagents/agent-orchestrator/backend/internal/ports" "github.com/aoagents/agent-orchestrator/backend/internal/storage/sqlite" ) @@ -216,6 +217,232 @@ func TestPRCommentsReplace(t *testing.T) { } } +func TestWriteSCMObservationPersistsMetadataChecksReviewsAndComments(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + r, _ := s.CreateSession(ctx, sampleRecord("mer")) + now := time.Now().UTC().Truncate(time.Second) + + pr := domain.PullRequest{ + URL: "https://github.com/o/r/pull/1", SessionID: r.ID, Number: 1, + Provider: "github", Host: "github.com", Repo: "o/r", + SourceBranch: "feat/75", TargetBranch: "main", HeadSHA: "h1", + Title: "SCM observer", Additions: 10, Deletions: 2, ChangedFiles: 3, + Author: "dev", BaseSHA: "b1", MergeCommitSHA: "m1", + ProviderState: "OPEN", ProviderMergeable: "MERGEABLE", ProviderMergeStateStatus: "CLEAN", + HTMLURL: "https://github.com/o/r/pull/1", + CI: domain.CIFailing, Review: domain.ReviewChangesRequest, Mergeability: domain.MergeBlocked, + MetadataHash: "mh", CIHash: "ch", ReviewHash: "rh", + UpdatedAt: now, ObservedAt: now, CIObservedAt: now, ReviewObservedAt: now, + } + checks := []domain.PullRequestCheck{{Name: "build", CommitHash: "h1", Status: domain.PRCheckFailed, Conclusion: "failure", URL: "ci", Details: "99", LogTail: "boom", CreatedAt: now}} + threads := []domain.PullRequestReviewThread{{ThreadID: "t1", Path: "main.go", Line: 7, SemanticHash: "th", UpdatedAt: now}} + comments := []domain.PullRequestComment{{ThreadID: "t1", ID: "c1", Author: "reviewer", File: "main.go", Line: 7, Body: "fix", URL: "comment", CreatedAt: now}} + + if err := s.WriteSCMObservation(ctx, pr, checks, threads, comments, ports.ReviewWriteReplace); err != nil { + t.Fatal(err) + } + got, ok, err := s.GetPR(ctx, pr.URL) + if err != nil || !ok { + t.Fatalf("get pr: ok=%v err=%v", ok, err) + } + if got.Provider != "github" || got.HeadSHA != "h1" || got.MetadataHash != "mh" || got.CIHash != "ch" || got.ReviewHash != "rh" { + t.Fatalf("SCM metadata not persisted: %+v", got) + } + gotChecks, _ := s.ListChecks(ctx, pr.URL) + if len(gotChecks) != 1 || gotChecks[0].Conclusion != "failure" || gotChecks[0].Details != "99" || gotChecks[0].LogTail != "boom" { + t.Fatalf("checks not persisted: %+v", gotChecks) + } + gotThreads, _ := s.ListPRReviewThreads(ctx, pr.URL) + if len(gotThreads) != 1 || gotThreads[0].ThreadID != "t1" || gotThreads[0].SemanticHash != "th" { + t.Fatalf("threads not persisted: %+v", gotThreads) + } + gotComments, _ := s.ListPRComments(ctx, pr.URL) + if len(gotComments) != 1 || gotComments[0].ThreadID != "t1" || gotComments[0].URL != "comment" { + t.Fatalf("comments not persisted: %+v", gotComments) + } +} + +func TestWriteSCMObservationMergeUpdatesFetchedReviewWindow(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + r, _ := s.CreateSession(ctx, sampleRecord("mer")) + now := time.Now().UTC().Truncate(time.Second) + pr := domain.PullRequest{URL: "https://github.com/o/r/pull/1", SessionID: r.ID, Number: 1, UpdatedAt: now} + + initialThreads := []domain.PullRequestReviewThread{ + {ThreadID: "older", Path: "old.go", Line: 1, Resolved: false, SemanticHash: "old", UpdatedAt: now}, + {ThreadID: "latest", Path: "main.go", Line: 7, Resolved: false, SemanticHash: "latest-v1", UpdatedAt: now}, + } + initialComments := []domain.PullRequestComment{ + {ThreadID: "older", ID: "older-c1", Author: "ann", Body: "old", CreatedAt: now}, + {ThreadID: "latest", ID: "latest-c1", Author: "bob", Body: "before", CreatedAt: now}, + } + if err := s.WriteSCMObservation(ctx, pr, nil, initialThreads, initialComments, ports.ReviewWriteReplace); err != nil { + t.Fatal(err) + } + + mergedThreads := []domain.PullRequestReviewThread{ + {ThreadID: "latest", Path: "main.go", Line: 8, Resolved: true, SemanticHash: "latest-v2", UpdatedAt: now.Add(time.Second)}, + {ThreadID: "new", Path: "new.go", Line: 2, Resolved: false, SemanticHash: "new", UpdatedAt: now.Add(time.Second)}, + } + mergedComments := []domain.PullRequestComment{ + {ThreadID: "latest", ID: "latest-c2", Author: "bob", Body: "after", CreatedAt: now.Add(time.Second)}, + {ThreadID: "new", ID: "new-c1", Author: "cat", Body: "new", CreatedAt: now.Add(time.Second)}, + } + if err := s.WriteSCMObservation(ctx, pr, nil, mergedThreads, mergedComments, ports.ReviewWriteMerge); err != nil { + t.Fatal(err) + } + + gotThreads, err := s.ListPRReviewThreads(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + if len(gotThreads) != 3 { + t.Fatalf("threads = %#v, want older preserved plus latest/new", gotThreads) + } + byThread := map[string]domain.PullRequestReviewThread{} + for _, th := range gotThreads { + byThread[th.ThreadID] = th + } + if byThread["older"].SemanticHash != "old" { + t.Fatalf("older thread not preserved: %#v", byThread["older"]) + } + if byThread["latest"].SemanticHash != "latest-v2" || !byThread["latest"].Resolved || byThread["latest"].Line != 8 { + t.Fatalf("latest thread not updated: %#v", byThread["latest"]) + } + if byThread["new"].SemanticHash != "new" { + t.Fatalf("new thread not inserted: %#v", byThread["new"]) + } + + gotComments, err := s.ListPRComments(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + ids := map[string]bool{} + for _, c := range gotComments { + ids[c.ID] = true + } + if !ids["older-c1"] || !ids["latest-c2"] || !ids["new-c1"] { + t.Fatalf("comments after merge = %#v, want older preserved and fetched threads replaced", gotComments) + } + if ids["latest-c1"] { + t.Fatalf("stale fetched-thread comment was preserved: %#v", gotComments) + } +} + +func TestWritePRPreservesSCMReviewThreads(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + r, _ := s.CreateSession(ctx, sampleRecord("mer")) + now := time.Now().UTC().Truncate(time.Second) + observedAt := now.Add(-time.Minute) + pr := domain.PullRequest{ + URL: "https://github.com/o/r/pull/1", SessionID: r.ID, Number: 1, UpdatedAt: now, + Provider: "github", Host: "github.com", Repo: "o/r", HeadSHA: "head-1", + MetadataHash: "metadata-v1", CIHash: "ci-v1", ReviewHash: "review-v1", + ObservedAt: observedAt, CIObservedAt: observedAt, ReviewObservedAt: observedAt, + } + threads := []domain.PullRequestReviewThread{{ThreadID: "t1", Path: "main.go", Line: 7, SemanticHash: "thread-v1", UpdatedAt: now}} + comments := []domain.PullRequestComment{{ThreadID: "t1", ID: "c1", Author: "reviewer", Body: "scm", URL: "https://example/comment/c1", CreatedAt: now}} + + if err := s.WriteSCMObservation(ctx, pr, nil, threads, comments, ports.ReviewWriteReplace); err != nil { + t.Fatal(err) + } + legacyComments := []domain.PullRequestComment{ + {ID: "c1", Author: "legacy", Body: "duplicate legacy row must not clear thread metadata", CreatedAt: now.Add(time.Second)}, + {ID: "legacy-only", Author: "legacy", Body: "legacy", CreatedAt: now.Add(time.Second)}, + } + if err := s.WritePR(ctx, domain.PullRequest{URL: pr.URL, SessionID: r.ID, Number: 1, CI: domain.CIPassing, UpdatedAt: now.Add(time.Second)}, nil, legacyComments); err != nil { + t.Fatal(err) + } + + gotPR, ok, err := s.GetPR(ctx, pr.URL) + if err != nil || !ok { + t.Fatalf("get pr: ok=%v err=%v", ok, err) + } + if gotPR.Provider != "github" || gotPR.Host != "github.com" || gotPR.Repo != "o/r" || gotPR.HeadSHA != "head-1" || + gotPR.MetadataHash != "metadata-v1" || gotPR.CIHash != "ci-v1" || gotPR.ReviewHash != "review-v1" { + t.Fatalf("legacy WritePR must preserve SCM-owned metadata and hashes, got %+v", gotPR) + } + if !gotPR.ObservedAt.Equal(observedAt) || !gotPR.CIObservedAt.Equal(observedAt) || !gotPR.ReviewObservedAt.Equal(observedAt) { + t.Fatalf("legacy WritePR must preserve SCM observation timestamps, got observed=%s ci=%s review=%s", gotPR.ObservedAt, gotPR.CIObservedAt, gotPR.ReviewObservedAt) + } + if gotPR.CI != domain.CIPassing { + t.Fatalf("legacy WritePR should still update legacy scalar CI state, got %s", gotPR.CI) + } + gotThreads, err := s.ListPRReviewThreads(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + if len(gotThreads) != 1 || gotThreads[0].ThreadID != "t1" || gotThreads[0].SemanticHash != "thread-v1" { + t.Fatalf("legacy WritePR must preserve SCM-owned review threads, got %+v", gotThreads) + } + gotComments, err := s.ListPRComments(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + byID := map[string]domain.PullRequestComment{} + for _, c := range gotComments { + byID[c.ID] = c + } + scmComment, ok := byID["c1"] + if !ok || scmComment.ThreadID != "t1" || scmComment.URL != "https://example/comment/c1" || scmComment.Body != "scm" { + t.Fatalf("legacy WritePR must not clear SCM comment metadata, got %+v", scmComment) + } + legacyOnly, ok := byID["legacy-only"] + if !ok || legacyOnly.ThreadID != "" { + t.Fatalf("legacy-only comment should remain unthreaded, got %+v", legacyOnly) + } + + mergedThreads := []domain.PullRequestReviewThread{{ThreadID: "t1", Path: "main.go", Line: 8, Resolved: true, SemanticHash: "thread-v2", UpdatedAt: now.Add(2 * time.Second)}} + mergedComments := []domain.PullRequestComment{{ThreadID: "t1", ID: "c2", Author: "reviewer", Body: "updated", URL: "https://example/comment/c2", CreatedAt: now.Add(2 * time.Second)}} + if err := s.WriteSCMObservation(ctx, pr, nil, mergedThreads, mergedComments, ports.ReviewWriteMerge); err != nil { + t.Fatal(err) + } + gotComments, err = s.ListPRComments(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + byID = map[string]domain.PullRequestComment{} + for _, c := range gotComments { + byID[c.ID] = c + } + if _, ok := byID["c1"]; ok { + t.Fatalf("SCM merge should delete stale fetched-thread comment c1, comments=%+v", gotComments) + } + replacement, ok := byID["c2"] + if !ok || replacement.ThreadID != "t1" { + t.Fatalf("SCM merge did not insert replacement threaded comment, comments=%+v", gotComments) + } +} + +func TestWritePRReplacesLegacyCommentBodies(t *testing.T) { + s := newTestStore(t) + ctx := context.Background() + seedProject(t, s, "mer") + r, _ := s.CreateSession(ctx, sampleRecord("mer")) + now := time.Now().UTC().Truncate(time.Second) + pr := domain.PullRequest{URL: "https://github.com/o/r/pull/2", SessionID: r.ID, Number: 2, UpdatedAt: now} + + if err := s.WritePR(ctx, pr, nil, []domain.PullRequestComment{{ID: "legacy", Author: "reviewer", Body: "before", CreatedAt: now}}); err != nil { + t.Fatal(err) + } + if err := s.WritePR(ctx, pr, nil, []domain.PullRequestComment{{ID: "legacy", Author: "reviewer", Body: "after edit", CreatedAt: now.Add(time.Second)}}); err != nil { + t.Fatal(err) + } + got, err := s.ListPRComments(ctx, pr.URL) + if err != nil { + t.Fatal(err) + } + if len(got) != 1 || got[0].Body != "after edit" || !got[0].CreatedAt.Equal(now.Add(time.Second)) { + t.Fatalf("legacy comment replacement did not persist edited row: %+v", got) + } +} + func TestCDCTriggersPopulateChangeLog(t *testing.T) { s := newTestStore(t) ctx := context.Background()