From 5e8aeb5c3a1f8d6fb7039d906211fe66ba74b492 Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Mon, 13 Apr 2026 16:01:32 -0400 Subject: [PATCH] review-agent: fix comment pre-filter to use per-comment reply matching The comment analyzer used a global last_bot_time cutoff to determine which comments were already addressed. Any bot comment (even unrelated) posted after a human comment would suppress it. This caused legitimate review feedback to be skipped when an unrelated bot comment happened to be more recent. Fix by matching replies per-comment: - Issue comments: check if our bot's reply contains the comment URL or #issuecomment-{id} anchor (requires bot to include the link) - Review bodies: check if our bot's reply contains the review ID - Review threads: unchanged (already per-thread via GraphQL) Also fix bot classification into three categories: - is_our_bot(): BOT_ACCOUNTS only - counts as "already replied" - is_approved_bot(): APPROVED_BOTS (e.g. coderabbitai) - triggers responses, was previously blocked by the catch-all [bot] suffix check in is_bot() - is_ignored_bot(): unrecognized [bot] accounts - skipped Add response rule instructing the agent to include comment URL/ID references when replying to flat comments for reply linkage. Co-Authored-By: Claude Opus 4.6 --- ...ypershift-review-agent-process-commands.sh | 222 +++++++++++++----- ...hypershift-review-agent-report-commands.sh | 75 ++++-- 2 files changed, 212 insertions(+), 85 deletions(-) diff --git a/ci-operator/step-registry/hypershift/review-agent/process/hypershift-review-agent-process-commands.sh b/ci-operator/step-registry/hypershift/review-agent/process/hypershift-review-agent-process-commands.sh index 1770c1dc8e217..577ec8906a22e 100644 --- a/ci-operator/step-registry/hypershift/review-agent/process/hypershift-review-agent-process-commands.sh +++ b/ci-operator/step-registry/hypershift/review-agent/process/hypershift-review-agent-process-commands.sh @@ -36,6 +36,7 @@ timelines and identifying only threads where: from __future__ import annotations import json +import re import subprocess import sys from functools import lru_cache @@ -59,6 +60,14 @@ APPROVED_BOTS = [ "coderabbitai[bot]", ] +# Prow/CI bots whose comments should be ignored entirely +IGNORED_ACCOUNTS = [ + "openshift-ci-robot", + "openshift-ci", + "openshift-merge-robot", + "openshift-bot", +] + # Cache for authorization results to minimize API calls _auth_cache: dict[str, bool] = {} @@ -74,11 +83,27 @@ def run_gh(args: list[str]) -> Any: return json.loads(result.stdout) if result.stdout.strip() else None -def is_bot(login: str) -> bool: - """Check if login is a known bot account.""" +def is_our_bot(login: str) -> bool: + """Check if login is our bot account (whose comments count as replies).""" + if not login: + return False + return login in BOT_ACCOUNTS + + +def is_approved_bot(login: str) -> bool: + """Check if login is an approved bot whose comments should trigger responses.""" if not login: return False - return login in BOT_ACCOUNTS or login.endswith("[bot]") + return login in APPROVED_BOTS + + +def is_ignored_bot(login: str) -> bool: + """Check if login is a bot that should be ignored (prow bots, unknown [bot] accounts).""" + if not login: + return False + if login in BOT_ACCOUNTS or login in APPROVED_BOTS: + return False + return login in IGNORED_ACCOUNTS or login.endswith("[bot]") def is_openshift_org_member(login: str) -> bool: @@ -329,41 +354,46 @@ def analyze_review_threads(pr_number: int) -> list[dict]: if not comments: continue - # Find last human and last bot comment - last_human = None - last_bot = None + # Classify comments: + # our_bot: our bot's reply (counts as "addressed") + # actionable: human or approved bot comment (needs response) + # ignored: unrecognized bots (skip) + last_actionable = None + last_our_bot = None for comment in comments: author = comment["author"]["login"] if comment["author"] else "unknown" - if is_bot(author): - last_bot = comment - else: - last_human = comment - - # Needs attention if no bot reply, or human commented after bot - if last_bot is None: - last_author = last_human["author"]["login"] if last_human and last_human["author"] else "unknown" - # Check if author is authorized - if not is_authorized_author(last_author): + if is_our_bot(author): + last_our_bot = comment + elif is_ignored_bot(author): continue + elif is_authorized_author(author): + last_actionable = comment + + if last_actionable is None: + continue + + last_author = last_actionable["author"]["login"] if last_actionable["author"] else "unknown" + author_type = "approved_bot" if is_approved_bot(last_author) else "human" + + # Needs attention if no bot reply, or actionable comment after bot reply + if last_our_bot is None: needs_attention.append({ "type": "review_thread", "id": thread["id"], - "last_human_comment": last_human["body"][:200] if last_human else None, - "last_human_author": last_author, + "last_comment": last_actionable["body"][:200], + "last_author": last_author, + "author_type": author_type, "reason": "no_bot_reply" }) - elif last_human and last_human["createdAt"] > last_bot["createdAt"]: - last_author = last_human["author"]["login"] if last_human["author"] else "unknown" - # Check if author is authorized - if not is_authorized_author(last_author): - continue + elif last_actionable["createdAt"] > last_our_bot["createdAt"]: needs_attention.append({ "type": "review_thread", "id": thread["id"], - "last_human_comment": last_human["body"][:200], - "last_human_author": last_author, - "reason": "human_followup_after_bot" + "last_comment": last_actionable["body"][:200], + "last_author": last_author, + "author_type": author_type, + "reason": "followup_after_bot" }) return needs_attention @@ -404,7 +434,7 @@ def analyze_review_bodies(pr_number: int) -> list[dict]: reviews = result["data"]["repository"]["pullRequest"]["reviews"]["nodes"] needs_attention = [] - # Get issue comments to check if bot has replied to any review + # Get bot issue comment bodies to check for review ID references try: issue_comments = run_gh([ "api", f"repos/openshift/hypershift/issues/{pr_number}/comments", @@ -413,18 +443,15 @@ def analyze_review_bodies(pr_number: int) -> list[dict]: except subprocess.CalledProcessError: issue_comments = [] - # Find last bot comment time from issue comments - bot_comment_times = [] - if issue_comments: - for c in issue_comments: - if c["user"]["login"] in BOT_ACCOUNTS: - bot_comment_times.append(c["created_at"]) - last_bot_time = max(bot_comment_times) if bot_comment_times else None + bot_bodies = [ + c["body"] for c in (issue_comments or []) + if c["user"]["login"] in BOT_ACCOUNTS + ] for review in reviews: - # Skip reviews without bodies or from bots + # Skip reviews without bodies, from our bot, or from ignored bots author = review["author"]["login"] if review["author"] else None - if not author or is_bot(author): + if not author or is_our_bot(author) or is_ignored_bot(author): continue body = review.get("body", "").strip() @@ -435,25 +462,33 @@ def analyze_review_bodies(pr_number: int) -> list[dict]: if not is_authorized_author(author): continue - submitted_at = review["submittedAt"] + review_id = review["id"] + + # Check if any bot comment references this specific review + replied = any(str(review_id) in b for b in bot_bodies) - # Needs attention if no bot reply, or review was submitted after last bot comment - if last_bot_time is None or submitted_at > last_bot_time: + if not replied: needs_attention.append({ "type": "review_body", - "id": review["id"], + "id": review_id, "author": author, + "author_type": "approved_bot" if is_approved_bot(author) else "human", "state": review["state"], - "body": body[:500], # Include more context for review bodies - "submitted_at": submitted_at, - "reason": "no_bot_reply" if last_bot_time is None else "review_after_bot_reply" + "body": body[:500], + "submitted_at": review["submittedAt"], + "reason": "no_bot_reply" }) return needs_attention def analyze_issue_comments(pr_number: int) -> list[dict]: - """Analyze issue comments (general PR comments) and return those needing attention.""" + """Analyze issue comments (general PR comments) and return those needing attention. + + Since issue comments are flat (no threading), we rely on our bot + including the comment URL when replying. A comment is considered + addressed only if our bot's reply contains its URL or comment ID anchor. + """ try: comments = run_gh([ "api", f"repos/openshift/hypershift/issues/{pr_number}/comments", @@ -465,34 +500,50 @@ def analyze_issue_comments(pr_number: int) -> list[dict]: if not comments: return [] - # Separate human and bot comments - human_comments = [c for c in comments if not is_bot(c["user"]["login"])] - bot_comments = [c for c in comments if c["user"]["login"] in BOT_ACCOUNTS] + # Actionable comments: humans and approved bots (not our bot, not ignored bots) + actionable_comments = [ + c for c in comments + if not is_our_bot(c["user"]["login"]) and not is_ignored_bot(c["user"]["login"]) + ] + # Our bot's comments (used for reply matching) + bot_comments = [c for c in comments if is_our_bot(c["user"]["login"])] - if not human_comments: + if not actionable_comments: return [] - # Find the last bot comment timestamp - last_bot_time = None - if bot_comments: - last_bot_time = max(c["created_at"] for c in bot_comments) + # Collect all our bot comment bodies for reference matching + bot_bodies = [c["body"] for c in bot_comments] needs_attention = [] - # Find human comments after last bot reply - for comment in human_comments: - if last_bot_time is None or comment["created_at"] > last_bot_time: - author = comment["user"]["login"] - # Check if author is authorized - if not is_authorized_author(author): - continue + for comment in actionable_comments: + author = comment["user"]["login"] + # Check if author is authorized + if not is_authorized_author(author): + continue + + # Check if any bot comment references this specific comment + comment_id = str(comment["id"]) + comment_url = comment.get("html_url", "") + # Match by full URL or by the #issuecomment-{id} anchor with word boundary + # to prevent #issuecomment-123 from matching #issuecomment-1234 + anchor_pattern = re.compile(rf"#issuecomment-{re.escape(comment_id)}(?!\d)") + + replied = any( + anchor_pattern.search(body) or (comment_url and comment_url in body) + for body in bot_bodies + ) + + if not replied: needs_attention.append({ "type": "issue_comment", "id": comment["id"], + "html_url": comment_url, "author": author, + "author_type": "approved_bot" if is_approved_bot(author) else "human", "body": comment["body"][:200], "created_at": comment["created_at"], - "reason": "no_bot_reply" if last_bot_time is None else "human_followup_after_bot" + "reason": "no_bot_reply" }) return needs_attention @@ -792,6 +843,9 @@ RESPONSE RULES: 1. For each piece of feedback, choose ONE response mechanism only - never respond to the same feedback via both inline reply AND general PR comment 2. Only make code changes when explicitly requested (look for imperative language like 'change', 'fix', 'update', 'remove') 3. For questions or clarifications, reply with an explanation only - do not change code unless asked +4. COMMENT LINKAGE: When replying to a flat comment (type: issue_comment or review_body), you MUST include a reference so the system knows which comment you addressed: + - For issue_comment: include the html_url from the JSON in your reply. Format: start with 'Re: ' on its own line + - For review_body: include the review id from the JSON in your reply. Format: start with 'Re: review ' on its own line ${SUBAGENT_PROMPT}" @@ -811,9 +865,53 @@ ${SUBAGENT_PROMPT}" set -e echo "Claude processing complete. Full output saved to /tmp/claude-pr-${PR_NUMBER}-output.json" - # Copy Claude output to SHARED_DIR for the report step if [ -f "/tmp/claude-pr-${PR_NUMBER}-output.json" ]; then - cp "/tmp/claude-pr-${PR_NUMBER}-output.json" "${SHARED_DIR}/claude-pr-${PR_NUMBER}-output.json" + # Full output to ARTIFACT_DIR (GCS-backed, no size limit) + cp "/tmp/claude-pr-${PR_NUMBER}-output.json" "${ARTIFACT_DIR}/claude-pr-${PR_NUMBER}-output.json" + # Compact summary to SHARED_DIR for the report step. + # SHARED_DIR is backed by a Kubernetes secret (1MB limit), so we + # extract only the fields the report needs into a small JSON file. + python3 -c " +import json, sys +f = sys.argv[1] +result_line = system_line = None +tool_calls = [] +tool_errors = [] +for line in open(f): + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + t = obj.get('type', '') + if t == 'result': + result_line = obj + elif t == 'system': + system_line = obj + elif t == 'tool_use': + tool_calls.append(obj.get('name', 'unknown')) + if obj.get('is_error'): + tool_errors.append(str(obj.get('content', ''))[:200]) +summary = {} +if result_line: + summary['result'] = (result_line.get('result') or '')[:5000] + summary['usage'] = result_line.get('usage', {}) + summary['duration_ms'] = result_line.get('duration_ms', 0) + summary['duration_api_ms'] = result_line.get('duration_api_ms', 0) + summary['num_turns'] = result_line.get('num_turns', 0) + summary['session_id'] = result_line.get('session_id', '') + summary['total_cost_usd'] = result_line.get('total_cost_usd', 0) + summary['modelUsage'] = result_line.get('modelUsage', {}) +if system_line: + summary['model'] = system_line.get('model', 'unknown') + summary['tools'] = system_line.get('tools', []) +summary['tool_calls'] = tool_calls +summary['tool_errors'] = tool_errors +print(json.dumps(summary)) +" "/tmp/claude-pr-${PR_NUMBER}-output.json" \ + > "${SHARED_DIR}/claude-pr-${PR_NUMBER}-summary.json" 2>/dev/null || true fi if [ $EXIT_CODE -eq 0 ]; then diff --git a/ci-operator/step-registry/hypershift/review-agent/report/hypershift-review-agent-report-commands.sh b/ci-operator/step-registry/hypershift/review-agent/report/hypershift-review-agent-report-commands.sh index bf1841905d4b6..6fc172e24e0cf 100644 --- a/ci-operator/step-registry/hypershift/review-agent/report/hypershift-review-agent-report-commands.sh +++ b/ci-operator/step-registry/hypershift/review-agent/report/hypershift-review-agent-report-commands.sh @@ -157,24 +157,41 @@ while IFS= read -r line; do STATUS_LABEL="Failed" fi - # Try to find the Claude output JSON - OUTPUT_FILE="${SHARED_DIR}/claude-pr-${PR_NUMBER}-output.json" - if [ ! -f "$OUTPUT_FILE" ]; then - OUTPUT_FILE="/tmp/claude-pr-${PR_NUMBER}-output.json" + # Read from compact summary JSON (written by process step) + SUMMARY_FILE="${SHARED_DIR}/claude-pr-${PR_NUMBER}-summary.json" + if [ ! -f "$SUMMARY_FILE" ]; then + # Fallback to legacy stream-json format + SUMMARY_FILE="${SHARED_DIR}/claude-pr-${PR_NUMBER}-output.json" fi - # Extract data from stream-json output - RESULT_TEXT=$(extract_from_stream_json "$OUTPUT_FILE" "text" | html_escape) - PR_INPUT=$(extract_from_stream_json "$OUTPUT_FILE" "input_tokens") - PR_OUTPUT=$(extract_from_stream_json "$OUTPUT_FILE" "output_tokens") - PR_CACHE_READ=$(extract_from_stream_json "$OUTPUT_FILE" "cache_read") - PR_CACHE_CREATE=$(extract_from_stream_json "$OUTPUT_FILE" "cache_create") - DURATION_MS=$(extract_from_stream_json "$OUTPUT_FILE" "duration_ms") - DURATION_API_MS=$(extract_from_stream_json "$OUTPUT_FILE" "duration_api_ms") - NUM_TURNS=$(extract_from_stream_json "$OUTPUT_FILE" "num_turns") - MODEL=$(extract_from_stream_json "$OUTPUT_FILE" "model") - SESSION_ID=$(extract_from_stream_json "$OUTPUT_FILE" "session_id") - TOOLS_AVAILABLE=$(extract_from_stream_json "$OUTPUT_FILE" "tools_available") + if [ -f "$SUMMARY_FILE" ] && grep -q '"result"' "$SUMMARY_FILE" 2>/dev/null && ! grep -q '"type":"result"' "$SUMMARY_FILE" 2>/dev/null; then + # New compact summary format + RESULT_TEXT=$(jq -r '.result // empty' "$SUMMARY_FILE" 2>/dev/null | html_escape) + PR_INPUT=$(jq -r '.usage.input_tokens // 0' "$SUMMARY_FILE" 2>/dev/null) + PR_OUTPUT=$(jq -r '.usage.output_tokens // 0' "$SUMMARY_FILE" 2>/dev/null) + PR_CACHE_READ=$(jq -r '.usage.cache_read_input_tokens // 0' "$SUMMARY_FILE" 2>/dev/null) + PR_CACHE_CREATE=$(jq -r '.usage.cache_creation_input_tokens // (.usage.cache_creation.ephemeral_5m_input_tokens // 0)' "$SUMMARY_FILE" 2>/dev/null) + DURATION_MS=$(jq -r '.duration_ms // 0' "$SUMMARY_FILE" 2>/dev/null) + DURATION_API_MS=$(jq -r '.duration_api_ms // 0' "$SUMMARY_FILE" 2>/dev/null) + NUM_TURNS=$(jq -r '.num_turns // 0' "$SUMMARY_FILE" 2>/dev/null) + MODEL=$(jq -r '.model // "unknown"' "$SUMMARY_FILE" 2>/dev/null) + SESSION_ID=$(jq -r '.session_id // "unknown"' "$SUMMARY_FILE" 2>/dev/null) + TOOLS_AVAILABLE=$(jq -r '.tools // [] | join(",")' "$SUMMARY_FILE" 2>/dev/null) + else + # Legacy stream-json format + OUTPUT_FILE="$SUMMARY_FILE" + RESULT_TEXT=$(extract_from_stream_json "$OUTPUT_FILE" "text" | html_escape) + PR_INPUT=$(extract_from_stream_json "$OUTPUT_FILE" "input_tokens") + PR_OUTPUT=$(extract_from_stream_json "$OUTPUT_FILE" "output_tokens") + PR_CACHE_READ=$(extract_from_stream_json "$OUTPUT_FILE" "cache_read") + PR_CACHE_CREATE=$(extract_from_stream_json "$OUTPUT_FILE" "cache_create") + DURATION_MS=$(extract_from_stream_json "$OUTPUT_FILE" "duration_ms") + DURATION_API_MS=$(extract_from_stream_json "$OUTPUT_FILE" "duration_api_ms") + NUM_TURNS=$(extract_from_stream_json "$OUTPUT_FILE" "num_turns") + MODEL=$(extract_from_stream_json "$OUTPUT_FILE" "model") + SESSION_ID=$(extract_from_stream_json "$OUTPUT_FILE" "session_id") + TOOLS_AVAILABLE=$(extract_from_stream_json "$OUTPUT_FILE" "tools_available") + fi : "${PR_INPUT:=0}" : "${PR_OUTPUT:=0}" @@ -185,7 +202,13 @@ while IFS= read -r line; do DURATION_SECS=$((DURATION_MS / 1000)) DURATION_API_SECS=$((DURATION_API_MS / 1000)) - PR_COST_RAW=$(extract_from_stream_json "$OUTPUT_FILE" "cost_usd") + + # Cost - try summary format first, then legacy + if [ -f "$SUMMARY_FILE" ] && ! grep -q '"type":"result"' "$SUMMARY_FILE" 2>/dev/null; then + PR_COST_RAW=$(jq -r '.total_cost_usd // 0' "$SUMMARY_FILE" 2>/dev/null) + else + PR_COST_RAW=$(extract_from_stream_json "${OUTPUT_FILE:-$SUMMARY_FILE}" "cost_usd") + fi : "${PR_COST_RAW:=0}" PR_COST=$(format_cost "$PR_COST_RAW") @@ -196,16 +219,18 @@ while IFS= read -r line; do GRAND_TOTAL_CACHE_CREATE=$((GRAND_TOTAL_CACHE_CREATE + PR_CACHE_CREATE)) GRAND_TOTAL_COST_USD=$(sum_costs "$GRAND_TOTAL_COST_USD" "$PR_COST_RAW") - # Extract tool calls from stream-json + # Extract tool calls and errors TOOL_CALLS_RAW="" TOOL_ERRORS_RAW="" - if [ -f "$OUTPUT_FILE" ] && [ -s "$OUTPUT_FILE" ]; then - # Tool calls: extract tool_use messages with name and input + if [ -f "$SUMMARY_FILE" ] && ! grep -q '"type":"result"' "$SUMMARY_FILE" 2>/dev/null; then + # New compact summary format - tool_calls is a list of names, tool_errors is a list of strings + TOOL_CALLS_RAW=$(jq -r '.tool_calls[]? // empty' "$SUMMARY_FILE" 2>/dev/null | sed 's/^/ /' || echo "") + TOOL_ERRORS_RAW=$(jq -r '.tool_errors[]? // empty' "$SUMMARY_FILE" 2>/dev/null || echo "") + elif [ -f "${OUTPUT_FILE:-}" ] && [ -s "${OUTPUT_FILE:-}" ]; then + # Legacy stream-json format TOOL_CALLS_RAW=$(grep '"type":"tool_use"' "$OUTPUT_FILE" 2>/dev/null \ | jq -r '" \(.name)(\(.input | tostring | .[0:120])...)"' 2>/dev/null \ || echo "") - - # Tool errors: extract tool_result messages where is_error is true TOOL_ERRORS_RAW=$(grep '"is_error":true' "$OUTPUT_FILE" 2>/dev/null \ | jq -r '.content // "unknown error"' 2>/dev/null \ || echo "") @@ -254,7 +279,11 @@ while IFS= read -r line; do # Token usage table # Build per-model breakdown rows MODEL_BREAKDOWN_ROWS="" - MODEL_BREAKDOWN_RAW=$(extract_from_stream_json "$OUTPUT_FILE" "model_usage") + if [ -f "$SUMMARY_FILE" ] && ! grep -q '"type":"result"' "$SUMMARY_FILE" 2>/dev/null; then + MODEL_BREAKDOWN_RAW=$(jq -r '.modelUsage // {} | to_entries[] | "\(.key)|\(.value.inputTokens // .value.input_tokens // 0)|\(.value.outputTokens // .value.output_tokens // 0)|\(.value.cacheReadInputTokens // .value.cache_read_input_tokens // 0)|\(.value.cacheCreationInputTokens // .value.cache_creation_input_tokens // 0)"' "$SUMMARY_FILE" 2>/dev/null | head -20 || echo "") + else + MODEL_BREAKDOWN_RAW=$(extract_from_stream_json "${OUTPUT_FILE:-$SUMMARY_FILE}" "model_usage") + fi if [ -n "$MODEL_BREAKDOWN_RAW" ]; then MODEL_BREAKDOWN_ROWS="Per-model breakdown" while IFS='|' read -r M_NAME M_INPUT M_OUTPUT M_CACHE_READ M_CACHE_CREATE; do