From b62b3434265339fa79396dd88485bcf4febbd277 Mon Sep 17 00:00:00 2001
From: Patryk Matuszak <pmatusza@redhat.com>
Date: Fri, 3 Jul 2026 17:51:46 +0200
Subject: [PATCH 1/4] ci-doctor: retain failed-download jobs instead of
 dropping them
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

download-jobs.sh silently excluded jobs whose artifact download failed
from the jobs.json it emits, so a detected failed job could vanish from
the pipeline before analysis even started. Keep such jobs in the list
with an empty artifacts_dir and a download_error marker. plan-analysis
must skip scheduling analysis for them (there are no artifacts to read);
note that this patch only touches download-jobs.sh, so that skip has to
come from a separate change. The aggregation overlay will surface them as
unanalyzed instead of losing them.
---
 plugins/shared/scripts/download-jobs.sh | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/plugins/shared/scripts/download-jobs.sh b/plugins/shared/scripts/download-jobs.sh
index 642cb47c..949f8239 100755
--- a/plugins/shared/scripts/download-jobs.sh
+++ b/plugins/shared/scripts/download-jobs.sh
@@ -15,6 +15,8 @@ set -euo pipefail
 #
 # Output (stdout): JSON array of job objects with "artifacts_dir" added:
 #   [{"job":"...","url":"...","build_id":"...","artifacts_dir":"/tmp/.../artifacts/BUILD_ID"}, ...]
+# Jobs whose download failed are kept with "artifacts_dir": "" and
+# "download_error": "artifact download failed" so they are never silently lost.
 #
 # Progress/errors: stderr
 
@@ -162,13 +164,14 @@ main() {
 
     echo "Done: ${ok} downloaded/cached, ${fail} failed." >&2
 
-    # Exclude failed downloads, then add artifacts_dir
-    local output_json="${jobs_json}"
-    while IFS= read -r bid; do
-        [[ -z "${bid}" ]] && continue
-        output_json=$(echo "${output_json}" | jq --arg id "${bid}" '[.[] | select(.build_id != $id)]')
-    done <<< "${failed_ids}"
-    echo "${output_json}" | jq --arg workdir "${WORKDIR}" '[.[] | . + {artifacts_dir: ($workdir + "/artifacts/" + .build_id)}]'
+    # Keep failed downloads in the list (marked) so downstream reporting
+    # never loses a detected job; only successful jobs get an artifacts_dir.
+    local failed_json
+    failed_json=$(printf '%s\n' "${failed_ids}" | jq -R -s '[split("\n")[] | select(length > 0)]')
+    echo "${jobs_json}" | jq --arg workdir "${WORKDIR}" --argjson failed "${failed_json}" '
+        [.[] | if ((.build_id // "") as $b | $failed | index($b))
+           then . + {artifacts_dir: "", download_error: "artifact download failed"}
+           else . + {artifacts_dir: ($workdir + "/artifacts/" + .build_id)} end]'
 }
 
 main "${@}"

From f1f271e9445ce9d19260d72b40317305355ecc87 Mon Sep 17 00:00:00 2001
From: Patryk Matuszak <pmatusza@redhat.com>
Date: Fri, 3 Jul 2026 17:51:45 +0200
Subject: [PATCH 2/4] ci-doctor: aggregate from the detected job list with
 missing-analysis placeholders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

total_failed used to count parsed STRUCTURED SUMMARY entries, so a job
whose analysis report was missing or unparseable silently disappeared
from the summary (and one multi-entry report could overcount). Treat the
prepare phase's jobs/release-<v>-jobs.json and prs-jobs.json as ground
truth: join parsed analyses onto the detected list by build_id (from the
per-job filename, with job_url fallback) and emit an explicit
missing_analysis placeholder for every detected job with no analysis.

Placeholders live in a separate array with no fingerprint or severity so
they never enter issues[], history.json, or bug correlation. Summaries
are now written even when zero reports parsed (all placeholders) or the
detected list is empty (total_failed 0) — but not when a collection
error was recorded, which must keep rendering as such. affected_jobs
entries gain build_id so the HTML can link detected jobs to issues.
---
 plugins/shared/scripts/aggregate.py | 232 ++++++++++++++++++++++++----
 plugins/tests/test_aggregate.py     | 209 +++++++++++++++++++++++++
 2 files changed, 409 insertions(+), 32 deletions(-)
 create mode 100644 plugins/tests/test_aggregate.py

diff --git a/plugins/shared/scripts/aggregate.py b/plugins/shared/scripts/aggregate.py
index a8500f85..6b77d63d 100755
--- a/plugins/shared/scripts/aggregate.py
+++ b/plugins/shared/scripts/aggregate.py
@@ -25,6 +25,137 @@
 from parse import parse_structured_summary, group_by_signature
 
 
+# ---------------------------------------------------------------------------
+# Detected-job ground truth (deterministic overlay)
+#
+# The prepare phase's jobs/release-<v>-jobs.json and jobs/prs-jobs.json list
+# every detected failed job.  Aggregation starts from that list and joins the
+# parsed analyses onto it; detected jobs with no parsed analysis become
+# explicit placeholders in a separate missing_analysis array, so a job can
+# never silently vanish between detection and the HTML report.
+# ---------------------------------------------------------------------------
+
+def _detected_job(workdir, entry):
+    """Normalize a jobs.json entry; "finished" is best-effort from evidence."""
+    build_id = entry.get("build_id", "")
+    finished = ""
+    if build_id:
+        evidence_path = os.path.join(workdir, "evidence", f"evidence-{build_id}.json")
+        # Best-effort only: a missing, malformed, or oddly shaped evidence
+        # file leaves the date "" instead of aborting aggregation.
+        try:
+            with open(evidence_path) as f:
+                raw = json.load(f).get("finished")
+            finished = raw[:10] if isinstance(raw, str) else ""
+        except (OSError, ValueError, AttributeError):
+            pass
+    return {"job_name": entry.get("job", ""), "job_url": entry.get("url", ""),
+            "build_id": build_id, "download_error": entry.get("download_error", ""),
+            "finished": finished}
+
+
+def load_detected_release_jobs(workdir, release):
+    """Read jobs/release-<release>-jobs.json into detected-job dicts.
+
+    Returns None when the file is missing, unreadable, or not a JSON list
+    (distinct from []), so callers can fall back to legacy file-glob behavior.
+    """
+    path = os.path.join(workdir, "jobs", f"release-{release}-jobs.json")
+    try:
+        with open(path) as f:
+            entries = json.load(f)
+    except (OSError, ValueError):  # ValueError: bad JSON or undecodable bytes
+        return None
+    if not isinstance(entries, list):
+        return None
+    jobs = []
+    for entry in entries:
+        if not isinstance(entry, dict):
+            continue
+        # Release job lists only contain failures; tolerate a status field of any type.
+        status = str(entry.get("status") or "")
+        if status and status.upper() != "FAILURE":
+            continue
+        jobs.append(_detected_job(workdir, entry))
+    return jobs
+
+
+def load_detected_pr_jobs(workdir):
+    """Read jobs/prs-jobs.json into {pr_number: [detected-job dicts]}.
+
+    prs-jobs.json includes SUCCESS/pending jobs of PRs that had at least one
+    failure; only FAILURE entries count as detected (same filter as
+    plan-analysis.py).  Returns None when the file is missing or unreadable.
+    """
+    path = os.path.join(workdir, "jobs", "prs-jobs.json")
+    try:
+        with open(path) as f:
+            entries = json.load(f)
+    except (OSError, ValueError):  # ValueError: bad JSON or undecodable bytes
+        return None
+    if not isinstance(entries, list):
+        return None
+    prs = {}
+    for entry in entries:
+        if not isinstance(entry, dict):
+            continue
+        if str(entry.get("status") or "").upper() != "FAILURE":  # null/absent status => not detected
+            continue
+        pr_number = entry.get("pr_number") or 0
+        prs.setdefault(pr_number, []).append(_detected_job(workdir, entry))
+    return prs
+
+
+def _file_build_id(filepath):
+    """Return the build id that ends a per-job report filename, or "".
+    Shapes: release-<v>-job-<i>-<build_id>.txt, prs-job-<i>-pr<N>-<build_id>.txt.
+    """
+    found = re.search(r"-(\d+)\.txt$", os.path.basename(filepath))
+    if found is None:
+        return ""
+    return found.group(1)
+
+
+def _placeholder(job):
+    """Build the missing_analysis entry for a detected but unanalyzed job.
+
+    The entry intentionally has no fingerprint/severity/stack_layer, so it
+    stays outside issues[] and downstream consumers cannot mistake it for
+    an analyzed failure.
+    """
+    # Start from the generic reason; the more specific causes override it.
+    reason = "analysis report missing or unparseable"
+    if job.get("download_error"):
+        reason = "artifact download failed"
+    elif not job.get("build_id"):
+        reason = "no build id"
+    # Copy only the identifying fields, in the original key order.
+    entry = {
+        field: job.get(field, "")
+        for field in ("job_name", "job_url", "build_id", "finished")
+    }
+    entry["reason"] = reason
+    return entry
+
+
+def _missing_placeholders(detected, parsed):
+    """Return placeholders for detected jobs that no parsed entry covers.
+
+    A job counts as covered when its build_id (attached to parsed entries
+    from their filename) or, as a defensive fallback, its job_url appears
+    among the parsed entries.
+    """
+    seen_ids = {p.get("build_id") for p in parsed if p.get("build_id")}
+    seen_urls = {p.get("job_url") for p in parsed if p.get("job_url")}
+
+    def _covered(job):
+        bid, url = job.get("build_id"), job.get("job_url")
+        return bool((bid and bid in seen_ids) or (url and url in seen_urls))
+
+    # Detected order is preserved; only uncovered jobs become placeholders.
+    return [_placeholder(job) for job in detected if not _covered(job)]
+
+
 def classify_severity(group):
     count = len(group)
     if count >= 5:
@@ -40,16 +171,23 @@ def classify_severity(group):
 # JSON generation
 # ---------------------------------------------------------------------------
 
-def build_release_json(release, jobs, timestamp):
-    """Build the release summary as a dict (ready for json.dump)."""
+def build_release_json(release, jobs, timestamp, detected=None):
+    """Build the release summary as a dict (ready for json.dump).
+
+    When the detected-job list is available, total_failed is the detected
+    count (not the parsed-entry count, which can under- and over-count) and
+    detected jobs with no parsed analysis are listed in missing_analysis.
+    """
     issues, breakdown = _build_issues_from_jobs(jobs)
+    missing = _missing_placeholders(detected, jobs) if detected is not None else []
 
     return {
         "release": release,
-        "total_failed": len(jobs),
+        "total_failed": len(detected) if detected is not None else len(jobs),
         "date": timestamp.strftime("%Y-%m-%d"),
         "breakdown": breakdown,
         "issues": issues,
+        "missing_analysis": missing,
     }
 
 
@@ -92,7 +230,8 @@ def _build_issues_from_jobs(jobs):
             "analysis_gaps": rep.get("analysis_gaps", []),
             "scenarios": sorted({s for j in group for s in j.get("scenarios", [])}),
             "affected_jobs": [
-                {"name": j["job_name"], "date": j["finished"], "url": j["job_url"]}
+                {"name": j["job_name"], "date": j["finished"], "url": j["job_url"],
+                 "build_id": j.get("build_id", "")}
                 for j in group
             ],
         })
@@ -100,30 +239,42 @@ def _build_issues_from_jobs(jobs):
     return issues, breakdown
 
 
-def build_pr_json(pr_jobs, timestamp):
+def build_pr_json(pr_jobs, timestamp, detected_prs=None):
     """Build the PR summary as a dict (ready for json.dump).
 
-    pr_jobs: dict mapping pr_number to list of job dicts.
+    pr_jobs: dict mapping pr_number to list of parsed job dicts.
+    detected_prs: dict mapping pr_number to detected-job dicts (ground
+    truth), or None when prs-jobs.json is unavailable.  When available,
+    per-PR failed counts come from the detected list and unanalyzed jobs
+    are listed per PR in missing_analysis — including PRs where no report
+    parsed at all.
     """
-    total_failed = sum(len(jobs) for jobs in pr_jobs.values())
+    pr_numbers = sorted(set(pr_jobs) | set(detected_prs or {}))
 
+    total_failed = 0
     prs = []
-    for pr_number, jobs in sorted(pr_jobs.items()):
-        if not jobs:
+    for pr_number in pr_numbers:
+        jobs = pr_jobs.get(pr_number, [])
+        detected = (detected_prs or {}).get(pr_number)
+        missing = _missing_placeholders(detected, jobs) if detected is not None else []
+        if not jobs and not missing:
             continue
-        first = jobs[0]
+        failed = len(detected) if detected is not None else len(jobs)
+        total_failed += failed
+        first = jobs[0] if jobs else {}
         issues, breakdown = _build_issues_from_jobs(jobs)
         prs.append({
             "number": pr_number,
             "title": first.get("pr_title", ""),
             "url": first.get("pr_url", ""),
-            "failed": len(jobs),
+            "failed": failed,
             "breakdown": breakdown,
             "issues": issues,
+            "missing_analysis": missing,
         })
 
     return {
-        "total_prs": len(pr_jobs),
+        "total_prs": len(pr_numbers) if detected_prs is not None else len(pr_jobs),
         "prs_with_failures": len(prs),
         "total_failed": total_failed,
         "date": timestamp.strftime("%Y-%m-%d"),
@@ -205,25 +356,40 @@ def main():
     timestamp = datetime.now(timezone.utc)
 
     if mode == "release":
+        # A recorded collection error must keep rendering as such —
+        # create-report.py falls back to release-<v>-error.txt only when
+        # the summary file is absent, so do not write one.
+        error_path = os.path.join(workdir, "jobs", f"release-{release}-error.txt")
+        if os.path.exists(error_path):
+            print(f"Collection error recorded for release {release}; "
+                  f"not writing a summary", file=sys.stderr)
+            sys.exit(0)
+
+        detected = load_detected_release_jobs(workdir, release)
         files = find_release_job_files(workdir, release)
-        if not files:
+        if detected is None and not files:
             print(f"No job files found for release {release}", file=sys.stderr)
             sys.exit(1)
 
-        print(f"Found {len(files)} job files for release {release}", file=sys.stderr)
+        print(f"Found {len(files)} job files for release {release}"
+              + (f" ({len(detected)} detected failed jobs)" if detected is not None else ""),
+              file=sys.stderr)
         jobs = []
         for filepath in files:
             summaries = parse_structured_summary(filepath)
             if not summaries:
                 print(f"  WARNING: no STRUCTURED SUMMARY in {os.path.basename(filepath)}", file=sys.stderr)
                 continue
+            build_id = _file_build_id(filepath)
+            for summary in summaries:
+                summary["build_id"] = build_id
             jobs.extend(summaries)
 
-        if not jobs:
+        if detected is None and not jobs:
             print("No valid job reports found", file=sys.stderr)
             sys.exit(1)
 
-        result = build_release_json(release, jobs, timestamp)
+        result = build_release_json(release, jobs, timestamp, detected=detected)
         jobs_dir = os.path.join(workdir, "jobs")
         os.makedirs(jobs_dir, exist_ok=True)
         output_path = os.path.join(jobs_dir, f"release-{release}-summary.json")
@@ -233,27 +399,29 @@ def main():
         print(json.dumps(result, indent=2))
 
     elif mode == "prs":
+        detected_prs = load_detected_pr_jobs(workdir)
         files = find_pr_job_files(workdir)
         if not files:
             print("No PR job files found", file=sys.stderr)
-            result = build_pr_json({}, timestamp)
         else:
             print(f"Found {len(files)} PR job files", file=sys.stderr)
-            pr_jobs = {}
-            for filepath in files:
-                summaries = parse_structured_summary(filepath)
-                if not summaries:
-                    print(f"  WARNING: no STRUCTURED SUMMARY in {os.path.basename(filepath)}", file=sys.stderr)
-                    continue
-                for summary in summaries:
-                    summary["pr_title"] = ""
-                    summary["pr_url"] = ""
-
-                m = re.search(r"-pr(\d+)-", os.path.basename(filepath))
-                pr_number = int(m.group(1)) if m else 0
-                pr_jobs.setdefault(pr_number, []).extend(summaries)
-
-            result = build_pr_json(pr_jobs, timestamp)
+        pr_jobs = {}
+        for filepath in files:
+            summaries = parse_structured_summary(filepath)
+            if not summaries:
+                print(f"  WARNING: no STRUCTURED SUMMARY in {os.path.basename(filepath)}", file=sys.stderr)
+                continue
+            build_id = _file_build_id(filepath)
+            for summary in summaries:
+                summary["pr_title"] = ""
+                summary["pr_url"] = ""
+                summary["build_id"] = build_id
+
+            m = re.search(r"-pr(\d+)-", os.path.basename(filepath))
+            pr_number = int(m.group(1)) if m else 0
+            pr_jobs.setdefault(pr_number, []).extend(summaries)
+
+        result = build_pr_json(pr_jobs, timestamp, detected_prs)
 
         jobs_dir = os.path.join(workdir, "jobs")
         os.makedirs(jobs_dir, exist_ok=True)
diff --git a/plugins/tests/test_aggregate.py b/plugins/tests/test_aggregate.py
new file mode 100644
index 00000000..5ce39e27
--- /dev/null
+++ b/plugins/tests/test_aggregate.py
@@ -0,0 +1,209 @@
+"""Tests for the deterministic overlay in plugins/shared/scripts/aggregate.py.
+
+Aggregation must start from the detected-job list written by the prepare
+phase (jobs/release-<v>-jobs.json, jobs/prs-jobs.json) so that a detected
+failed job can never silently vanish from the summary when its analysis
+report is missing or unparseable.
+
+Run: python3 -m unittest discover -s plugins/tests -v
+"""
+
+import json
+import subprocess
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+
+SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "shared" / "scripts"
+
+
+def _report(job_name, job_url, release="4.99", severity=3):
+    """Return report text: analysis prose followed by a STRUCTURED SUMMARY
+    block holding a single JSON entry for the given job."""
+    summary = json.dumps([{
+        "severity": severity,
+        "stack_layer": "test",
+        "step_name": "e2e-test",
+        "error_signature": f"failure in {job_name}",
+        "root_cause": "something broke",
+        "raw_error": "boom",
+        "infrastructure_failure": False,
+        "remediation": "fix it",
+        "confidence": "high",
+        "job_name": job_name,
+        "job_url": job_url,
+        "release": release,
+        "finished": "2026-07-01",
+        "analysis_gaps": [],
+        "scenarios": [],
+        "causal_chain": [],
+    }])
+    return ("Analysis prose.\n\n--- STRUCTURED SUMMARY ---\n"
+            f"{summary}"
+            "\n--- END STRUCTURED SUMMARY ---\n")
+
+
+def _detected(job, url, build_id, **extra):
+    base = {"job": job, "url": url, "build_id": build_id, "artifacts_dir": f"/x/{build_id}"}
+    return {**base, **extra}  # extra keys (e.g. artifacts_dir) override the defaults
+
+
+def _run(workdir, *args):
+    """Run aggregate.py with *args plus --workdir; return the CompletedProcess."""
+    script = SCRIPTS_DIR / "aggregate.py"
+    command = [sys.executable, str(script), *args, "--workdir", str(workdir)]
+    return subprocess.run(command, capture_output=True, text=True)
+
+
+class AggregateOverlayTest(unittest.TestCase):  # each test runs aggregate.py as a subprocess on a throwaway workdir
+
+    def setUp(self):
+        self._tmp = tempfile.TemporaryDirectory()
+        self.addCleanup(self._tmp.cleanup)
+        self.workdir = Path(self._tmp.name)
+        self.jobs_dir = self.workdir / "jobs"
+        self.jobs_dir.mkdir()  # all fixtures and summaries live under <workdir>/jobs
+
+    def _release_summary(self):  # parsed release-4.99-summary.json as written by aggregate.py
+        return json.loads(
+            (self.jobs_dir / "release-4.99-summary.json").read_text())
+
+    def test_release_missing_analyses_become_placeholders(self):
+        detected = [
+            _detected("j-analyzed", "https://prow/111", "111"),
+            _detected("j-lost", "https://prow/222", "222"),
+            _detected("j-dl-failed", "https://prow/333", "333",
+                      artifacts_dir="",
+                      download_error="artifact download failed"),
+        ]
+        (self.jobs_dir / "release-4.99-jobs.json").write_text(
+            json.dumps(detected))
+        (self.jobs_dir / "release-4.99-job-1-111.txt").write_text(  # only build 111 gets a report
+            _report("j-analyzed", "https://prow/111"))
+
+        result = _run(self.workdir, "--release", "4.99")
+        self.assertEqual(result.returncode, 0, result.stderr)
+        summary = self._release_summary()
+
+        self.assertEqual(summary["total_failed"], 3)  # detected count, not parsed-entry count
+        self.assertEqual(len(summary["issues"]), 1)
+        self.assertEqual(
+            summary["issues"][0]["affected_jobs"][0]["build_id"], "111")
+
+        reasons = {m["build_id"]: m["reason"]
+                   for m in summary["missing_analysis"]}
+        self.assertEqual(reasons, {
+            "222": "analysis report missing or unparseable",
+            "333": "artifact download failed",
+        })
+
+    def test_release_placeholders_have_no_fingerprint(self):
+        # Downstream tooling keys on issue fingerprints; placeholders must
+        # never grow one.
+        (self.jobs_dir / "release-4.99-jobs.json").write_text(
+            json.dumps([_detected("j-lost", "https://prow/222", "222")]))
+
+        result = _run(self.workdir, "--release", "4.99")
+        self.assertEqual(result.returncode, 0, result.stderr)
+        for placeholder in self._release_summary()["missing_analysis"]:
+            self.assertNotIn("fingerprint", placeholder)
+            self.assertNotIn("severity", placeholder)
+
+    def test_release_empty_detected_list_writes_zero_summary(self):
+        (self.jobs_dir / "release-4.99-jobs.json").write_text("[]")
+
+        result = _run(self.workdir, "--release", "4.99")
+        self.assertEqual(result.returncode, 0, result.stderr)
+        summary = self._release_summary()
+        self.assertEqual(summary["total_failed"], 0)
+        self.assertEqual(summary["issues"], [])
+        self.assertEqual(summary["missing_analysis"], [])
+
+    def test_release_zero_parsed_reports_still_succeeds(self):
+        (self.jobs_dir / "release-4.99-jobs.json").write_text(
+            json.dumps([_detected("j-a", "https://prow/111", "111"),
+                        _detected("j-b", "https://prow/222", "222")]))
+        (self.jobs_dir / "release-4.99-job-1-111.txt").write_text(
+            "no summary block")  # report file exists but has no STRUCTURED SUMMARY
+
+        result = _run(self.workdir, "--release", "4.99")
+        self.assertEqual(result.returncode, 0, result.stderr)
+        summary = self._release_summary()
+        self.assertEqual(summary["total_failed"], 2)
+        self.assertEqual(summary["issues"], [])
+        self.assertEqual(len(summary["missing_analysis"]), 2)
+
+    def test_release_collection_error_writes_no_summary(self):
+        # create-report.py renders release-<v>-error.txt only when the
+        # summary file is absent; writing one would mask the collection
+        # error as "0 failures".
+        (self.jobs_dir / "release-4.99-jobs.json").write_text("[]")
+        (self.jobs_dir / "release-4.99-error.txt").write_text(
+            "collection blew up")
+
+        result = _run(self.workdir, "--release", "4.99")
+        self.assertEqual(result.returncode, 0, result.stderr)  # exits cleanly, just without a summary
+        self.assertFalse(
+            (self.jobs_dir / "release-4.99-summary.json").exists())
+
+    def test_release_without_detected_list_keeps_legacy_behavior(self):
+        (self.jobs_dir / "release-4.99-job-1-111.txt").write_text(
+            _report("j-analyzed", "https://prow/111"))
+
+        result = _run(self.workdir, "--release", "4.99")
+        self.assertEqual(result.returncode, 0, result.stderr)
+        summary = self._release_summary()
+        self.assertEqual(summary["total_failed"], 1)  # legacy: count comes from parsed entries
+        self.assertEqual(summary["missing_analysis"], [])
+
+    def test_release_no_inputs_at_all_errors(self):
+        result = _run(self.workdir, "--release", "4.99")
+        self.assertEqual(result.returncode, 1)  # neither a jobs list nor report files
+
+    def test_pr_mode_overlay(self):
+        detected = [
+            _detected("j-pr42-failed", "https://prow/444", "444",
+                      pr_number=42, status="FAILURE"),
+            _detected("j-pr42-passed", "https://prow/555", "555",
+                      pr_number=42, status="SUCCESS"),
+            _detected("j-pr43-failed", "https://prow/666", "666",
+                      pr_number=43, status="FAILURE"),
+        ]
+        (self.jobs_dir / "prs-jobs.json").write_text(json.dumps(detected))
+        (self.jobs_dir / "prs-job-1-pr42-444.txt").write_text(
+            _report("j-pr42-failed", "https://prow/444"))
+
+        result = _run(self.workdir, "--prs")
+        self.assertEqual(result.returncode, 0, result.stderr)
+        summary = json.loads((self.jobs_dir / "prs-summary.json").read_text())
+
+        # Only FAILURE entries count as detected.
+        self.assertEqual(summary["total_failed"], 2)
+        prs = {pr["number"]: pr for pr in summary["prs"]}
+
+        self.assertEqual(prs[42]["failed"], 1)
+        self.assertEqual(prs[42]["missing_analysis"], [])
+        self.assertEqual(
+            prs[42]["issues"][0]["affected_jobs"][0]["build_id"], "444")
+
+        # PR 43 had no parsed report at all but must still appear.
+        self.assertEqual(prs[43]["failed"], 1)
+        self.assertEqual(prs[43]["issues"], [])
+        self.assertEqual(prs[43]["missing_analysis"][0]["build_id"], "666")
+        self.assertEqual(prs[43]["missing_analysis"][0]["reason"],
+                         "analysis report missing or unparseable")
+
+    def test_pr_mode_without_detected_list_keeps_legacy_behavior(self):
+        (self.jobs_dir / "prs-job-1-pr42-444.txt").write_text(
+            _report("j-pr42-failed", "https://prow/444"))
+
+        result = _run(self.workdir, "--prs")
+        self.assertEqual(result.returncode, 0, result.stderr)
+        summary = json.loads((self.jobs_dir / "prs-summary.json").read_text())
+        self.assertEqual(summary["total_failed"], 1)  # legacy: count comes from parsed entries
+        self.assertEqual(summary["prs"][0]["failed"], 1)
+
+if __name__ == "__main__":
+    unittest.main()

From 53400e2962c8917cfc6d895385b38abf1a57ec54 Mon Sep 17 00:00:00 2001
From: Patryk Matuszak <pmatusza@redhat.com>
Date: Fri, 3 Jul 2026 17:51:44 +0200
Subject: [PATCH 3/4] ci-doctor: run PR aggregation whenever prs-jobs.json has
 entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The finalize step only ran aggregate.py --prs when at least one
prs-job-*.txt file existed, so if every PR analysis went missing no PR
summary was written at all and the detected failures vanished from the
report. Gate on detected PR jobs too — aggregation now emits an
all-placeholder summary in that case.
---
 plugins/shared/scripts/doctor.sh | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/plugins/shared/scripts/doctor.sh b/plugins/shared/scripts/doctor.sh
index 569c7302..b20a5cbb 100755
--- a/plugins/shared/scripts/doctor.sh
+++ b/plugins/shared/scripts/doctor.sh
@@ -338,10 +338,15 @@ cmd_finalize() {
             echo "  WARNING: aggregation failed for ${release}" >&2
     done
 
-    # Aggregate PRs (if job files exist)
-    local pr_files
+    # Aggregate PRs when per-job report files exist OR failed PR jobs were
+    # detected — even if every analysis went missing, the summary must list
+    # the detected jobs as unanalyzed rather than not exist at all.
+    local pr_files pr_detected=""
     pr_files=$(find "${WORKDIR}/jobs" -name 'prs-job-*.txt' 2>/dev/null | head -1)
-    if [[ -n "${pr_files}" ]]; then
+    if jq -e 'length > 0' "${WORKDIR}/jobs/prs-jobs.json" >/dev/null 2>&1; then
+        pr_detected="yes"
+    fi
+    if [[ -n "${pr_files}" ]] || [[ -n "${pr_detected}" ]]; then
         echo "=== Aggregating PRs ===" >&2
         python3 "${SCRIPT_DIR}/aggregate.py" \
             --prs --workdir "${WORKDIR}" >/dev/null || \

From 3a8ca11294bbf88f22bc786126d297acd6825f03 Mon Sep 17 00:00:00 2001
From: Patryk Matuszak <pmatusza@redhat.com>
Date: Fri, 3 Jul 2026 17:51:43 +0200
Subject: [PATCH 4/4] ci-doctor: render detected failed jobs with links and
 not-analyzed markers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each release and PR section now lists every detected failed job:
analyzed jobs link to their issue card's anchor, and jobs from
missing_analysis carry a badge-nodata "not analyzed" marker with the
reason — so a job that slipped through the analysis pipeline is visible
in the report instead of silently absent. The breakdown row gains a
"Not analyzed" count (deliberately without a bd-* class so the
today-filter JS recompute ignores it), and the rdata-None fallback copy
now points at aggregation errors, the only thing it can mean with the
overlay in place. Old summaries without the new fields render unchanged.
---
 plugins/shared/scripts/create-report.py | 62 ++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 2 deletions(-)

diff --git a/plugins/shared/scripts/create-report.py b/plugins/shared/scripts/create-report.py
index 4c0a330b..e97fa959 100755
--- a/plugins/shared/scripts/create-report.py
+++ b/plugins/shared/scripts/create-report.py
@@ -1203,6 +1203,50 @@ def _render_bug_links(bug_match):
 # HTML rendering
 # ---------------------------------------------------------------------------
 
+def _render_detected_jobs(anchor_prefix, data):
+    """Render the detected failed-jobs list for a release/PR section.
+
+    data is a release summary or a per-PR entry; anchor_prefix plus
+    "-<issue number>" is the in-page anchor an analyzed job links to.
+    Jobs in missing_analysis get a not-analyzed badge with the reason.
+    Returns HTML lines, or [] when there is nothing to list.
+    """
+    issues = data.get("issues") or []
+    missing = data.get("missing_analysis") or []
+    seen = set()
+    rows = []
+    for issue in issues:
+        anchor = f'{anchor_prefix}-{issue["number"]}'
+        for job in issue.get("affected_jobs", []):
+            key = job.get("build_id") or job.get("url") or job.get("name")
+            if key in seen:
+                continue
+            seen.add(key)
+            date = f'<span class="job-date">[{_e(job["date"][:10])}]</span> ' if job.get("date") else ""
+            name = _e(job.get("name", ""))
+            link = f'<a href="{_e(job["url"])}" target="_blank">{name}</a>' if job.get("url") else name
+            rows.append(f'                    <li>{date}{link} &mdash; '
+                        f'<a href="#{anchor}">Issue {issue["number"]}</a></li>')
+    for job in missing:
+        date = f'<span class="job-date">[{_e(job["finished"][:10])}]</span> ' if job.get("finished") else ""
+        name = _e(job.get("job_name", ""))
+        link = f'<a href="{_e(job["job_url"])}" target="_blank">{name}</a>' if job.get("job_url") else name
+        reason = _e(job.get("reason", "analysis missing"))
+        rows.append(f'                    <li>{date}{link} '
+                    f'<span class="badge badge-nodata">not analyzed &mdash; {reason}</span></li>')
+
+    if not rows:
+        return []
+    # Per-PR entries carry the detected count as "failed"; releases use "total_failed".
+    total = data.get("total_failed", data.get("failed", len(rows)))
+    return (
+        [f'            <p><strong>Detected failed jobs ({total})</strong></p>',
+         '                <ul class="detected-jobs">']
+        + rows
+        + ['                </ul>']
+    )
+
+
 def render_release_section(version, rdata, bug_candidates, index_info=None):
     if rdata is None:
         return (
@@ -1211,7 +1255,7 @@ def render_release_section(version, rdata, bug_candidates, index_info=None):
             f'                <h2>Release {_e(version)}</h2>\n'
             '                <span class="badge badge-nodata">no data</span>\n'
             '            </div>\n'
-            "            <p>Analysis failed to produce results.</p>\n"
+            "            <p>Analysis failed to produce results (aggregation error &mdash; check finalize logs).</p>\n"
             "        </div>"
         )
 
@@ -1247,8 +1291,14 @@ def render_release_section(version, rdata, bug_candidates, index_info=None):
     lines.append(f'                <span class="breakdown-item"><strong class="bd-build">{b["build"]}</strong> Build</span>')
     lines.append(f'                <span class="breakdown-item"><strong class="bd-test">{b["test"]}</strong> Test</span>')
     lines.append(f'                <span class="breakdown-item"><strong class="bd-infra">{b["infrastructure"]}</strong> Infrastructure</span>')
+    missing = rdata.get("missing_analysis") or []
+    if missing:
+        # No bd-* class: the today-filter JS recomputes only bd-* counts.
+        lines.append(f'                <span class="breakdown-item"><strong>{len(missing)}</strong> Not analyzed</span>')
     lines.append("            </div>")
 
+    lines.extend(_render_detected_jobs(f'release-{_e(version)}', rdata))
+
     lines.append('            <table class="issues-table">')
     for issue in rdata["issues"]:
         bug_match = match_issue_to_bugs(issue["title"], bug_candidates)
@@ -1406,12 +1456,18 @@ def render_pr_section(pr_data, bug_candidates, pr_status, pr_error=None):
         lines.append(f'                <span class="breakdown-item"><strong>{b.get("build", 0)}</strong> Build</span>')
         lines.append(f'                <span class="breakdown-item"><strong>{b.get("test", 0)}</strong> Test</span>')
         lines.append(f'                <span class="breakdown-item"><strong>{b.get("infrastructure", 0)}</strong> Infrastructure</span>')
+        pr_missing = (analysis.get("missing_analysis") or []) if analysis else []
+        if pr_missing:
+            lines.append(f'                <span class="breakdown-item"><strong>{len(pr_missing)}</strong> Not analyzed</span>')
         if pr["passed"]:
             lines.append(f'                <span class="breakdown-item"><strong>{pr["passed"]}</strong> Passed</span>')
         if pending:
             lines.append(f'                <span class="breakdown-item"><strong>{pending}</strong> Running</span>')
         lines.append("            </div>")
 
+        if analysis:
+            lines.extend(_render_detected_jobs(f'pr-{pr["number"]}', analysis))
+
         if analysis and analysis.get("issues"):
 
             lines.append('            <table class="issues-table">')
@@ -1696,7 +1752,9 @@ def main():
                     "collection_error": entry["error"],
                 }
             else:
-                # Distinguish "no failures" from "analysis failed" by checking the jobs file
+                # Aggregation itself failed (it writes a summary even when
+                # analyses are missing). Distinguish "no failures" from
+                # "aggregation crashed" by checking the detected jobs file.
                 jobs = load_json(entry["jobs"])
                 if jobs is not None and len(jobs) == 0:
                     rdata = {