openshift-eng · ggiguash · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026
diff --git a/plugins/lvms-ci/scripts/doctor-analyze.js b/plugins/lvms-ci/scripts/doctor-analyze.js
@@ -0,0 +1 @@
+../../shared/scripts/doctor-analyze.js
diff --git a/plugins/lvms-ci/skills/doctor/SKILL.md b/plugins/lvms-ci/skills/doctor/SKILL.md
@@ -3,7 +3,7 @@ name: lvms-ci:doctor
 argument-hint: [release1,release2,...]
 description: Analyze CI for LVMS periodic jobs and produce an HTML summary
 user-invocable: true
-allowed-tools: Skill, Bash, Read, Write, Glob, Grep, Agent
+allowed-tools: Skill, Bash, Read, Write, Glob, Grep, Workflow
 ---
 
 # lvms-ci:doctor
@@ -33,6 +33,18 @@ Compute once at the start by running `date +%y%m%d` and substituting into the pa
 
 ## Implementation Steps
 
+### Step 0: Verify Workflow Tool Availability
+
+**CRITICAL — hard requirement**: This skill requires the `Workflow` tool. Before proceeding, confirm that `Workflow` is listed in your available tools. If it is NOT available, STOP IMMEDIATELY and report this error to the user:
+
+```text
+ERROR: The Workflow tool is not available in this Claude Code version.
+The lvms-ci:doctor skill requires Workflow support (Claude Code 2.2+).
+Please upgrade Claude Code and retry.
+```
+
+Do NOT attempt to work around the missing tool by spawning individual agents, using sequential analysis, or any other fallback. The skill cannot produce correct results without deterministic workflow orchestration.
+
 ### Step 1: Prepare — Collect and Download All Artifacts
 
 **Goal**: Deterministically collect all failed jobs and download their artifacts before any LLM analysis.
@@ -65,26 +77,35 @@ Compute once at the start by running `date +%y%m%d` and substituting into the pa
 - If a release has no failed jobs, its jobs JSON will be an empty array — skip analysis for that release
 - If a release has an `"error"` field in the JSON summary, data collection failed for that release — report the error to the user but continue with other releases
 
-### Step 2: Analyze Each Job Using /lvms-ci:prow-job
+### Step 2: Analyze via Workflow
 
-**Goal**: Get detailed root cause analysis for each failed job using pre-downloaded artifacts.
+**Goal**: Run per-job analysis in parallel, orchestrated deterministically by a workflow script.
 
 **Actions**:
 
-1. Use the JSON summary output from Step 1 to build agent prompts. Do NOT read the job JSON files into the main conversation — the prepare script already printed all job details (artifacts_dir, build_id, job name) and agents receive artifacts_dir directly in their prompt.
-2. For **every** failed job across all releases, launch a separate **Agent** (using the `Agent` tool, NOT the `Skill` tool):
+1. Read `<WORKDIR>/workflows/analyze-jobs.json` (written by the prepare script in Step 1). If the array is empty, skip to Step 3.
+2. Invoke the Workflow tool:
 
    ```text
-   Agent: subagent_type=general_purpose, prompt="Analyze this Prow job and save the report:
-   1. Run /lvms-ci:prow-job <ARTIFACTS_DIR>
-   2. After the analysis completes, save the FULL report output (including the --- STRUCTURED SUMMARY --- block) to:
-      <WORKDIR>/jobs/release-<RELEASE>-job-<N>-<JOB_ID>.txt
-      Use the Write tool to save the file. The file must contain the complete analysis report."
+   Workflow(
+     scriptPath: "plugins/lvms-ci/scripts/doctor-analyze.js",
+     args: {
+       jobs: <contents of analyze-jobs.json, passed as a JSON array — not as a quoted string>,
+       prow_job_skill: "/lvms-ci:prow-job"
+     }
+   )
    ```
 
-3. Launch **ALL** agents in a **single message** as **foreground** agents (do NOT use `run_in_background`). Foreground agents in the same message run concurrently — this is just as fast as background agents but keeps your turn active until all complete.
-4. Say "Analyzing N jobs in parallel..." in your message text alongside the Agent tool calls.
-5. When all agents return, immediately proceed to Step 3 in the same turn. Do NOT stop or end your turn between Step 2 and Step 3.
+3. The workflow runs in the background and sends a completion notification when done. Wait for the notification, then proceed to Step 3. Do NOT stop or end your turn between Step 2 and Step 3.
+
+**CRITICAL — no fallback**: If the Workflow tool call fails for any reason (script error, timeout, API error), STOP and report the error to the user. Do NOT fall back to sequential Agent-based spawning — sequential execution risks hitting the turn timeout.
+
+The workflow runs all job analyses in parallel — each agent runs the prow-job skill and saves the report.
+
+**Error Handling**:
+
+- If individual analysis agents fail, the workflow continues with the remaining jobs
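+- The workflow returns `{analyzed, failed, total}` — use these for the summary. If the result also contains an `error` field (the script rejected its arguments), STOP and report that error to the user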
 
 ### Step 3: Finalize — Aggregate and Generate HTML Report
 
@@ -145,13 +166,13 @@ HTML report generated: <WORKDIR>/report-lvm-operator-ci-doctor.html
 
 ## Related Skills
 
-- **lvms-ci:prow-job**: Single job analysis (used by Step 2 agents)
+- **lvms-ci:prow-job**: Single job analysis (used by Step 2 workflow agents)
 
 ## Notes
 
 - **Deterministic scripts** handle: data collection, artifact download, aggregation, HTML generation
-- **LLM agents** handle: per-job root cause analysis (Step 2)
-- All agents are launched in a single parallel wave
+- **LLM agents** handle: per-job root cause analysis (via Step 2 workflow)
+- Step 2 uses a Workflow script (`doctor-analyze.js`) that guarantees parallel agent execution via `parallel()`
 - The `prepare` script downloads all artifacts upfront so prow-job agents use local paths (no redundant downloads)
 - The `finalize` script runs aggregation and HTML generation in one call
 - All intermediate files use prescribed filenames in `<WORKDIR>` — no improvised names

diff --git a/plugins/microshift-ci/scripts/doctor-analyze.js b/plugins/microshift-ci/scripts/doctor-analyze.js
@@ -0,0 +1 @@
+../../shared/scripts/doctor-analyze.js
diff --git a/plugins/microshift-ci/skills/doctor/SKILL.md b/plugins/microshift-ci/skills/doctor/SKILL.md
@@ -3,7 +3,7 @@ name: microshift-ci:doctor
 argument-hint: <release1,release2,...>
 description: Analyze CI for multiple MicroShift releases and produce an HTML summary
 user-invocable: true
-allowed-tools: Skill, Bash, Read, Write, Glob, Grep, Agent
+allowed-tools: Skill, Bash, Read, Write, Glob, Grep, Agent, Workflow
 ---
 
 # microshift-ci:doctor
@@ -32,6 +32,18 @@ Compute once at the start by running `date +%y%m%d` and substituting into the pa
 
 ## Implementation Steps
 
+### Step 0: Verify Workflow Tool Availability
+
+**CRITICAL — hard requirement**: This skill requires the `Workflow` tool. Before proceeding, confirm that `Workflow` is listed in your available tools. If it is NOT available, STOP IMMEDIATELY and report this error to the user:
+
+```text
+ERROR: The Workflow tool is not available in this Claude Code version.
+The microshift-ci:doctor skill requires Workflow support (Claude Code 2.2+).
+Please upgrade Claude Code and retry.
+```
+
+Do NOT attempt to work around the missing tool by spawning individual agents, using sequential analysis, or any other fallback. The skill cannot produce correct results without deterministic workflow orchestration.
+
 ### Step 1: Prepare — Collect and Download All Artifacts
 
 **Goal**: Deterministically collect all failed jobs and download their artifacts before any LLM analysis.
@@ -85,38 +97,35 @@ Compute once at the start by running `date +%y%m%d` and substituting into the pa
    - `4_disk_usage.png` — Disk usage by partition (% fill)
 3. If prerequisites are missing (`pcp2json`, `matplotlib`), the script errors and stops.
 
-### Step 2: Analyze Each Job Using /microshift-ci:prow-job
+### Step 2: Analyze via Workflow
 
-**Goal**: Get detailed root cause analysis for each failed job using pre-downloaded artifacts.
+**Goal**: Run per-job analysis in parallel, orchestrated deterministically by a workflow script.
 
 **Actions**:
 
-1. Use the JSON summary output from Step 1 to build agent prompts. Do NOT read the job JSON files into the main conversation — the prepare script already printed all job details (artifacts_dir, build_id, job name) and agents receive artifacts_dir directly in their prompt.
-2. For **every** failed job across all releases and PRs, launch a separate **Agent** (using the `Agent` tool, NOT the `Skill` tool). For PR jobs, only launch agents for jobs with FAILURE status.
-
-   **For release jobs:**
+1. Read `<WORKDIR>/workflows/analyze-jobs.json` (written by the prepare script in Step 1). If the array is empty, skip to Step 3.
+2. Invoke the Workflow tool:
 
    ```text
-   Agent: subagent_type=general_purpose, prompt="Analyze this Prow job and save the report:
-   1. Run /microshift-ci:prow-job <ARTIFACTS_DIR>
-   2. After the analysis completes, save the FULL report output (including the --- STRUCTURED SUMMARY --- block) to:
-      <WORKDIR>/jobs/release-<RELEASE>-job-<N>-<JOB_ID>.txt
-      Use the Write tool to save the file. The file must contain the complete analysis report."
+   Workflow(
+     scriptPath: "plugins/microshift-ci/scripts/doctor-analyze.js",
+     args: {
+       jobs: <contents of analyze-jobs.json, passed as a JSON array — not as a quoted string>,
+       prow_job_skill: "/microshift-ci:prow-job"
+     }
+   )
    ```
 
-   **For PR jobs:**
+3. The workflow runs in the background and sends a completion notification when done. Wait for the notification, then proceed to Step 3. Do NOT stop or end your turn between Step 2 and Step 3.
 
-   ```text
-   Agent: subagent_type=general_purpose, prompt="Analyze this Prow job and save the report:
-   1. Run /microshift-ci:prow-job <ARTIFACTS_DIR>
-   2. After the analysis completes, save the FULL report output (including the --- STRUCTURED SUMMARY --- block) to:
-      <WORKDIR>/jobs/prs-job-<N>-pr<PR>-<JOB_NAME_SUFFIX>.txt
-      Use the Write tool to save the file. The file must contain the complete analysis report."
-   ```
+**CRITICAL — no fallback**: If the Workflow tool call fails for any reason (script error, timeout, API error), STOP and report the error to the user. Do NOT fall back to sequential Agent-based spawning — sequential execution risks hitting the turn timeout.
+
+The workflow runs all job analyses in parallel — each agent runs the prow-job skill and saves the report.
+
+**Error Handling**:
 
-3. Launch **ALL** agents (all releases + PRs) in a **single message** as **foreground** agents (do NOT use `run_in_background`). Foreground agents in the same message run concurrently — this is just as fast as background agents but keeps your turn active until all complete.
-4. Say "Analyzing N jobs in parallel..." in your message text alongside the Agent tool calls.
-5. When all agents return, immediately proceed to Step 3 in the same turn. Do NOT stop or end your turn between Step 2 and Step 3.
+- If individual analysis agents fail, the workflow continues with the remaining jobs
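+- The workflow returns `{analyzed, failed, total}` — use these for the summary. If the result also contains an `error` field (the script rejected its arguments), STOP and report that error to the user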
 
 ### Step 3: Run Bug Correlation (Dry-Run)
 
@@ -214,16 +223,16 @@ HTML report generated: <WORKDIR>/report-microshift-ci-doctor.html
 
 ## Related Skills
 
-- **microshift-ci:prow-job**: Single job analysis (used by Step 2 agents)
+- **microshift-ci:prow-job**: Single job analysis (used by Step 2 workflow agents)
 - **microshift-ci:create-bugs**: Bug correlation and creation (used in Step 3; can also be run with `--create` after this command)
 - **microshift-ci:doctor-refresh**: Regenerate the HTML report from existing data (e.g., after `/microshift-ci:create-bugs --create`)
 
 ## Notes
 
 - **Deterministic scripts** handle: data collection, artifact download, aggregation, HTML generation
-- **LLM agents** handle: per-job root cause analysis (Step 2), Jira bug search and open bugs query (Step 3)
+- **LLM agents** handle: per-job root cause analysis (Step 2 workflow), Jira bug search and open bugs query (Step 3)
 - `/microshift-ci:doctor-refresh` regenerates the HTML report from existing data. Use it after `/microshift-ci:create-bugs --create` to include newly created bugs
-- Step 2 agents (per-job analysis) are launched in a single parallel wave
+- Step 2 uses a Workflow script (`doctor-analyze.js`) that guarantees parallel agent execution via `parallel()`
 - Step 3 uses a single create-bugs agent with all sources (releases + rebase) comma-separated
 - The `prepare` script downloads all artifacts upfront so prow-job agents use local paths (no redundant downloads)
 - The `finalize` script runs aggregation and HTML generation in one call

diff --git a/plugins/shared/scripts/doctor-analyze.js b/plugins/shared/scripts/doctor-analyze.js
@@ -0,0 +1,76 @@
+export const meta = {
+  name: 'doctor-analyze',
+  description: 'Analyze CI jobs in parallel via per-job prow-job skill invocations',
+  phases: [
+    { title: 'Analyze', detail: 'Per-job root cause analysis' },
+  ],
+}
+
+// Workflow script: starts one analysis agent per job via parallel() and
+// reports how many of them produced a result.
+//
+// args: {
+//   jobs: [{ artifacts_dir: string, output_path: string, label: string }],
+//   prow_job_skill: string,      // e.g. "/lvms-ci:prow-job" or "/microshift-ci:prow-job"
+// }
+//
+// Returns { analyzed, failed, total }; when the arguments are unusable all
+// counts are zero and an `error` string describes the problem.
+
+// Defend against the model passing args as a JSON string instead of an object.
+// A malformed string is reported through the same structured error as the
+// other argument checks rather than escaping as an uncaught exception.
+let a = args
+if (typeof args === 'string') {
+  try {
+    a = JSON.parse(args)
+  } catch (e) {
+    log('ERROR: args is not valid JSON: ' + e.message)
+    return { analyzed: 0, failed: 0, total: 0, error: 'args is not valid JSON: ' + e.message }
+  }
+}
+
+if (!a || !Array.isArray(a.jobs)) {
+  log('ERROR: args.jobs is missing or not an array')
+  return { analyzed: 0, failed: 0, total: 0, error: 'args.jobs is missing or not an array' }
+}
+if (!a.prow_job_skill) {
+  log('ERROR: args.prow_job_skill is missing')
+  return { analyzed: 0, failed: 0, total: 0, error: 'args.prow_job_skill is missing' }
+}
+
+phase('Analyze')
+log('Analyzing ' + a.jobs.length + ' jobs in parallel...')
+
+// One task per job; each task starts an agent whose prompt names the skill to
+// run, the artifacts directory to analyze, and the file to write the report to.
+const results = await parallel(a.jobs.map(function(job) {
+  return function() {
+    return agent(
+      'Analyze this Prow job and save the report:\n' +
+      '1. Run ' + a.prow_job_skill + ' ' + job.artifacts_dir + '\n' +
+      '2. After the analysis completes, save the FULL report output' +
+      ' (including the --- STRUCTURED SUMMARY --- block) to:\n' +
+      '   ' + job.output_path + '\n' +
+      '   Use the Write tool to save the file.' +
+      ' The file must contain the complete analysis report.',
+      { label: job.label, phase: 'Analyze' }
+    )
+  }
+}))
+
+// A null/undefined entry is counted as a failed analysis.
+// NOTE(review): assumes parallel() yields null for a task whose agent failed — confirm.
+const analyzed = results.filter(function(r) { return r != null }).length
+const failed = results.length - analyzed
+if (failed > 0) {
+  log('Analysis complete: ' + analyzed + '/' + results.length + ' jobs analyzed, ' + failed + ' failed')
+} else {
+  log('Analysis complete: all ' + analyzed + ' jobs analyzed')
+}
+
+return {
+  analyzed: analyzed,
+  failed: failed,
+  total: results.length,
+}
diff --git a/plugins/shared/scripts/doctor.sh b/plugins/shared/scripts/doctor.sh
@@ -224,6 +224,40 @@ cmd_prepare() {
     fi
 
     echo "${result}"
+
+    # Build workflow args: map each job to its output path for parallel analysis
+    mkdir -p "${WORKDIR}/workflows"
+    local workflow_jobs="[]"
+
+    # Release jobs: no status field (all are already failures from the prepare filter)
+    for release in "${RELEASES[@]}"; do
+        release=$(echo "${release}" | xargs)
+        local jobs_file="${WORKDIR}/jobs/release-${release}-jobs.json"
+        [[ -f "${jobs_file}" ]] || continue
+
+        workflow_jobs=$(echo "${workflow_jobs}" | jq --arg r "${release}" --arg w "${WORKDIR}" \
+            --slurpfile jobs "${jobs_file}" \
+            '. + [$jobs[0] | to_entries[] | {
+                artifacts_dir: .value.artifacts_dir,
+                output_path:   ($w + "/jobs/release-" + $r + "-job-" + ((.key+1)|tostring) + "-" + .value.build_id + ".txt"),
+                label:         ($r + "-job-" + ((.key+1)|tostring))
+            }]')
+    done
+
+    # PR jobs: pre-filter to FAILURE only so the workflow analyzes exactly what's needed
+    if ${do_rebase} && [[ -f "${WORKDIR}/jobs/prs-jobs.json" ]]; then
+        workflow_jobs=$(echo "${workflow_jobs}" | jq --arg w "${WORKDIR}" \
+            --slurpfile jobs "${WORKDIR}/jobs/prs-jobs.json" \
+            '. + [$jobs[0] | to_entries[] | select(.value.status == "FAILURE") | {
+                artifacts_dir: .value.artifacts_dir,
+                output_path:   ($w + "/jobs/prs-job-" + ((.key+1)|tostring) + "-pr" + (.value.pr_number|tostring) + "-" + .value.build_id + ".txt"),
+                label:         ("pr" + (.value.pr_number|tostring) + "-job-" + ((.key+1)|tostring))
+            }]')
+    fi
+
+    # Write the workflow args file
+    echo "${workflow_jobs}" > "${WORKDIR}/workflows/analyze-jobs.json"
+    echo "Workflow args written to ${WORKDIR}/workflows/analyze-jobs.json" >&2
 }
 
 # ---------------------------------------------------------------------------