diff --git a/apps/dashboard/smoke/action-packet-smoke.ts b/apps/dashboard/smoke/action-packet-smoke.ts index e1311870..8838ab74 100644 --- a/apps/dashboard/smoke/action-packet-smoke.ts +++ b/apps/dashboard/smoke/action-packet-smoke.ts @@ -87,7 +87,7 @@ const legacyFallbackPacket = buildActionPacket({ gateFallbackDecision: "保持 status inspection;补 project_asset 后再恢复 delivery。", boundary: "This is a legacy/raw fallback; do not infer owner, gate, or stop condition authority.", safePathLabel: "Legacy status inspection", - command: "loopx status --goal-id legacy-status-only", + command: "loopx diagnose --goal-id legacy-status-only --limit 20", projectNextAction: "Continue from raw status field.", projectStopCondition: "Stop before any delivery claim.", projectAssetSource: "legacy_raw_fallback", @@ -111,7 +111,7 @@ const focusWaitPacket = buildActionPacket({ gateFallbackDecision: "继续保持 focus wait;有新 owner evidence、clean baseline 或外部 eval 后再恢复 delivery。", boundary: "这不是 delivery approval;项目 Agent 只做 status/history inspection,不执行交付路径、写入、reward append 或生产动作。", safePathLabel: "Status/history inspection only", - command: "loopx --registry ./examples/registry.example.json --runtime-root ./tmp/runtime status --goal-id focus-wait-owner-blocker", + command: "loopx --registry ./examples/registry.example.json --runtime-root ./tmp/runtime diagnose --goal-id focus-wait-owner-blocker --limit 20", }); assert(focusWaitPacket.includes("目标:focus-wait-owner-blocker"), "missing focus-wait goal id"); @@ -135,7 +135,7 @@ const platformMigrationNoEvidencePacket = buildActionPacket({ boundary: "Use only sanitized status/history/material counts; do not read private evidence, internal links, raw paths, or production state.", durableRecordRule: null, safePathLabel: "No-evidence status/packet sanity", - command: "loopx status --goal-id platform-migration-material-registry --limit 20", + command: "loopx diagnose --goal-id platform-migration-material-registry --limit 20", quotaShortLine: "Eligible; 
0/1440 slots", authorityShortLine: "entries 0/3; topics 3; materials 6; repos 2; owner review 1; stale 1; risk medium", projectOwner: "codex", diff --git a/examples/install-local-smoke.py b/examples/install-local-smoke.py index f38ac9ad..99703fda 100644 --- a/examples/install-local-smoke.py +++ b/examples/install-local-smoke.py @@ -178,11 +178,16 @@ def main() -> int: self_repair_text = " ".join(self_repair_skill.read_text(encoding="utf-8").split()) for phrase in ( "Build a compact evidence packet", + "loopx --format json diagnose --goal-id ", + "loopx --format json status --limit 20", + "intentionally a registry/dashboard view rather than a goal-filtered command", + "registry-declared active state file", "references/repair-patterns.md", "Repair at the lowest durable layer", "Do not solve contradictory payloads by guessing", ): assert phrase in self_repair_text, phrase + assert "status --goal-id " not in self_repair_text, self_repair_text self_repair_patterns = ( codex_home / "skills" @@ -192,6 +197,7 @@ def main() -> int: ) self_repair_patterns_text = self_repair_patterns.read_text(encoding="utf-8") assert "`boundary_projection_gap`" in self_repair_patterns_text, self_repair_patterns_text + assert "`skill_cli_contract_drift`" in self_repair_patterns_text, self_repair_patterns_text assert "`tiny_turn_under_delivery`" in self_repair_patterns_text, self_repair_patterns_text assert ( codex_home / "skills" / "loopx-self-repair" / "agents" / "openai.yaml" diff --git a/examples/review-packet-cli-smoke.py b/examples/review-packet-cli-smoke.py index feb92d70..8f6d76ec 100644 --- a/examples/review-packet-cli-smoke.py +++ b/examples/review-packet-cli-smoke.py @@ -735,7 +735,7 @@ def assert_missing_project_asset_review_packet_fallback() -> None: "waiting_on": "codex", "severity": "action", "recommended_action": "Continue only through raw status fallback.", - "agent_command": "loopx status --goal-id legacy-status-only", + "agent_command": "loopx diagnose --goal-id 
legacy-status-only --limit 20", "source": "latest_run", } ] diff --git a/examples/review-packet-smoke.py b/examples/review-packet-smoke.py index 16a46065..4348a80e 100644 --- a/examples/review-packet-smoke.py +++ b/examples/review-packet-smoke.py @@ -126,7 +126,7 @@ def build_sanitized_focus_wait_packet() -> str: goal_id = "focus-wait-owner-blocker" status_command = ( "loopx --registry ./examples/registry.example.json " - "--runtime-root ./tmp/runtime status --goal-id focus-wait-owner-blocker" + "--runtime-root ./tmp/runtime diagnose --goal-id focus-wait-owner-blocker --limit 20" ) return "\n".join( [ diff --git a/loopx/doctor.py b/loopx/doctor.py index 340f9a72..92e9538c 100644 --- a/loopx/doctor.py +++ b/loopx/doctor.py @@ -39,6 +39,9 @@ ), "loopx-self-repair": ( "Build a compact evidence packet", + "loopx --format json diagnose --goal-id ", + "loopx --format json status --limit 20", + "registry-declared active state file", "references/repair-patterns.md", "Repair at the lowest durable layer", ), diff --git a/skills/loopx-self-repair/SKILL.md b/skills/loopx-self-repair/SKILL.md index 01b51e9e..b5764aec 100644 --- a/skills/loopx-self-repair/SKILL.md +++ b/skills/loopx-self-repair/SKILL.md @@ -16,14 +16,17 @@ not only an apology or a one-off explanation. ```bash git status --short --branch - loopx --format json status --goal-id <goal-id> - loopx --format json quota should-run --goal-id <goal-id> + loopx --format json diagnose --goal-id <goal-id> + loopx --format json status --limit 20 + loopx --format json quota should-run --goal-id <goal-id> [--agent-id <agent-id>] loopx --format json history --goal-id <goal-id> --limit 5 ``` - Also inspect the project-local registry and - `.codex/goals/<goal-id>/ACTIVE_GOAL_STATE.md` when relevant. Use the shared - global registry for heartbeat/quota truth. + `status` is intentionally a registry/dashboard view rather than a + goal-filtered command; use `diagnose --goal-id` for the goal-specific agent + evidence packet. 
Also inspect the project-local registry and the + registry-declared active state file when relevant. Use the shared global + registry for heartbeat/quota truth. 3. **Classify the failure.** Read `references/repair-patterns.md` and match the symptoms to a known pattern. If no pattern fits, add one after the fix. diff --git a/skills/loopx-self-repair/references/repair-patterns.md b/skills/loopx-self-repair/references/repair-patterns.md index 774684fb..e1be2491 100644 --- a/skills/loopx-self-repair/references/repair-patterns.md +++ b/skills/loopx-self-repair/references/repair-patterns.md @@ -22,6 +22,7 @@ teaches a reusable control-plane lesson. | `status_projection_history_neutral_gap` | `quota should-run` reports no user action or quiet monitor behavior, but `status` / `diagnose --limit N` misreports a stale controller/user gate or older connected-without-run state after recent monitor/spend/readiness events. | quota payload, diagnose/status payload, recent history classifications, neutral-run classification sets, UI `--limit`, latest meaningful state run. | Status/history/quota disagree on which runs are status-neutral, or a short display limit is reused as the control-plane reasoning window. | Share the neutral classification contract across history/status/quota and reason over a wider internal state window before trimming displayed runs; add a regression with consecutive neutral runs before a meaningful state transition. | | `monitor_replan_noop_loop` | Recent runs are monitor-only, replan, or self-repair-adjacent, but the same monitor/action recommendation keeps returning and no runnable todo, blocker, successor, supersede, user gate, capability change, or monitor-target change appears. | quota payload, interaction contract, todo summaries, recent history classifications, latest replan ACK, monitor target, active state todos. | Self-repair/replan was recorded as activity but did not change the machine-visible work frontier. 
| Require a repair delta contract: classify no-delta repairs as no-ops, then add a blocker, supersede stale monitor work, create a successor runnable todo, or record an explicit watch-lane continuation with expiry. | | `scheduler_liveness_backoff_gap` | `quota should-run` says `should_run=false`, `quiet_noop_allowed=true`, no spend, or `automation_action=keep_active_quiet`, but the host keeps polling at the bootstrap cadence for hours; Codex CLI TUI or Claude Code `/loop` also keeps repeating unchanged checks. | quota payload `automation_liveness`, `interaction_contract`, `heartbeat_recommendation`, host automation cadence, CLI/Claude loop logs or statusline, recent no-spend poll history. | LoopX separated execution permission from user notification, but did not expose a machine-readable next-wakeup/backoff/final-check/exit policy for host runtimes. | Add or repair `quota should-run.scheduler_hint`; host runtimes must apply Codex App cadence backoff and CLI/Claude unchanged-poll final quota/replan check before self-stop, without quota spend. Prompt/docs should treat fixed 3-minute cadence as bootstrap only. | +| `skill_cli_contract_drift` | A skill, installed release snapshot, generated prompt, smoke fixture, or agent handbook suggests a CLI form that the current parser rejects, such as `loopx status --goal-id`. | `loopx --help`, failing command stderr, `rg` over `skills/`, installed skill copies, release snapshot, generated prompts, and relevant smoke fixtures. | Process documentation drifted from the CLI contract, or a fixture preserved old command text without labeling it as legacy/non-executable. | Update the durable skill/source docs and installed skill copy, add or update a focused smoke that rejects the stale command in executable instructions, and keep legacy fixture strings only when the test explicitly asserts they are untrusted historical payload. 
| | `loop_surface_activation_gap` | A thread reports LoopX setup success because `register-agent` or `quota should-run --agent-id` works, but no Codex App heartbeat automation, Codex CLI `/goal`, or other host loop has been installed for that goal. | `upgrade-plan` / host-loop activation status, `$CODEX_HOME/automations/*/automation.toml`, generated `heartbeat-prompt`, target thread id, recent setup final answer, registry coordination fields. | The control-plane identity layer was treated as the host scheduler layer; setup success criteria stopped at registry/quota instead of proving `host_loop_activation.activated=true` or surfacing a concrete host-tool gate. | Project setup and agent registration must surface `host_loop_activation`; create/update the host loop from generated scoped `heartbeat-prompt`, or report the exact missing host capability. Do not claim "connected" as autonomous until the host loop is active. | | `tiny_turn_under_delivery` | Many heartbeats make only one small doc/prose edit despite quota permitting a coherent batch. | recent history durations, active priority stack, user feedback, todo completion rate. | Agent optimized for avoiding mistakes rather than delivering a bounded batch. | Run steering audit, choose a larger verifiable batch, and record why it remains within boundary. Consider cadence/budget policy updates if repeated. | | `todo_succession_gap` | A non-trivial feature todo is marked done after PR/smoke success, but rollout, product-path audit, docs, telemetry, benchmark proof, or operator-decision follow-up is only mentioned in prose or not recorded at all. | completed todo note/evidence, recent PR/run history, active `Agent Todo`, active `Next Action`, related design docs. | Agent treated an implementation slice as feature completion, or LoopX lacked a reminder to create the successor todo. | Keep the lifecycle simple: do not add many feature states. 
Complete the old todo and create the next concrete agent/user todo with `--next-agent-todo`/`todo add`, or record a compact no-follow-up rationale. |