diff --git a/AGENTS.md b/AGENTS.md index 67d2449d..89a0dea6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -53,6 +53,21 @@ submission behavior, permission boundaries, or launch new benchmark jobs. After self-merging, sync local `main`, leave unrelated untracked local artifacts alone, and continue with the next safe project batch. +## First-Screen Review Gate + +Treat the first visible screen of public product surfaces as owner-reviewed +presentation, not as ordinary copy. Before committing, pushing, or self-merging +changes that alter the first viewport, hero block, primary CTA, or opening +navigation of README, hosted frontstage, showcase index pages, product home +pages, or similarly prominent public entry points, show the user a preview +first and wait for approval. + +The preview should be concrete enough to judge the presentation: provide the +local URL and, when the surface is visual HTML, a screenshot or browser view of +the first viewport. Do not move the review gate into a PR comment, todo note, or +final summary after the fact. It must happen before the public first-screen +change is finalized. + ## Public And Private Boundary Do not commit internal department, team, customer, meeting, reporting, diff --git a/README.md b/README.md index 8935180d..727daa54 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,23 @@ points: For more cases, open the [showcase catalog](docs/showcases/README.md). For a full presenter material, see the experimental notes below. +### Experimental: Today Value Path + +This is not replacing the first screen. It is an experimental entry point for +users who already understand the control-plane idea and want to pick one useful +LoopX capability today: + +| Capability / path | Expected output | User value metric | +| --- | --- | --- | +| PR review/comment -> fix loop | Branch-ready fix packet with repro, smoke result, and remaining review owner. | Fewer dropped review threads; faster path from comment to validated patch. 
| +| Overnight PR-sized refactor | Reviewable slice list, validation notes, successor todo, and merge boundary. | More merged commits without turning the next morning into a giant diff audit. | +| P0 blocked -> safe fallback | Kernel projection of the exact user gate, safe fallback todo, quota decision, and evidence boundary inside an active goal. | Less idle agent time while preserving human judgment on the blocked path. | + +Start the goal normally with `/loopx <goal>`. The PR review and refactor +paths can be requested directly in plain language; the P0 safe-fallback path is +a repository kernel behavior that appears when an active goal has a concrete +blocking user gate and safe P1/P2 work remains. + ## User Mental Model LoopX has more kernel concepts than a user should have to think about every diff --git a/README.zh-CN.md b/README.zh-CN.md index 494f10e4..c8a07793 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -128,6 +128,21 @@ loopx bootstrap \ 完整案例目录见 [docs/showcases/README.md](docs/showcases/README.md)。 更完整的演示材料放在文末实验性能力里。 +### Experimental: Today Value Path + +这不会替代首屏。它先作为实验性入口放在案例区下面,给已经理解控制面价值的用户 +快速选一个今天就能用的 LoopX 能力: + +| 能力 / 路径 | 预期产物 | 用户价值指标 | +| --- | --- | --- | +| PR review/comment -> fix loop | 可复核的修复包:repro、smoke 结果、剩余 review owner。 | 更少遗漏 review 线程,更快从 comment 走到 validated patch。 | +| Overnight PR-sized refactor | 可 review 的 slice 列表、验证记录、后续 todo、merge 边界。 | 增加可合并 commit,而不是第二天早上面对一个巨型 diff。 | +| P0 blocked -> safe fallback | 在已有 goal 内由 kernel 投影具体 user gate、安全 fallback todo、quota 决策和证据边界。 | 等人决策时减少 agent 空转,同时保留人类判断。 | + +正常用 `/loopx <goal>` 启动目标即可。PR review 和 refactor 路径可以用自然语言 +直接描述;P0 safe fallback 不是单独命令,而是已有 goal 出现具体阻塞 user gate、 +且还有安全 P1/P2 工作时触发的仓库 kernel 行为。 + ## 它是什么 LoopX 不是另一个 agent runtime,也不是要替代 Codex、Claude Code、 diff --git a/apps/dashboard/src/views/frontstage-page.tsx b/apps/dashboard/src/views/frontstage-page.tsx index 69fb4ec7..bc98da84 100644 --- a/apps/dashboard/src/views/frontstage-page.tsx +++ 
b/apps/dashboard/src/views/frontstage-page.tsx @@ -839,6 +839,67 @@ const showcaseMotionTones = [ }, ]; +const todayValueWorkflows = [ + { + workflow: "PR review/comment -> fix loop", + output: "Branch-ready fix packet with repro, smoke result, and remaining review owner.", + metric: "Fewer dropped review threads; faster path from comment to validated patch.", + start: "/loopx fix this PR feedback", + }, + { + workflow: "Overnight PR-sized refactor", + output: "Reviewable slice list, validation notes, successor todo, and merge boundary.", + metric: "More merged commits without turning the next morning into a giant diff audit.", + start: "/loopx split this refactor into reviewable slices", + }, + { + workflow: "P0 blocked -> safe fallback", + output: "Kernel projection of the exact user gate, safe fallback todo, quota decision, and evidence boundary.", + metric: "Less idle agent time while preserving human judgment on the blocked path.", + start: "Appears inside an active /loopx goal when a concrete P0 gate blocks one lane and safe P1/P2 work remains.", + }, +]; + +function ExperimentalTodayValuePanel() { + return ( + +
+
+
+

Pick one capability that earns value today

+

+ This lower-priority module does not replace the first screen. It gives evaluators three concrete + LoopX capabilities with expected output and user-facing value metrics. +

+
+ experimental +
+
+ {todayValueWorkflows.map((item) => ( +
+
capability
+

{item.workflow}

+
+

+ Output: + {item.output} +

+

+ Value metric: + {item.metric} +

+ + {item.start} + +
+
+ ))} +
+
+
+ ); +} + function ShowcaseMotionBoard() { const [activeCaseId, setActiveCaseId] = useState(frontstageShowcases[0]?.id ?? ""); if (!frontstageShowcases.length) { @@ -1994,6 +2055,8 @@ function FrontstageRoute({ {!isDeveloperMode ? : null} + {!isDeveloperMode ? : null} + {!isDeveloperMode ? : null} {isDeveloperMode ? : null} diff --git a/docs/showcases/cases/0617-blocked-p0-safe-rotation.en.html b/docs/showcases/cases/0617-blocked-p0-safe-rotation.en.html index fa1e3e8e..4b2710e2 100644 --- a/docs/showcases/cases/0617-blocked-p0-safe-rotation.en.html +++ b/docs/showcases/cases/0617-blocked-p0-safe-rotation.en.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment 
p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -110,15 +120,15 @@

Blocked P0 with safe P1/P2 rotation

01

Case context

-

This case shows what should happen when a P0 route is blocked by a user decision: the system should neither keep forcing that lane nor stop the whole goal. The original shape was a benchmark rotation where one lane needed a large local image while other no-upload benchmark work remained safe.

The public repository does not expose raw benchmark tasks or local image names. It reproduces the control-plane behavior with a synthetic smoke.

+

This case shows what should happen when a P0 route is blocked by a user decision: the system should neither keep forcing that lane nor stop the whole goal. The original shape was a benchmark rotation where one lane needed a large local image while other no-upload benchmark work remained safe.

The public repository does not expose raw benchmark tasks or local image names. It reproduces the control-plane behavior with a synthetic smoke. The user-facing value is seeing one concrete P0 decision while safe fallback work can continue and the gated lane does not burn automated progress budget.

02

Repository evidence

Proof

A user decision should not block all safe work.

LoopX intervention

concrete user todo, safe fallback, quota control

- -
Synthetic fixture

`examples/showcase-0617-blocked-p0-safe-rotation-smoke.py` creates a P0 user gate, a P0 agent todo blocked by that gate, and a P1 no-upload fallback.

Quota contract

The smoke asserts `should_run=True`, `requires_user_action=True`, `safe_bypass_allowed=True`, and `safe_bypass_kind=scoped_user_gate_fallback`.

Selected fallback

The fixture selects `terminal_bench_no_upload` while preserving the `ale_image` gate as the user-visible blocker.

Rendered evidence

The smoke checks markdown for `scoped_user_gate_fallback` and safe no-upload Terminal-Bench rotation.

+
1 concrete P0 user decision
1 safe fallback lane
0 gated-lane auto-progress
0 private upload dependency
+
Synthetic fixture

`examples/showcase-0617-blocked-p0-safe-rotation-smoke.py` reproduces a P0 user gate, a P0 agent lane blocked by that gate, and a P1 no-upload fallback.

Quota contract

The smoke pins `should_run=True`, `requires_user_action=True`, `safe_bypass_allowed=True`, `safe_bypass_kind=scoped_user_gate_fallback`, and related fallback evidence.

Selected fallback

The fixture selects `terminal_bench_no_upload` while preserving the `ale_image` gate as the user-visible blocker.

Rendered evidence

The smoke checks markdown for `scoped_user_gate_fallback` and safe no-upload Terminal-Bench rotation.

03

LoopX behavior

  • 1
    The user todo names the concrete P0 decision instead of saying only "owner gate".
  • 2
    The agent does not spend compute on the gated lane; it selects fallback work that does not depend on the decision.
  • 3
    State records both the blocker and the fallback reason so P0 can resume later.
diff --git a/docs/showcases/cases/0617-blocked-p0-safe-rotation.html b/docs/showcases/cases/0617-blocked-p0-safe-rotation.html index e5847df3..d664b663 100644 --- a/docs/showcases/cases/0617-blocked-p0-safe-rotation.html +++ b/docs/showcases/cases/0617-blocked-p0-safe-rotation.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -110,15 +120,15 @@

P0 block 后推进 P1/P2

01

案例背景

-

这个案例展示 P0 被用户决策卡住时,系统不应该继续硬跑,也不应该让整个目标停摆。原场景是 benchmark rotation:一个 lane 需要大型本地 image,其他 no-upload benchmark work 仍然安全。

公开仓库没有暴露原始 benchmark task 或本地 image 名,而是用 synthetic smoke 复现控制面行为。

+

这个案例展示 P0 被用户决策卡住时,系统不应该继续硬跑,也不应该让整个目标停摆。原场景是 benchmark rotation:一个 lane 需要大型本地 image,其他 no-upload benchmark work 仍然安全。

公开仓库没有暴露原始 benchmark task 或本地 image 名,而是用 synthetic smoke 复现控制面行为。用户价值是明确看到一个需要决策的 P0,同时安全 fallback 可以继续,且 gated lane 不消耗额外自动推进预算。

02

仓库证据

证明点

被阻塞的 P0 决策不应该阻止安全的 P1/P2 工作继续。

LoopX 介入

concrete user todo、safe fallback、quota control

- -
synthetic fixture

`examples/showcase-0617-blocked-p0-safe-rotation-smoke.py` 构造 P0 user gate、被 gate 阻塞的 P0 agent todo 和 P1 no-upload fallback。

quota contract

smoke 断言 `should_run=True`、`requires_user_action=True`、`safe_bypass_allowed=True`、`safe_bypass_kind=scoped_user_gate_fallback`。

selected fallback

fixture 选择 `terminal_bench_no_upload`,同时保留 `ale_image` gate 的 user-visible blocker。

rendered evidence

smoke 检查 markdown 中包含 `scoped_user_gate_fallback` 和 safe no-upload Terminal-Bench rotation。

+
1 具体 P0 用户决策
1 safe fallback lane
0 gated lane 自动推进
0 private upload 依赖
+
synthetic fixture

`examples/showcase-0617-blocked-p0-safe-rotation-smoke.py` 复现 P0 user gate、被 gate 阻塞的 P0 agent lane 和 P1 no-upload fallback。

quota contract

smoke 固定 `should_run=True`、`requires_user_action=True`、`safe_bypass_allowed=True`、`safe_bypass_kind=scoped_user_gate_fallback` 等关键 contract。

selected fallback

fixture 选择 `terminal_bench_no_upload`,同时保留 `ale_image` gate 的 user-visible blocker。

rendered evidence

smoke 检查 markdown 中包含 `scoped_user_gate_fallback` 和 safe no-upload Terminal-Bench rotation。

03

LoopX 行为

  • 1
    用户 todo 具体命名 P0 决策,不用“owner gate”这种空话。
  • 2
    agent 不在 gated lane 上花 compute;只选择不依赖该决策的 fallback。
  • 3
    状态同时记录 blocker 和 fallback reason,方便之后恢复 P0。
diff --git a/docs/showcases/cases/0619-dynamic-workflow-hardware-agent.en.html b/docs/showcases/cases/0619-dynamic-workflow-hardware-agent.en.html index 322e3d35..b382ed48 100644 --- a/docs/showcases/cases/0619-dynamic-workflow-hardware-agent.en.html +++ b/docs/showcases/cases/0619-dynamic-workflow-hardware-agent.en.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + 
.experiment-card span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -117,7 +127,7 @@

Dynamic workflow for hardware-agent development

Proof

Fuzzy goals, multiple workers, and long unattended runs can still converge.

LoopX intervention

goal state, worker handoff, dynamic workflow

- +
5 public-safe hardware workflows
3 LoopX/orchestrator/worker role split
1 canonical artifact preserved
0 proprietary design details exposed
Public artifact

The canonical HTML page includes the approved hardware workflow artifact and five public-safe hardware cases.

Case family

The companion note names closed validation, timing optimization, design-space exploration, Fmax optimization, and convergence to an engineering floor.

Boundary

The public artifact excludes raw chats, screenshots, proprietary design details, private repos, local paths, task ids, credentials, and unpublished hardware artifacts.

03

LoopX behavior

diff --git a/docs/showcases/cases/0619-loopx-self-iteration.en.html b/docs/showcases/cases/0619-loopx-self-iteration.en.html index b9e2b2d9..f6877c9c 100644 --- a/docs/showcases/cases/0619-loopx-self-iteration.en.html +++ b/docs/showcases/cases/0619-loopx-self-iteration.en.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -117,7 +127,7 @@

LoopX self-iteration loop

Proof

A high-churn multi-lane project can keep state, boundaries, and evidence coherent.

LoopX intervention

todo, quota, gate, evidence, review packet, frontstage

-
801public commits
59-92dAI-assisted baseline
3.0-4.7xcalendar compression
+
801 public commits
244 commits since Jun 18
59-92d AI-assisted baseline
3.0-4.7x calendar compression
Whole repository

Through the anchor commit: 801 public commits, 570 touched files, 265703 insertions, and 49895 deletions.

Recent window

Since 2026-06-18: 244 public commits, 216 touched files, 52898 insertions, and 20935 deletions.

June 19 signal

On 2026-06-19: 74 public commits, 118 touched files, 16087 insertions, and 1082 deletions.

Efficiency model

The case maps public repo capabilities to 9 requirement clusters and estimates 59-92 AI-coding-assisted developer-days against a 19.6-day public window.

03

LoopX behavior

diff --git a/docs/showcases/cases/0619-loopx-self-iteration.html b/docs/showcases/cases/0619-loopx-self-iteration.html index 7cdf5901..447696ce 100644 --- a/docs/showcases/cases/0619-loopx-self-iteration.html +++ b/docs/showcases/cases/0619-loopx-self-iteration.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -117,7 +127,7 @@

LoopX Meta Agent 自迭代

证明点

高 churn 多 lane agent repo 可以保持状态、证据和边界一致。

LoopX 介入

todo、quota、gate、evidence、review packet、frontstage

-
801public commits
59-92dAI-assisted baseline
3.0-4.7xcalendar compression
+
801 public commits
244 06-18 后 commits
59-92d AI-assisted baseline
3.0-4.7x calendar compression
全仓库证据

截至 anchor commit:801 个 public commits、570 个 touched files、265703 行新增、49895 行删除。

近期窗口

2026-06-18 起有 244 个 public commits、216 个 touched files、52898 行新增、20935 行删除。

0619 当日

2026-06-19 有 74 个 public commits、118 个 touched files、16087 行新增、1082 行删除。

效率模型

把公开仓库能力拆成 9 个 requirement clusters,保守估计 59-92 个 AI-coding-assisted developer-days,对 19.6 天 public window 得出方向性 compression。

03

LoopX 行为

diff --git a/docs/showcases/cases/0620-creator-operator-case-spec.en.html b/docs/showcases/cases/0620-creator-operator-case-spec.en.html index 962d7586..40d97286 100644 --- a/docs/showcases/cases/0620-creator-operator-case-spec.en.html +++ b/docs/showcases/cases/0620-creator-operator-case-spec.en.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -110,15 +120,15 @@

Creator-operator long-running agent case

01

Case context

-

This is an appendix case: it shows how a non-technical creator-operator might use LoopX to manage a long-running research and planning loop, but it is not a top-card proof until real public user evidence exists.

The public material uses only synthetic data. The product shape is trend candidates, preference map, insight board, draft queue, material library, human feedback, and controlled replan.

+

This is an appendix case: it shows how a non-technical creator-operator might use LoopX to manage a long-running research and planning loop, but it is not a top-card proof until real public user evidence exists.

The public material uses only synthetic data. The point is not proving growth; it is proving the product boundary: research and material organization can continue, while publishing, brand judgment, and external action remain human gates.

02

Repository evidence

Proof

A creator-operator needs a long-running agent loop that keeps research moving while publishing decisions stay gated.

LoopX intervention

creator_operator_workflow, gate_aware_continuation, feedback_capture, safe_side_path

- -
Storyboard

`creator-ops-fake-data-storyboard.md` defines seven panels and a full fake fixture with no live platform access.

Feedback contract

`creator-ops-feedback-boundary-contract.md` separates gate decision, preference hint, todo update, boundary correction, reward signal, and product improvement note.

Source status

The contract requires every topic, insight, draft, and material item to carry source status; public repo defaults to `synthetic_demo`.

No autopublish

Publishing is a hard user gate; safe side work may continue, but preference or reward is not publication approval.

+
1 publish hard gate
1 safe research side path
0 autopublish actions
0 real operations data exposed
+
Storyboard

`creator-ops-fake-data-storyboard.md` provides a complete fake fixture with no live platform access, suitable for showing the user journey without exposing real operations data.

Feedback contract

`creator-ops-feedback-boundary-contract.md` separates gate decisions, preference hints, todo updates, boundary corrections, reward signals, and product improvement notes so preference is not mistaken for publish approval.

Source status

The contract requires every topic, insight, draft, and material item to carry source status; the public repo defaults to `synthetic_demo` instead of pretending to show real growth evidence.

No autopublish

Publishing is a hard user gate; safe side work may continue, but preference or reward is not publication approval.

03

LoopX behavior

  • 1
    The creative objective is durable goal state, not a hidden task inside chat.
  • 2
    Publish/no-publish is a user gate; research, organization, and source-status work can continue as safe side paths.
  • 3
    Feedback becomes a preference hint, gate decision, todo update, or boundary correction.
  • 4
    Before the next agent run, the user can see the blocked route, safe side path, and validation expectation.
diff --git a/docs/showcases/cases/0620-creator-operator-case-spec.html b/docs/showcases/cases/0620-creator-operator-case-spec.html index 9fa86825..5ac785ae 100644 --- a/docs/showcases/cases/0620-creator-operator-case-spec.html +++ b/docs/showcases/cases/0620-creator-operator-case-spec.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -110,15 +120,15 @@

创作者-运营者长跑 Agent 案例

01

案例背景

-

这是 appendix case:它展示非技术创作者/运营者如何用 LoopX 管一个长期 research + planning loop,但还不是 top-card proof,因为没有真实用户公开证据。

公开材料完全使用 synthetic data,重点是产品形态:趋势候选、偏好映射、insight board、draft queue、material library、人类反馈和 controlled replan。

+

这是 appendix case:它展示非技术创作者/运营者如何用 LoopX 管一个长期 research + planning loop,但还不是 top-card proof,因为没有真实用户公开证据。

公开材料完全使用 synthetic data,重点不是证明增长,而是证明产品边界:研究和素材整理可以继续,发布、品牌判断和对外动作必须停在人类 gate。

02

仓库证据

证明点

创作与运营工作可以共享一个 gate-aware 的长期 agent loop。

LoopX 介入

creator-operator workflow、user gate、feedback capture、material library

- -
storyboard

`creator-ops-fake-data-storyboard.md` 定义七个面板和完整 fake fixture,不需要 live platform access。

feedback contract

`creator-ops-feedback-boundary-contract.md` 把 gate decision、preference hint、todo update、boundary correction、reward signal、product improvement note 分开。

source status

contract 要求 topic、insight、draft、material item 都有 source status,public repo 默认 `synthetic_demo`。

no autopublish

publishing 是 hard user gate;safe side work 可以继续,但不能把偏好或 reward 当成发布授权。

+
1 publish hard gate
1 safe research side path
0 autopublish 动作
0 真实运营数据暴露
+
storyboard

`creator-ops-fake-data-storyboard.md` 给出完整 fake fixture,不需要 live platform access,适合公开展示用户旅程而不泄漏真实运营数据。

feedback contract

`creator-ops-feedback-boundary-contract.md` 把 gate decision、preference hint、todo update、boundary correction、reward signal 和 product improvement note 分开,避免把偏好误当发布授权。

source status

contract 要求 topic、insight、draft、material item 都携带 source status;public repo 默认 `synthetic_demo`,不伪装成真实增长证据。

no autopublish

publishing 是 hard user gate;safe side work 可以继续,但不能把偏好或 reward 当成发布授权。

03

LoopX 行为

  • 1
    creative objective 是 durable goal state,不是聊天窗口里的隐形任务。
  • 2
    publish/no-publish 是 user gate;research、整理和 source-status 改进可以作为 safe side path。
  • 3
    反馈被写成 preference hint、gate decision、todo update 或 boundary correction。
  • 4
    下一次 agent run 前,用户能看到 blocked route、safe side path 和 validation expectation。
diff --git a/docs/showcases/cases/0623-agent-to-agent-pr-comments.en.html b/docs/showcases/cases/0623-agent-to-agent-pr-comments.en.html index 89621101..6a789093 100644 --- a/docs/showcases/cases/0623-agent-to-agent-pr-comments.en.html +++ b/docs/showcases/cases/0623-agent-to-agent-pr-comments.en.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -110,15 +120,15 @@

Agent-to-agent PR comment and fix loop

01

Case context

-

This case shows how PR review feedback can move across multiple agents without losing ownership. The important chain is comment, claim, handoff, fix, validation, and review packet rather than the chat transcript.

The public repository contains the control-plane pieces: `claimed_by` appears in todo projection, review packets, event-sourced state, and multi-agent or side-agent prompt contracts.

+

This case shows how PR review feedback can move across multiple agents without losing ownership. The important chain is comment, claim, handoff, fix, validation, and review packet rather than the chat transcript.

The user-facing value is that every feedback item can answer three questions: who owns it, where the fix evidence is, and who still needs to review it. Public evidence spans the event contract, review packet, heartbeat prompt, and validation fixtures.

02

Repository evidence

Proof

Multiple agents can coordinate around PR review comments without losing owner review.

LoopX intervention

claimed_by, handoff gate, review packet, comment/fix loop

- -
Todo ownership

`event_sourced_state_contract_v0` defines `todo_claimed` as a canonical event for ownership, lease, or `claimed_by`.

Review packet

`loopx/review_packet.py` renders open todo `claimed_by` values into handoff and review text.

Side-agent contract

`docs/heartbeat-automation-prompt.md` requires side agents to self-merge only small validated evidence-backed work or create a claimed handoff todo.

CLI smokes

`examples/todo-lifecycle-cli-smoke.py` and `examples/todo-cli-smoke.py` cover claim, handoff successor, side-agent self-merge, and review handoff rules.

+
1 owner per feedback item
3 owner/fix/review questions answered
1 review-packet handoff
0 unowned PR comment reminders
+
Todo ownership

`event_sourced_state_contract_v0` defines `todo_claimed` as a canonical event for ownership, lease, or `claimed_by`.

Review packet

`loopx/review_packet.py` preserves `claimed_by` in open-todo rendering and handoff ranking so review packets can show ownership.

Side-agent contract

`docs/heartbeat-automation-prompt.md` requires side agents to self-merge only small, validated, evidence-backed work, and otherwise to create a claimed handoff todo.

CLI smokes

`examples/todo-lifecycle-cli-smoke.py` and `examples/todo-cli-smoke.py` cover claim, handoff successor, side-agent self-merge, review handoff, and same-agent handoff rejection.

03

LoopX behavior

  • 1
    PR feedback becomes an owned todo rather than a chat reminder.
  • 2
    Blocked or cross-lane fixes move through handoff gates and review packets instead of another agent guessing context.
  • 3
    The fixing agent leaves diff, validation, and remaining gates; follow-up work becomes a successor todo.
  • 4
    Owner review still happens on the PR or review surface; LoopX supplies state, evidence, and handoff.
diff --git a/docs/showcases/cases/0623-agent-to-agent-pr-comments.html b/docs/showcases/cases/0623-agent-to-agent-pr-comments.html index 210b4e88..9e5c7750 100644 --- a/docs/showcases/cases/0623-agent-to-agent-pr-comments.html +++ b/docs/showcases/cases/0623-agent-to-agent-pr-comments.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -110,15 +120,15 @@

Agent to agent 回复 PR comment 和 PR Fix

01

案例背景

-

这个案例描述的是 PR review feedback 在多 agent 之间流转时如何不丢 ownership。重点不是聊天记录,而是 comment、claim、handoff、fix、validation、review packet 这条链。

公开仓库里可以看到相关控制面已经被产品化:`claimed_by` 出现在 todo projection、review packet、event-sourced state 和多 agent/side-agent prompt 合约里。

+

这个案例描述的是 PR review feedback 在多 agent 之间流转时如何不丢 ownership。重点不是聊天记录,而是 comment、claim、handoff、fix、validation、review packet 这条链。

对用户有价值的是每条反馈都能回答三个问题:谁负责、修复证据在哪里、还需要谁 review。公开仓库里的 event contract、review packet、heartbeat prompt 和 validation fixtures 共同证明这条 handoff 链。

02

仓库证据

证明点

多 agent lane 可以围绕 PR comment 协作,同时保留 owner review。

LoopX 介入

claimed_by、handoff gate、review packet、comment/fix loop

- -
todo ownership

`event_sourced_state_contract_v0` 把 `todo_claimed` 定义为 canonical event,记录 ownership、lease 或 `claimed_by`。

review packet

`loopx/review_packet.py` 会把 open todo 的 `claimed_by` 显示进 handoff/review 文本。

side-agent contract

`docs/heartbeat-automation-prompt.md` 规定 side-agent 小变更可带 evidence 自合并,否则创建 claimed-by handoff todo。

CLI smokes

`examples/todo-lifecycle-cli-smoke.py`、`examples/todo-cli-smoke.py` 覆盖 claim、handoff successor、side-agent self-merge 和 review handoff 规则。

+
1 每条反馈一个 owner
3 owner/fix/review 可回答问题
1 review packet handoff
0 无主 PR comment reminder
+
todo ownership

`event_sourced_state_contract_v0` 把 `todo_claimed` 定义为 canonical event,记录 ownership、lease 或 `claimed_by`。

review packet

`loopx/review_packet.py` 在 open-todo rendering 和 handoff ranking 路径中保留 `claimed_by`,让 review packet 能显示 owner。

side-agent contract

`docs/heartbeat-automation-prompt.md` 规定 side-agent 小变更可带 evidence 自合并,否则创建 claimed-by handoff todo。

CLI smokes

`examples/todo-lifecycle-cli-smoke.py` 和 `examples/todo-cli-smoke.py` 覆盖 claim、handoff successor、side-agent self-merge、review handoff 和 same-agent handoff rejection。

03

LoopX 行为

  • 1
    PR feedback 先成为一个有 owner 的 todo,而不是 chat reminder。
  • 2
    被阻塞或跨 lane 的修复通过 handoff gate 和 review packet 传递,不允许另一个 agent 猜测上下文。
  • 3
    修复 agent 需要留下 diff、validation 和剩余 gate;后续工作创建 successor todo。
  • 4
    owner review 仍在 PR/review surface 上完成,LoopX 只提供状态、证据和 handoff。
diff --git a/docs/showcases/cases/0623-overnight-project-refactor.en.html b/docs/showcases/cases/0623-overnight-project-refactor.en.html index 0e2b239d..bf598f60 100644 --- a/docs/showcases/cases/0623-overnight-project-refactor.en.html +++ b/docs/showcases/cases/0623-overnight-project-refactor.en.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -110,15 +120,15 @@

Overnight project refactor as PR-sized slices

01

Case context

-

This case addresses the risk of unattended refactoring: a long-running agent can mix cleanup, behavior change, discoveries, and stale plans into one broad diff.

The public evidence is not a private overnight screenshot. It is the control-plane behavior already documented and smoke-tested in the repository: todo lifecycle, successor/supersede, validation writeback, and review packets.

+

This case addresses the risk of unattended refactoring: a long-running agent can mix cleanup, behavior change, discoveries, and stale plans into one broad diff.

The public evidence is not a private overnight screenshot. It is the control-plane behavior already documented and smoke-tested in the repository: todo lifecycle, successor/supersede, validation writeback, and review packets. `examples/todo-lifecycle-cli-smoke.py` carries regression coverage for successors, supersede, handoff, and self-merge.

02

Repository evidence

Proof

Long unattended refactors can split into moderate PR slices instead of one unreviewable diff.

LoopX intervention

loop, todo follow-up, supersede, PR-sized slices

- -
Successor path

`docs/lark-kanban-control-plane-adapter.md` says real successors use `todo complete --next-*`, while replacements or narrower splits use `todo supersede --next-agent-todo`.

Side-agent completion

`docs/heartbeat-automation-prompt.md` requires nontrivial completion to create a successor todo or a no-follow-up rationale.

CLI validation

`examples/todo-lifecycle-cli-smoke.py` covers `--next-agent-todo`, `todo supersede`, claim inheritance, and handoff successors.

Review shape

`loopx review-packet` packages open todos, claimed_by, and handoff state for reviewer consumption.

+
1 PR-sized slice at a time
3 successor/supersede/handoff routes
1 review gate for broad risk
0 giant-diff target
+
Successor path

`docs/lark-kanban-control-plane-adapter.md` says real successors use `todo complete --next-*`, while replacements or narrower splits use `todo supersede --next-agent-todo`.

Side-agent completion

`docs/heartbeat-automation-prompt.md` requires nontrivial completion to create a successor todo or a no-follow-up rationale.

CLI validation

`examples/todo-lifecycle-cli-smoke.py` covers `--next-agent-todo` successors, `todo supersede`, claim inheritance, handoff successors, same-agent handoff rejection, and side-agent self-merge evidence.

Review shape

`loopx review-packet` packages open todos, claimed_by, and handoff state for reviewer consumption.

03

LoopX behavior

  • 1
    The current refactor slice must stay reviewable; overnight discoveries do not all land in one PR.
  • 2
    New discoveries become follow-up todos; changed routes supersede stale todos.
  • 3
    Each slice carries focused validation or doc/contract smoke evidence rather than raw agent traces.
  • 4
    Broad or unclear-risk slices route to review handoff instead of self-merge.
diff --git a/docs/showcases/cases/0623-overnight-project-refactor.html b/docs/showcases/cases/0623-overnight-project-refactor.html index 2d43fca9..6f303945 100644 --- a/docs/showcases/cases/0623-overnight-project-refactor.html +++ b/docs/showcases/cases/0623-overnight-project-refactor.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -110,15 +120,15 @@

一晚上自主重构项目

01

案例背景

-

这个案例解决的是无人值守重构的风险:长时间 agent 容易把 cleanup、行为改变、发现的新问题和过期计划混成一个大 diff。

LoopX 的公开证据不是某个私有夜间截图,而是 todo lifecycle、successor/supersede、validation writeback 和 review-packet 这些已经在仓库里有文档和 smoke 的控制面。

+

这个案例解决的是无人值守重构的风险:长时间 agent 容易把 cleanup、行为改变、发现的新问题和过期计划混成一个大 diff。

LoopX 的公开证据不是某个私有夜间截图,而是 todo lifecycle、successor/supersede、validation writeback 和 review-packet 这些已经在仓库里有文档和 smoke 的控制面。`todo-lifecycle-cli-smoke.py` 里能看到 successor、supersede、handoff 和 self-merge 的完整回归覆盖。

02

仓库证据

证明点

无人值守 refactor 可以拆成 PR-sized slice。

LoopX 介入

loop、todo follow-up、supersede、PR-sized slices

- -
successor path

`docs/lark-kanban-control-plane-adapter.md` 明确 real successor 使用 `todo complete --next-*`,replacement 或 narrower split 使用 `todo supersede --next-agent-todo`。

side-agent completion

`docs/heartbeat-automation-prompt.md` 要求非平凡完成创建 successor todo 或写 no-follow-up rationale。

CLI validation

`examples/todo-lifecycle-cli-smoke.py` 覆盖 `--next-agent-todo`、`todo supersede`、claim 继承和 handoff successor。

review shape

`loopx review-packet` 把当前 open todo、claimed_by 和 handoff 状态打包成 reviewer 可读的 packet。

+
1 一次一个 PR-sized slice
3 successor/supersede/handoff 路线
1 宽风险 review gate
0 巨型 diff 目标
+
successor path

`docs/lark-kanban-control-plane-adapter.md` 明确 real successor 使用 `todo complete --next-*`,replacement 或 narrower split 使用 `todo supersede --next-agent-todo`。

side-agent completion

`docs/heartbeat-automation-prompt.md` 要求非平凡完成创建 successor todo 或写 no-follow-up rationale。

CLI validation

`examples/todo-lifecycle-cli-smoke.py` 覆盖 `--next-agent-todo` successor、`todo supersede`、claim 继承、handoff successor、same-agent handoff rejection 和 side-agent self-merge evidence。

review shape

`loopx review-packet` 把当前 open todo、claimed_by 和 handoff 状态打包成 reviewer 可读的 packet。

03

LoopX 行为

  • 1
    当前 refactor slice 必须是可 review 的单位,不把整夜发现都塞进一个 PR。
  • 2
    发现的新工作写成 follow-up todo;路线变了就 supersede 旧 todo。
  • 3
    每个 slice 用 focused validation 或文档/contract smoke 证明,而不是依赖原始 agent trace。
  • 4
    大范围或风险不清楚的 slice 不自合并,进入 review handoff。
diff --git a/docs/showcases/cases/0624-pr-issue-auto-fix.en.html b/docs/showcases/cases/0624-pr-issue-auto-fix.en.html index 07f0aacb..5af87377 100644 --- a/docs/showcases/cases/0624-pr-issue-auto-fix.en.html +++ b/docs/showcases/cases/0624-pr-issue-auto-fix.en.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -110,15 +120,15 @@

PR issue automatic fix loop

01

Case context

-

This case turns a PR issue, review comment, or issue text into an executable repair loop. It is not simply reading a comment and editing code; the path goes through metadata/intake, repro, branch-local patch, validation, and review packet.

The repository already contains the issue-fix capability: `loopx/capabilities/issue_fix/`, the `loopx issue-fix ...` CLI entry, protocol docs, and public smokes.

+

This case turns a PR issue, review comment, or issue text into an executable repair loop. It is not simply reading a comment and editing code; the path goes through metadata/intake, repro, branch-local patch, validation, and review packet.

The user-facing value is that feedback no longer sits in a comment thread: it enters a loop with an owner, repro path, branch-local fix, validation, and review handoff. The public evidence proves the loop and its boundaries without exposing raw issue bodies, private timelines, or local paths.

02

Repository evidence

Proof

Issue and review feedback can enter an executable repair loop.

LoopX intervention

issue-fix workflow, command pack, repro smoke, PR review feedback

- -
Product entry

`docs/capabilities/issue-fix/README.md` names `loopx issue-fix ...`, the content-ops bridge, protocol docs, and smokes.

Workflow contract

`issue_fix_workflow_contract_v0` defines metadata preview, intake classification, workflow plan, todo writeback, caller repo branch, validation, PR review packet, and gate handling.

Executable loop

`issue_fix_acceptance_loop_v0` includes the acceptance fixture, repo-branch fixture, and caller-approved repo branch mode.

Validation surface

Smokes cover workflow planning, workflow contract, metadata/intake, and acceptance-loop behavior.

+
5 · repair-loop stages
1 · branch-ready fix packet
1 · review handoff preserved
0 · raw issue bodies in public artifact
+
Product entry

`docs/capabilities/issue-fix/README.md` names `loopx issue-fix ...`, the content-ops bridge, protocol docs, and smokes; the maintainer-facing action is turning feedback into an executable fix packet.

Workflow contract

`issue_fix_workflow_contract_v0` defines metadata preview, intake classification, workflow plan, todo writeback, caller repo branch, validation, PR review packet, and gate handling.

Executable loop

`issue_fix_acceptance_loop_v0` includes the acceptance fixture, repo-branch fixture, and caller-approved repo branch mode.

Validation surface

Focused smokes protect the key boundaries for metadata preview, content-ops intake, workflow plan, workflow contract, acceptance loop, and end-to-end workflow behavior.

03

LoopX behavior

  • 1
    Public metadata can enter packets; raw issue bodies, comment bodies, timelines, and provider payloads remain gated sources.
  • 2
    Accepted candidates become ordered LoopX todos: repro smoke, code-context route, branch-local patch, validation, and review-packet readiness.
  • 3
    Caller-approved repo mode inspects the local repo only under `--execute` and reports repo-relative changed files, validation pass/fail, and PR-readiness.
  • 4
    External comments, PR creation, merge, publish, destructive git, and production action remain explicit gates.
diff --git a/docs/showcases/cases/0624-pr-issue-auto-fix.html b/docs/showcases/cases/0624-pr-issue-auto-fix.html index ba8587fb..f42f1be5 100644 --- a/docs/showcases/cases/0624-pr-issue-auto-fix.html +++ b/docs/showcases/cases/0624-pr-issue-auto-fix.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -110,15 +120,15 @@

PR Issue 自动 Fix

01

案例背景

-

这个案例把 PR issue、review comment 或 issue text 转成可执行修复闭环。它不是“读一段评论就改代码”,而是先做 metadata/intake、repro、branch-local patch、validation 和 review packet。

仓库已经有 issue-fix capability:模块在 `loopx/capabilities/issue_fix/`,CLI 入口是 `loopx issue-fix ...`,协议文档和 smoke 都在公开仓库内。

+

这个案例把 PR issue、review comment 或 issue text 转成可执行修复闭环。它不是“读一段评论就改代码”,而是先做 metadata/intake、repro、branch-local patch、validation 和 review packet。

对用户有价值的是反馈不再停在评论区:它会进入一个有 owner、有复现、有分支修复、有验证、有 review handoff 的闭环。公开证据只证明这条闭环和边界,不把 raw issue body、私有 timeline 或本地路径带进页面。

02

仓库证据

证明点

Issue 和 review 反馈可以变成受控、可执行的修复循环。

LoopX 介入

issue-fix workflow、command pack、repro smoke、PR review feedback

- -
产品入口

`docs/capabilities/issue-fix/README.md` 声明 `loopx issue-fix ...`、content-ops bridge、protocol docs 和 smoke。

工作流协议

`issue_fix_workflow_contract_v0` 明确 metadata preview、intake classification、workflow plan、todo writeback、caller repo branch、validation、PR review packet 和 gate handling。

可执行闭环

`issue_fix_acceptance_loop_v0` 包含 acceptance fixture、repo-branch fixture 和 caller-approved repo branch mode。

验证面

`examples/issue-fix-workflow-plan-smoke.py`、`examples/issue-fix-workflow-contract-smoke.py`、`examples/issue-fix-acceptance-loop-smoke.py` 等 smoke 覆盖这个路径。

+
5 · 修复闭环阶段
1 · branch-ready 修复包
1 · review handoff 保留
0 · 公开 artifact 中 raw issue body
+
产品入口

`docs/capabilities/issue-fix/README.md` 声明 `loopx issue-fix ...`、content-ops bridge、protocol docs 和 smoke;入口面向维护者的真实动作是把反馈转成可执行修复包。

工作流协议

`issue_fix_workflow_contract_v0` 明确 metadata preview、intake classification、workflow plan、todo writeback、caller repo branch、validation、PR review packet 和 gate handling。

可执行闭环

`issue_fix_acceptance_loop_v0` 包含 acceptance fixture、repo-branch fixture 和 caller-approved repo branch mode。

验证面

focused smokes 保护 metadata preview、content-ops intake、workflow plan、workflow contract、acceptance loop 和端到端 workflow 的关键边界。

03

LoopX 行为

  • 1
    public metadata 可以进入 packet;raw issue body、comment body、timeline、provider payload 都是 gated source。
  • 2
    accepted candidates 写成有序 LoopX todos:repro smoke、code-context route、branch-local patch、validation、review-packet readiness。
  • 3
    caller-approved repo 模式只在 `--execute` 时检查本地 repo,并且输出 repo-relative changed files、validation pass/fail 和 PR-readiness。
  • 4
    外部 comment、PR creation、merge、publish、destructive git 和 production action 都保留为显式 gate。
diff --git a/docs/showcases/cases/0627-overnight-pr-batch.en.html b/docs/showcases/cases/0627-overnight-pr-batch.en.html index 14aa1760..292cb490 100644 --- a/docs/showcases/cases/0627-overnight-pr-batch.en.html +++ b/docs/showcases/cases/0627-overnight-pr-batch.en.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -117,7 +127,7 @@

Overnight PR batch with reviewable control

Proof

High-throughput multi-lane work can remain PR-sized, reviewable, and merge-safe.

LoopX intervention

todo claim, review packet, self-merge boundary, focused smoke, public-boundary scan

-
22 · merged PR commits
10h · public window
+
22 · reviewable merged commits
10 · PR-numbered commits
10h · public evidence window
0 · raw-agent-log dependency
Public window

2026-06-27 01:29 to 11:29 +08:00, a 10-hour Git evidence window.

Change shape

22 merged commits across docs, runtime, status/quota, benchmark contracts, smokes, and release/runtime guardrails.

Review boundary

Work landed as PR-sized slices instead of one broad diff that maintainers would have to trust blindly.

Reproduction

The case document gives `git log --since ... --until ...` and `git log --numstat` commands.

03

LoopX behavior

diff --git a/docs/showcases/cases/0627-overnight-pr-batch.html b/docs/showcases/cases/0627-overnight-pr-batch.html index a6eb4024..11d85d62 100644 --- a/docs/showcases/cases/0627-overnight-pr-batch.html +++ b/docs/showcases/cases/0627-overnight-pr-batch.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card 
span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -117,7 +127,7 @@

一晚 30 个高价值 PR

证明点

高吞吐多 lane 工作也可以保持 PR 粒度、可审阅、可合并。

LoopX 介入

todo claim、review packet、自合并边界、focused smoke、public-boundary scan

-
22 · merged PR commits
10h · public window
+
22 · 可审阅 merged commits
10 · 带 PR 编号的 commits
10h · 公开证据窗口
0 · raw agent logs 依赖
公开窗口

2026-06-27 01:29 到 11:29 +08:00,10 小时 Git 证据窗口。

变更粒度

22 个 merged commits 覆盖 docs、runtime、status/quota、benchmark contracts、smokes 和 release/runtime guardrails。

审阅边界

工作以 PR-sized slices 落地,而不是一个需要 maintainer 盲信的巨大 diff。

复现方式

case 文档给出 `git log --since ... --until ...` 和 `git log --numstat` 命令。

03

LoopX 行为

diff --git a/docs/showcases/frontend-surface.md b/docs/showcases/frontend-surface.md index da20d3c9..337cb827 100644 --- a/docs/showcases/frontend-surface.md +++ b/docs/showcases/frontend-surface.md @@ -36,9 +36,10 @@ Use these catalog fields directly: | `user_value` | Outcome in plain language. | | `evidence_boundary` | Redaction and claim boundary drawer. | | `frontend_card.visual_metaphor` | Suggested visual treatment. | -| `frontend_card.primary_metric_hint` | Lightweight signal, not a hard claim. | +| `frontend_card.primary_metric_hint` | Lightweight value signal for leaders/users, not implementation trivia. | | `frontend_card.badges` | Compact chips. | | `frontend_card.story_beats` | Backward-compatible field for the case detail evidence sequence. New copy should render it as evidence, not as author notes. | +| `evidence_metrics` | Optional compact value metrics. Use outcome or boundary signals such as reviewable commits, user wait avoided, gated action prevented, compression range, or public evidence window. Do not use raw file counts, smoke counts, panel counts, or other implementation-surface trivia as the main proof. | | `workload_signal.efficiency_model` | Optional evidence panel for conservative baseline-vs-actual efficiency modeling. 
| ## First Screen diff --git a/docs/showcases/index.en.html b/docs/showcases/index.en.html index 4bd0c0c5..356b31d5 100644 --- a/docs/showcases/index.en.html +++ b/docs/showcases/index.en.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card 
em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -111,76 +121,108 @@

Showcase & Good Case

01

Top showcase cases

- +
2026-06-27 · public_evidence_case

Overnight PR batch with reviewable control

An overnight LoopX run can produce many PR-sized slices while keeping review, validation, and public evidence boundaries visible.

Proof: High-throughput multi-lane work can remain PR-sized, reviewable, and merge-safe.

+
22 · reviewable merged commits | 10 · PR-numbered commits | 10h · public evidence window
high_throughput_reviewable_work | pr_sized_slices | self_merge_policy | validation_writeback
- +
2026-06-24 · public_safe_pattern_case

PR issue automatic fix loop

Review feedback should become an ordered repair workflow with repro, fix, validation, and reviewer handoff.

Proof: Issue and review feedback can enter an executable repair loop.

+
5 · repair-loop stages | 1 · branch-ready fix packet | 1 · review handoff preserved
issue_fix_workflow | review_feedback | repro_smoke | command_pack
- +
2026-06-23 · public_safe_pattern_case

Agent-to-agent PR comment and fix loop

PR review feedback can become an owned agent todo with fix evidence instead of a loose chat reminder.

Proof: Multiple agents can coordinate around PR review comments without losing owner review.

+
1 · owner per feedback item | 3 · owner/fix/review questions answered | 1 · review-packet handoff
agent_to_agent_handoff | pr_comment_loop | review_packet | claimed_todo
- +
2026-06-23 · public_safe_pattern_case

Overnight project refactor as PR-sized slices

A broad refactor can run overnight while staying split into human-sized review units.

Proof: Long unattended refactors can split into moderate PR slices instead of one unreviewable diff.

+
1 · PR-sized slice at a time | 3 · successor/supersede/handoff routes | 1 · review gate for broad risk
long_unattended_goal | pr_sized_slices | todo_follow_up | supersede
- +
2026-06-19 · public_safe_interactive_case

Dynamic workflow for hardware-agent development

A fuzzy long-running engineering goal needs a shared control plane when multiple worker agents participate.

Proof: Fuzzy goals, multiple workers, and long unattended runs can still converge.

+
5 · public-safe hardware workflows | 3 · LoopX/orchestrator/worker role split | 1 · canonical artifact preserved
dynamic_workflow | multi_agent_coordination | shared_control_plane | long_unattended_goal
- +
2026-06-19 · public_evidence_case

LoopX self-iteration loop

A high-churn LoopX repo stayed legible while benchmark, product, docs, planning, and side-agent lanes moved in parallel.

Proof: A high-churn multi-lane project can keep state, boundaries, and evidence coherent.

+
801 · public commits | 244 · commits since Jun 18 | 59-92d · AI-assisted baseline
self_iteration | side_agent_scope | todo_claim_ownership | identity_aware_prompt
- +
2026-06-17 · reproducible_synthetic_demo

Blocked P0 with safe P1/P2 rotation

A gated P0 lane should not stall a whole long-running goal when safe fallback work exists.

Proof: A user decision should not block all safe work.

+
1 · concrete P0 user decision | 1 · safe fallback lane | 0 · gated-lane auto-progress
blocked_priority_fallback | concrete_user_gate | safe_fallback_work | quota_discipline
+
+

Experimental today-value path

+

A lower-priority entry point, placed below the showcase first screen rather than replacing it, for users who want to pick one useful LoopX capability today.

+
+
+ PR review/comment -> fix loop + Branch-ready fix packet with repro, smoke result, and remaining review owner. + Fewer dropped review threads. +
+ +
+ Overnight PR-sized refactor + Reviewable slice list, validation notes, successor todo, and merge boundary. + More merged commits without a giant diff audit. +
+ +
+ P0 blocked -> safe fallback + Kernel projection of the exact user gate, safe fallback todo, quota decision, and evidence boundary. + Less idle agent time while preserving human judgment. +
+
+
+
02

Appendix case

diff --git a/docs/showcases/index.html b/docs/showcases/index.html index a92b4582..35b56f52 100644 --- a/docs/showcases/index.html +++ b/docs/showcases/index.html @@ -56,6 +56,8 @@ .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} .cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -63,8 +65,16 @@ .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card 
em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} @@ -111,76 +121,108 @@

Showcase & Good Case

01

顶部 Showcase 案例

- +
2026-06-27 · public_evidence_case

一晚 30 个高价值 PR

高吞吐、多车道推进仍保持 PR 级可 review、可合并。

证明点: 高吞吐多 lane 工作也可以保持 PR 粒度、可审阅、可合并。

+
22 · 可审阅 merged commits | 10 · 带 PR 编号的 commits | 10h · 公开证据窗口
high_throughput_reviewable_work | pr_sized_slices | self_merge_policy | validation_writeback
- +
2026-06-24 · public_safe_pattern_case

PR Issue 自动 Fix

Issue / Review comment 可以进入可执行修复闭环。

证明点: Issue 和 review 反馈可以变成受控、可执行的修复循环。

+
5 · 修复闭环阶段 | 1 · branch-ready 修复包 | 1 · review handoff 保留
issue_fix_workflow | review_feedback | repro_smoke | command_pack
- +
2026-06-23 · public_safe_pattern_case

Agent to agent 回复 PR comment 和 PR Fix

多 agent 可围绕 PR review 协同发现、评论、修复。

证明点: 多 agent lane 可以围绕 PR comment 协作,同时保留 owner review。

+
1 · 每条反馈一个 owner | 3 · owner/fix/review 可回答问题 | 1 · review packet handoff
agent_to_agent_handoff | pr_comment_loop | review_packet | claimed_todo
- +
2026-06-23 · public_safe_pattern_case

一晚上自主重构项目

长时间无值守重构能拆成适中 PR,而不是一个不可 review 大改。

证明点: 无人值守 refactor 可以拆成 PR-sized slice。

+
1 · 一次一个 PR-sized slice | 3 · successor/supersede/handoff 路线 | 1 · 宽风险 review gate
long_unattended_goal | pr_sized_slices | todo_follow_up | supersede
- +
2026-06-19 · public_safe_interactive_case

外部芯片 agent workflow

模糊目标、多 worker、长时间无值守下仍可收敛。

证明点: 模糊目标、多 worker、长时间无值守下仍可收敛。

+
5 · 公开安全硬件 workflow | 3 · LoopX/编排/worker 角色分离 | 1 · canonical artifact 保留
dynamic_workflow | multi_agent_coordination | shared_control_plane | long_unattended_goal | Canonical 原页面
- +
2026-06-19 · public_evidence_case

LoopX Meta Agent 自迭代

高变更、多车道项目可保持状态、边界和证据。

证明点: 高 churn 多 lane agent repo 可以保持状态、证据和边界一致。

+
801 · public commits | 244 · 06-18 后 commits | 59-92d · AI-assisted baseline
self_iteration | side_agent_scope | todo_claim_ownership | identity_aware_prompt
- +
2026-06-17 · reproducible_synthetic_demo

P0 block 后推进 P1/P2

用户决策不应阻塞全部安全工作。

证明点: 被阻塞的 P0 决策不应该阻止安全的 P1/P2 工作继续。

+
1 · 具体 P0 用户决策 | 1 · safe fallback lane | 0 · gated lane 自动推进
blocked_priority_fallback | concrete_user_gate | safe_fallback_work | quota_discipline
+
+

Experimental today-value path

+

一个放在首屏下方的实验性入口:帮助用户从三个 LoopX 能力里选择今天就能产生价值的一项。

+
+
+ PR review/comment -> fix loop + 可复核的修复包:repro、smoke 结果、剩余 review owner。 + 更少遗漏 review 线程。 +
+ +
+ Overnight PR-sized refactor + 可 review 的 slice 列表、验证记录、后续 todo、merge 边界。 + 增加可合并 commit,避免巨型 diff。 +
+ +
+ P0 blocked -> safe fallback + 已有 goal 内由 kernel 投影具体 user gate、安全 fallback todo、quota 决策和证据边界。 + 减少 agent 空转,同时保留人类判断。 +
+
+
+
02

附录案例

diff --git a/docs/showcases/showcase-catalog.json b/docs/showcases/showcase-catalog.json index 960fd34a..8b82c946 100644 --- a/docs/showcases/showcase-catalog.json +++ b/docs/showcases/showcase-catalog.json @@ -81,7 +81,7 @@ }, "frontend_card": { "visual_metaphor": "parallel PR lanes converge into a reviewable merge rail", - "primary_metric_hint": "High-throughput multi-lane work can remain PR-sized, reviewable, and merge-safe.", + "primary_metric_hint": "22 reviewable merged commits, including 10 PR-numbered commits, landed inside a 10h public evidence window.", "badges": [ "PR batch", "review packet", @@ -93,7 +93,37 @@ "limit self-merge to narrow validated changes while preserving broader review gates", "record public evidence from Git history instead of raw agent logs or private screenshots" ] - } + }, + "evidence_metrics": [ + { + "value": "22", + "labels": { + "zh": "可审阅 merged commits", + "en": "reviewable merged commits" + } + }, + { + "value": "10", + "labels": { + "zh": "带 PR 编号的 commits", + "en": "PR-numbered commits" + } + }, + { + "value": "10h", + "labels": { + "zh": "公开证据窗口", + "en": "public evidence window" + } + }, + { + "value": "0", + "labels": { + "zh": "raw agent logs 依赖", + "en": "raw-agent-log dependency" + } + } + ] }, { "id": "2026-06-24-pr-issue-auto-fix", @@ -139,7 +169,7 @@ }, "frontend_card": { "visual_metaphor": "issue feedback closes through repro, fix, validation, and review handoff", - "primary_metric_hint": "Issue and review feedback can enter an executable repair loop.", + "primary_metric_hint": "Feedback becomes a five-stage repair loop with branch-ready fix evidence and reviewer handoff.", "badges": [ "issue fix", "repro smoke", @@ -151,7 +181,37 @@ "keep gated source reads explicit", "separate repro, implementation, validation, and reviewer handoff" ] - } + }, + "evidence_metrics": [ + { + "value": "5", + "labels": { + "zh": "修复闭环阶段", + "en": "repair-loop stages" + } + }, + { + "value": "1", + "labels": { + "zh": "branch-ready 
修复包", + "en": "branch-ready fix packet" + } + }, + { + "value": "1", + "labels": { + "zh": "review handoff 保留", + "en": "review handoff preserved" + } + }, + { + "value": "0", + "labels": { + "zh": "公开 artifact 中 raw issue body", + "en": "raw issue bodies in public artifact" + } + } + ] }, { "id": "2026-06-23-agent-to-agent-pr-comments", @@ -197,7 +257,7 @@ }, "frontend_card": { "visual_metaphor": "agent comments pass through explicit ownership instead of loose threads", - "primary_metric_hint": "Multiple agents can coordinate around PR review comments without losing owner review.", + "primary_metric_hint": "Every PR feedback item gets an owner, fix evidence, and a visible reviewer handoff instead of becoming a loose reminder.", "badges": [ "handoff", "PR comment", @@ -209,7 +269,37 @@ "record fix and validation evidence in the review packet", "keep successor work explicit after the comment is handled" ] - } + }, + "evidence_metrics": [ + { + "value": "1", + "labels": { + "zh": "每条反馈一个 owner", + "en": "owner per feedback item" + } + }, + { + "value": "3", + "labels": { + "zh": "owner/fix/review 可回答问题", + "en": "owner/fix/review questions answered" + } + }, + { + "value": "1", + "labels": { + "zh": "review packet handoff", + "en": "review-packet handoff" + } + }, + { + "value": "0", + "labels": { + "zh": "无主 PR comment reminder", + "en": "unowned PR comment reminders" + } + } + ] }, { "id": "2026-06-23-overnight-project-refactor", @@ -255,7 +345,7 @@ }, "frontend_card": { "visual_metaphor": "a large refactor moves as small reviewable packets", - "primary_metric_hint": "Long unattended refactors can split into moderate PR slices instead of one unreviewable diff.", + "primary_metric_hint": "Unattended refactors stay reviewable by landing one PR-sized slice at a time with follow-up and review gates.", "badges": [ "refactor", "PR slices", @@ -267,7 +357,37 @@ "supersede stale tasks when the route changes", "validate each slice before merge or handoff" ] - } + }, + 
"evidence_metrics": [ + { + "value": "1", + "labels": { + "zh": "一次一个 PR-sized slice", + "en": "PR-sized slice at a time" + } + }, + { + "value": "3", + "labels": { + "zh": "successor/supersede/handoff 路线", + "en": "successor/supersede/handoff routes" + } + }, + { + "value": "1", + "labels": { + "zh": "宽风险 review gate", + "en": "review gate for broad risk" + } + }, + { + "value": "0", + "labels": { + "zh": "巨型 diff 目标", + "en": "giant-diff target" + } + } + ] }, { "id": "2026-06-19-dynamic-workflow-hardware-agent", @@ -302,7 +422,7 @@ "evidence_boundary": "Public-safe interactive artifact; no raw chats, screenshots, proprietary design details, private repositories, local paths, task ids, credentials, or unpublished hardware artifacts.", "frontend_card": { "visual_metaphor": "multiple worker lanes converging through one shared control plane", - "primary_metric_hint": "five public hardware-agent cases under one control plane", + "primary_metric_hint": "5 public-safe hardware workflows demonstrate multi-worker convergence while proprietary details stay out of the artifact.", "badges": [ "interactive-html", "multi-agent", @@ -325,7 +445,37 @@ "showcase_table": { "proof_point": "Fuzzy goals, multiple workers, and long unattended runs can still converge.", "loopx_intervention": "goal state, worker handoff, dynamic workflow" - } + }, + "evidence_metrics": [ + { + "value": "5", + "labels": { + "zh": "公开安全硬件 workflow", + "en": "public-safe hardware workflows" + } + }, + { + "value": "3", + "labels": { + "zh": "LoopX/编排/worker 角色分离", + "en": "LoopX/orchestrator/worker role split" + } + }, + { + "value": "1", + "labels": { + "zh": "canonical artifact 保留", + "en": "canonical artifact preserved" + } + }, + { + "value": "0", + "labels": { + "zh": "专有设计细节暴露", + "en": "proprietary design details exposed" + } + } + ] }, { "id": "2026-06-19-loopx-self-iteration", @@ -425,7 +575,7 @@ "evidence_boundary": "Public Git evidence only; no private thread text, local active-state bodies, 
internal document links, screenshots, raw benchmark material, credentials, or machine-specific paths.", "frontend_card": { "visual_metaphor": "many repo lanes flowing through one control plane, with a side-agent lane branching safely beside the primary benchmark lane", - "primary_metric_hint": "801 public commits mapped to a conservative 59-92 AI-assisted developer-day baseline over a 19.6-day public Git window", + "primary_metric_hint": "801 public commits, 244 recent commits, and a conservative 59-92 AI-assisted developer-day baseline show self-iteration throughput.", "badges": [ "self-iteration", "commit-backed", @@ -452,7 +602,37 @@ "showcase_table": { "proof_point": "A high-churn multi-lane project can keep state, boundaries, and evidence coherent.", "loopx_intervention": "todo, quota, gate, evidence, review packet, frontstage" - } + }, + "evidence_metrics": [ + { + "value": "801", + "labels": { + "zh": "public commits", + "en": "public commits" + } + }, + { + "value": "244", + "labels": { + "zh": "06-18 后 commits", + "en": "commits since Jun 18" + } + }, + { + "value": "59-92d", + "labels": { + "zh": "AI-assisted baseline", + "en": "AI-assisted baseline" + } + }, + { + "value": "3.0-4.7x", + "labels": { + "zh": "calendar compression", + "en": "calendar compression" + } + } + ] }, { "id": "2026-06-17-blocked-p0-safe-rotation", @@ -486,7 +666,7 @@ "evidence_boundary": "Synthetic public fixture only; no private screenshots, raw tasks, internal links, local image names, or raw run logs.", "frontend_card": { "visual_metaphor": "priority lanes with one gated lane and one active fallback lane", - "primary_metric_hint": "attention reduction: user sees one concrete decision instead of reading the whole run log", + "primary_metric_hint": "One concrete P0 user decision is isolated while one safe fallback lane can continue with zero gated-lane spend.", "badges": [ "reproducible", "user-gate", @@ -510,7 +690,37 @@ "showcase_table": { "proof_point": "A user decision should 
not block all safe work.", "loopx_intervention": "concrete user todo, safe fallback, quota control" - } + }, + "evidence_metrics": [ + { + "value": "1", + "labels": { + "zh": "具体 P0 用户决策", + "en": "concrete P0 user decision" + } + }, + { + "value": "1", + "labels": { + "zh": "safe fallback lane", + "en": "safe fallback lane" + } + }, + { + "value": "0", + "labels": { + "zh": "gated lane 自动推进", + "en": "gated-lane auto-progress" + } + }, + { + "value": "0", + "labels": { + "zh": "private upload 依赖", + "en": "private upload dependency" + } + } + ] }, { "id": "2026-06-20-creator-operator-case-spec", @@ -561,7 +771,37 @@ "localized_pages": { "zh": "docs/showcases/cases/0620-creator-operator-case-spec.html", "en": "docs/showcases/cases/0620-creator-operator-case-spec.en.html" - } + }, + "evidence_metrics": [ + { + "value": "1", + "labels": { + "zh": "publish hard gate", + "en": "publish hard gate" + } + }, + { + "value": "1", + "labels": { + "zh": "safe research side path", + "en": "safe research side path" + } + }, + { + "value": "0", + "labels": { + "zh": "autopublish 动作", + "en": "autopublish actions" + } + }, + { + "value": "0", + "labels": { + "zh": "真实运营数据暴露", + "en": "real operations data exposed" + } + } + ] } ] } diff --git a/examples/showcase-catalog-smoke.py b/examples/showcase-catalog-smoke.py index 3c8cadfe..82211da3 100644 --- a/examples/showcase-catalog-smoke.py +++ b/examples/showcase-catalog-smoke.py @@ -39,6 +39,18 @@ "Story " + "beats", "Website Story " + "Beats", ) +FORBIDDEN_TRIVIAL_METRIC_COPY = ( + "issue-fix 公开文件", + "issue-fix public files", + "smoke assertions", + "storyboard panels", + "source statuses", + "coverage points", + "render refs", + "handoff 回归引用", + "synthetic todos", + "todo lifecycle smokes", +) def read(path: Path) -> str: @@ -78,6 +90,13 @@ def main() -> int: assert case.get("evidence_boundary"), case assert case.get("user_value"), case assert isinstance(case.get("pattern_tags"), list) and case["pattern_tags"], case + 
evidence_metrics = case.get("evidence_metrics") + assert isinstance(evidence_metrics, list) and len(evidence_metrics) >= 2, case + for metric in evidence_metrics: + assert metric.get("value"), case + labels = metric.get("labels") + assert isinstance(labels, dict), case + assert labels.get("zh") and labels.get("en"), case frontend = case.get("frontend_card") appendix = case.get("appendix_surface") assert isinstance(frontend, dict) or isinstance(appendix, dict), case @@ -100,6 +119,8 @@ def main() -> int: localized_text = read(localized_path) for phrase in FORBIDDEN_SHOWCASE_COPY: assert phrase not in localized_text, f"{localized_page}: forbidden copy {phrase!r}" + for phrase in FORBIDDEN_TRIVIAL_METRIC_COPY: + assert phrase not in localized_text, f"{localized_page}: trivial metric copy {phrase!r}" if case_id != "2026-06-19-dynamic-workflow-hardware-agent" or lang == "en": assert "Repository evidence" in localized_text or "仓库证据" in localized_text, localized_page assert "Repository sources" in localized_text or "仓库来源" in localized_text, localized_page diff --git a/examples/showcase-html-pages.py b/examples/showcase-html-pages.py index b6431daf..b9e31db7 100644 --- a/examples/showcase-html-pages.py +++ b/examples/showcase-html-pages.py @@ -185,13 +185,13 @@ "zh": { "context": [ "这个案例把 PR issue、review comment 或 issue text 转成可执行修复闭环。它不是“读一段评论就改代码”,而是先做 metadata/intake、repro、branch-local patch、validation 和 review packet。", - "仓库已经有 issue-fix capability:模块在 `loopx/capabilities/issue_fix/`,CLI 入口是 `loopx issue-fix ...`,协议文档和 smoke 都在公开仓库内。", + "对用户有价值的是反馈不再停在评论区:它会进入一个有 owner、有复现、有分支修复、有验证、有 review handoff 的闭环。公开证据只证明这条闭环和边界,不把 raw issue body、私有 timeline 或本地路径带进页面。", ], "evidence": [ - ("产品入口", "`docs/capabilities/issue-fix/README.md` 声明 `loopx issue-fix ...`、content-ops bridge、protocol docs 和 smoke。"), + ("产品入口", "`docs/capabilities/issue-fix/README.md` 声明 `loopx issue-fix ...`、content-ops bridge、protocol docs 和 smoke;入口面向维护者的真实动作是把反馈转成可执行修复包。"), ("工作流协议", 
"`issue_fix_workflow_contract_v0` 明确 metadata preview、intake classification、workflow plan、todo writeback、caller repo branch、validation、PR review packet 和 gate handling。"), ("可执行闭环", "`issue_fix_acceptance_loop_v0` 包含 acceptance fixture、repo-branch fixture 和 caller-approved repo branch mode。"), - ("验证面", "`examples/issue-fix-workflow-plan-smoke.py`、`examples/issue-fix-workflow-contract-smoke.py`、`examples/issue-fix-acceptance-loop-smoke.py` 等 smoke 覆盖这个路径。"), + ("验证面", "focused smokes 保护 metadata preview、content-ops intake、workflow plan、workflow contract、acceptance loop 和端到端 workflow 的关键边界。"), ], "mechanism": [ "public metadata 可以进入 packet;raw issue body、comment body、timeline、provider payload 都是 gated source。", @@ -214,13 +214,13 @@ "en": { "context": [ "This case turns a PR issue, review comment, or issue text into an executable repair loop. It is not simply reading a comment and editing code; the path goes through metadata/intake, repro, branch-local patch, validation, and review packet.", - "The repository already contains the issue-fix capability: `loopx/capabilities/issue_fix/`, the `loopx issue-fix ...` CLI entry, protocol docs, and public smokes.", + "The user-facing value is that feedback no longer sits in a comment thread: it enters a loop with an owner, repro path, branch-local fix, validation, and review handoff. 
The public evidence proves the loop and its boundaries without exposing raw issue bodies, private timelines, or local paths.", ], "evidence": [ - ("Product entry", "`docs/capabilities/issue-fix/README.md` names `loopx issue-fix ...`, the content-ops bridge, protocol docs, and smokes."), + ("Product entry", "`docs/capabilities/issue-fix/README.md` names `loopx issue-fix ...`, the content-ops bridge, protocol docs, and smokes; the maintainer-facing action is turning feedback into an executable fix packet."), ("Workflow contract", "`issue_fix_workflow_contract_v0` defines metadata preview, intake classification, workflow plan, todo writeback, caller repo branch, validation, PR review packet, and gate handling."), ("Executable loop", "`issue_fix_acceptance_loop_v0` includes the acceptance fixture, repo-branch fixture, and caller-approved repo branch mode."), - ("Validation surface", "Smokes cover workflow planning, workflow contract, metadata/intake, and acceptance-loop behavior."), + ("Validation surface", "Focused smokes protect the key boundaries for metadata preview, content-ops intake, workflow plan, workflow contract, acceptance loop, and end-to-end workflow behavior."), ], "mechanism": [ "Public metadata can enter packets; raw issue bodies, comment bodies, timelines, and provider payloads remain gated sources.", @@ -245,13 +245,13 @@ "zh": { "context": [ "这个案例描述的是 PR review feedback 在多 agent 之间流转时如何不丢 ownership。重点不是聊天记录,而是 comment、claim、handoff、fix、validation、review packet 这条链。", - "公开仓库里可以看到相关控制面已经被产品化:`claimed_by` 出现在 todo projection、review packet、event-sourced state 和多 agent/side-agent prompt 合约里。", + "对用户有价值的是每条反馈都能回答三个问题:谁负责、修复证据在哪里、还需要谁 review。公开仓库里的 event contract、review packet、heartbeat prompt 和 validation fixtures 共同证明这条 handoff 链。", ], "evidence": [ ("todo ownership", "`event_sourced_state_contract_v0` 把 `todo_claimed` 定义为 canonical event,记录 ownership、lease 或 `claimed_by`。"), - ("review packet", "`loopx/review_packet.py` 会把 open todo 的 `claimed_by` 显示进 
handoff/review 文本。"), + ("review packet", "`loopx/review_packet.py` 在 open-todo rendering 和 handoff ranking 路径中保留 `claimed_by`,让 review packet 能显示 owner。"), ("side-agent contract", "`docs/heartbeat-automation-prompt.md` 规定 side-agent 小变更可带 evidence 自合并,否则创建 claimed-by handoff todo。"), - ("CLI smokes", "`examples/todo-lifecycle-cli-smoke.py`、`examples/todo-cli-smoke.py` 覆盖 claim、handoff successor、side-agent self-merge 和 review handoff 规则。"), + ("CLI smokes", "`examples/todo-lifecycle-cli-smoke.py` 和 `examples/todo-cli-smoke.py` 覆盖 claim、handoff successor、side-agent self-merge、review handoff 和 same-agent handoff rejection。"), ], "mechanism": [ "PR feedback 先成为一个有 owner 的 todo,而不是 chat reminder。", @@ -273,13 +273,13 @@ "en": { "context": [ "This case shows how PR review feedback can move across multiple agents without losing ownership. The important chain is comment, claim, handoff, fix, validation, and review packet rather than the chat transcript.", - "The public repository contains the control-plane pieces: `claimed_by` appears in todo projection, review packets, event-sourced state, and multi-agent or side-agent prompt contracts.", + "The user-facing value is that every feedback item can answer three questions: who owns it, where the fix evidence is, and who still needs to review it. 
Public evidence spans the event contract, review packet, heartbeat prompt, and validation fixtures.", ], "evidence": [ ("Todo ownership", "`event_sourced_state_contract_v0` defines `todo_claimed` as a canonical event for ownership, lease, or `claimed_by`."), - ("Review packet", "`loopx/review_packet.py` renders open todo `claimed_by` values into handoff and review text."), + ("Review packet", "`loopx/review_packet.py` preserves `claimed_by` in open-todo rendering and handoff ranking so review packets can show ownership."), ("Side-agent contract", "`docs/heartbeat-automation-prompt.md` requires side agents to self-merge only small validated evidence-backed work or create a claimed handoff todo."), - ("CLI smokes", "`examples/todo-lifecycle-cli-smoke.py` and `examples/todo-cli-smoke.py` cover claim, handoff successor, side-agent self-merge, and review handoff rules."), + ("CLI smokes", "`examples/todo-lifecycle-cli-smoke.py` and `examples/todo-cli-smoke.py` cover claim, handoff successor, side-agent self-merge, review handoff, and same-agent handoff rejection."), ], "mechanism": [ "PR feedback becomes an owned todo rather than a chat reminder.", @@ -303,12 +303,12 @@ "zh": { "context": [ "这个案例解决的是无人值守重构的风险:长时间 agent 容易把 cleanup、行为改变、发现的新问题和过期计划混成一个大 diff。", - "LoopX 的公开证据不是某个私有夜间截图,而是 todo lifecycle、successor/supersede、validation writeback 和 review-packet 这些已经在仓库里有文档和 smoke 的控制面。", + "LoopX 的公开证据不是某个私有夜间截图,而是 todo lifecycle、successor/supersede、validation writeback 和 review-packet 这些已经在仓库里有文档和 smoke 的控制面。`todo-lifecycle-cli-smoke.py` 里能看到 successor、supersede、handoff 和 self-merge 的完整回归覆盖。", ], "evidence": [ ("successor path", "`docs/lark-kanban-control-plane-adapter.md` 明确 real successor 使用 `todo complete --next-*`,replacement 或 narrower split 使用 `todo supersede --next-agent-todo`。"), ("side-agent completion", "`docs/heartbeat-automation-prompt.md` 要求非平凡完成创建 successor todo 或写 no-follow-up rationale。"), - ("CLI validation", "`examples/todo-lifecycle-cli-smoke.py` 覆盖 
`--next-agent-todo`、`todo supersede`、claim 继承和 handoff successor。"), + ("CLI validation", "`examples/todo-lifecycle-cli-smoke.py` 覆盖 `--next-agent-todo` successor、`todo supersede`、claim 继承、handoff successor、same-agent handoff rejection 和 side-agent self-merge evidence。"), ("review shape", "`loopx review-packet` 把当前 open todo、claimed_by 和 handoff 状态打包成 reviewer 可读的 packet。"), ], "mechanism": [ @@ -331,12 +331,12 @@ "en": { "context": [ "This case addresses the risk of unattended refactoring: a long-running agent can mix cleanup, behavior change, discoveries, and stale plans into one broad diff.", - "The public evidence is not a private overnight screenshot. It is the control-plane behavior already documented and smoke-tested in the repository: todo lifecycle, successor/supersede, validation writeback, and review packets.", + "The public evidence is not a private overnight screenshot. It is the control-plane behavior already documented and smoke-tested in the repository: todo lifecycle, successor/supersede, validation writeback, and review packets. 
`todo-lifecycle-cli-smoke.py` carries regression coverage for successors, supersede, handoff, and self-merge.", ], "evidence": [ ("Successor path", "`docs/lark-kanban-control-plane-adapter.md` says real successors use `todo complete --next-*`, while replacements or narrower splits use `todo supersede --next-agent-todo`."), ("Side-agent completion", "`docs/heartbeat-automation-prompt.md` requires nontrivial completion to create a successor todo or a no-follow-up rationale."), - ("CLI validation", "`examples/todo-lifecycle-cli-smoke.py` covers `--next-agent-todo`, `todo supersede`, claim inheritance, and handoff successors."), + ("CLI validation", "`examples/todo-lifecycle-cli-smoke.py` covers `--next-agent-todo` successors, `todo supersede`, claim inheritance, handoff successors, same-agent handoff rejection, and side-agent self-merge evidence."), ("Review shape", "`loopx review-packet` packages open todos, claimed_by, and handoff state for reviewer consumption."), ], "mechanism": [ @@ -454,11 +454,11 @@ "zh": { "context": [ "这个案例展示 P0 被用户决策卡住时,系统不应该继续硬跑,也不应该让整个目标停摆。原场景是 benchmark rotation:一个 lane 需要大型本地 image,其他 no-upload benchmark work 仍然安全。", - "公开仓库没有暴露原始 benchmark task 或本地 image 名,而是用 synthetic smoke 复现控制面行为。", + "公开仓库没有暴露原始 benchmark task 或本地 image 名,而是用 synthetic smoke 复现控制面行为。用户价值是明确看到一个需要决策的 P0,同时安全 fallback 可以继续,且 gated lane 不消耗额外自动推进预算。", ], "evidence": [ - ("synthetic fixture", "`examples/showcase-0617-blocked-p0-safe-rotation-smoke.py` 构造 P0 user gate、被 gate 阻塞的 P0 agent todo 和 P1 no-upload fallback。"), - ("quota contract", "smoke 断言 `should_run=True`、`requires_user_action=True`、`safe_bypass_allowed=True`、`safe_bypass_kind=scoped_user_gate_fallback`。"), + ("synthetic fixture", "`examples/showcase-0617-blocked-p0-safe-rotation-smoke.py` 复现 P0 user gate、被 gate 阻塞的 P0 agent lane 和 P1 no-upload fallback。"), + ("quota contract", "smoke 固定 `should_run=True`、`requires_user_action=True`、`safe_bypass_allowed=True`、`safe_bypass_kind=scoped_user_gate_fallback` 等关键 
contract。"), ("selected fallback", "fixture 选择 `terminal_bench_no_upload`,同时保留 `ale_image` gate 的 user-visible blocker。"), ("rendered evidence", "smoke 检查 markdown 中包含 `scoped_user_gate_fallback` 和 safe no-upload Terminal-Bench rotation。"), ], @@ -479,11 +479,11 @@ "en": { "context": [ "This case shows what should happen when a P0 route is blocked by a user decision: the system should neither keep forcing that lane nor stop the whole goal. The original shape was a benchmark rotation where one lane needed a large local image while other no-upload benchmark work remained safe.", - "The public repository does not expose raw benchmark tasks or local image names. It reproduces the control-plane behavior with a synthetic smoke.", + "The public repository does not expose raw benchmark tasks or local image names. It reproduces the control-plane behavior with a synthetic smoke. The user-facing value is seeing one concrete P0 decision while safe fallback work can continue and the gated lane does not burn automated progress budget.", ], "evidence": [ - ("Synthetic fixture", "`examples/showcase-0617-blocked-p0-safe-rotation-smoke.py` creates a P0 user gate, a P0 agent todo blocked by that gate, and a P1 no-upload fallback."), - ("Quota contract", "The smoke asserts `should_run=True`, `requires_user_action=True`, `safe_bypass_allowed=True`, and `safe_bypass_kind=scoped_user_gate_fallback`."), + ("Synthetic fixture", "`examples/showcase-0617-blocked-p0-safe-rotation-smoke.py` reproduces a P0 user gate, a P0 agent lane blocked by that gate, and a P1 no-upload fallback."), + ("Quota contract", "The smoke pins `should_run=True`, `requires_user_action=True`, `safe_bypass_allowed=True`, `safe_bypass_kind=scoped_user_gate_fallback`, and related fallback evidence."), ("Selected fallback", "The fixture selects `terminal_bench_no_upload` while preserving the `ale_image` gate as the user-visible blocker."), ("Rendered evidence", "The smoke checks markdown for `scoped_user_gate_fallback` 
and safe no-upload Terminal-Bench rotation."), ], @@ -506,12 +506,12 @@ "zh": { "context": [ "这是 appendix case:它展示非技术创作者/运营者如何用 LoopX 管一个长期 research + planning loop,但还不是 top-card proof,因为没有真实用户公开证据。", - "公开材料完全使用 synthetic data,重点是产品形态:趋势候选、偏好映射、insight board、draft queue、material library、人类反馈和 controlled replan。", + "公开材料完全使用 synthetic data,重点不是证明增长,而是证明产品边界:研究和素材整理可以继续,发布、品牌判断和对外动作必须停在人类 gate。", ], "evidence": [ - ("storyboard", "`creator-ops-fake-data-storyboard.md` 定义七个面板和完整 fake fixture,不需要 live platform access。"), - ("feedback contract", "`creator-ops-feedback-boundary-contract.md` 把 gate decision、preference hint、todo update、boundary correction、reward signal、product improvement note 分开。"), - ("source status", "contract 要求 topic、insight、draft、material item 都有 source status,public repo 默认 `synthetic_demo`。"), + ("storyboard", "`creator-ops-fake-data-storyboard.md` 给出完整 fake fixture,不需要 live platform access,适合公开展示用户旅程而不泄漏真实运营数据。"), + ("feedback contract", "`creator-ops-feedback-boundary-contract.md` 把 gate decision、preference hint、todo update、boundary correction、reward signal 和 product improvement note 分开,避免把偏好误当发布授权。"), + ("source status", "contract 要求 topic、insight、draft、material item 都携带 source status;public repo 默认 `synthetic_demo`,不伪装成真实增长证据。"), ("no autopublish", "publishing 是 hard user gate;safe side work 可以继续,但不能把偏好或 reward 当成发布授权。"), ], "mechanism": [ @@ -533,12 +533,12 @@ "en": { "context": [ "This is an appendix case: it shows how a non-technical creator-operator might use LoopX to manage a long-running research and planning loop, but it is not a top-card proof until real public user evidence exists.", - "The public material uses only synthetic data. The product shape is trend candidates, preference map, insight board, draft queue, material library, human feedback, and controlled replan.", + "The public material uses only synthetic data. 
The point is not proving growth; it is proving the product boundary: research and material organization can continue, while publishing, brand judgment, and external action remain human gates.", ], "evidence": [ - ("Storyboard", "`creator-ops-fake-data-storyboard.md` defines seven panels and a full fake fixture with no live platform access."), - ("Feedback contract", "`creator-ops-feedback-boundary-contract.md` separates gate decision, preference hint, todo update, boundary correction, reward signal, and product improvement note."), - ("Source status", "The contract requires every topic, insight, draft, and material item to carry source status; public repo defaults to `synthetic_demo`."), + ("Storyboard", "`creator-ops-fake-data-storyboard.md` provides a complete fake fixture with no live platform access, suitable for showing the user journey without exposing real operations data."), + ("Feedback contract", "`creator-ops-feedback-boundary-contract.md` separates gate decisions, preference hints, todo updates, boundary corrections, reward signals, and product improvement notes so preference is not mistaken for publish approval."), + ("Source status", "The contract requires every topic, insight, draft, and material item to carry source status; the public repo defaults to `synthetic_demo` instead of pretending to show real growth evidence."), ("No autopublish", "Publishing is a hard user gate; safe side work may continue, but preference or reward is not publication approval."), ], "mechanism": [ @@ -583,6 +583,13 @@ "open": "Open", "demo": "Demo", "search": "Search showcase cases", + "experimental_title": "Experimental today-value path", + "experimental_intro": "A lower-priority entry point for users who want to pick one useful LoopX capability today without replacing the showcase first screen.", + "experimental_rows": [ + ("PR review/comment -> fix loop", "Branch-ready fix packet with repro, smoke result, and remaining review owner.", "Fewer dropped review threads."), 
+ ("Overnight PR-sized refactor", "Reviewable slice list, validation notes, successor todo, and merge boundary.", "More merged commits without a giant diff audit."), + ("P0 blocked -> safe fallback", "Kernel projection of the exact user gate, safe fallback todo, quota decision, and evidence boundary.", "Less idle agent time while preserving human judgment."), + ], "footer": "Generated from docs/showcases/showcase-catalog.json. Private links, raw chats, local state, and internal media are excluded.", }, "zh": { @@ -607,6 +614,13 @@ "open": "打开", "demo": "Demo", "search": "搜索 showcase 案例", + "experimental_title": "Experimental today-value path", + "experimental_intro": "一个放在首屏下方的实验性入口:帮助用户从三个 LoopX 能力里选择今天就能产生价值的一项。", + "experimental_rows": [ + ("PR review/comment -> fix loop", "可复核的修复包:repro、smoke 结果、剩余 review owner。", "更少遗漏 review 线程。"), + ("Overnight PR-sized refactor", "可 review 的 slice 列表、验证记录、后续 todo、merge 边界。", "增加可合并 commit,避免巨型 diff。"), + ("P0 blocked -> safe fallback", "已有 goal 内由 kernel 投影具体 user gate、安全 fallback todo、quota 决策和证据边界。", "减少 agent 空转,同时保留人类判断。"), + ], "footer": "由 docs/showcases/showcase-catalog.json 生成。不包含私有链接、原始聊天、本地状态或内部媒体。", }, } @@ -816,6 +830,8 @@ def css() -> str: .metric{border:1px solid rgba(255,255,255,.1);border-radius:10px;padding:17px 19px;background:#0c0c0e} .metric strong{display:block;font-size:24px;letter-spacing:-.02em;color:#fafafa} .metric span{display:block;margin-top:5px;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;text-transform:uppercase} + .mini-metrics{display:flex;flex-wrap:wrap;gap:8px;margin-top:13px} + .mini-metrics span{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#aeb3ba;border:1px solid rgba(255,255,255,.1);border-radius:5px;padding:4px 8px;background:#0b0b0c} .search{width:100%;height:42px;margin:22px 0 16px;border:1px solid rgba(255,255,255,.12);border-radius:8px;background:#0e0e10;color:#f1f2f3;padding:0 12px;font:14px 'Geist',system-ui,sans-serif} 
.cards{display:flex;flex-direction:column;gap:12px} .card{display:block;text-decoration:none;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:18px 20px} @@ -823,8 +839,16 @@ def css() -> str: .card .meta{font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;color:#62666d;margin-bottom:8px} .card p{font-size:14px;margin-top:9px} .hide{display:none} + .experiment{margin-top:24px;border:1px solid rgba(255,255,255,.1);border-radius:10px;background:#0e0e10;padding:20px} + .experiment h3{font-size:18px} + .experiment p{font-size:14px;margin-top:8px} + .experiment-grid{display:grid;grid-template-columns:repeat(3,minmax(0,1fr));gap:12px;margin-top:16px} + .experiment-card{border:1px solid rgba(255,255,255,.1);border-radius:8px;background:#0b0b0c;padding:14px} + .experiment-card strong{display:block;font-size:14px;line-height:1.45} + .experiment-card span{display:block;margin-top:9px;font-size:13px;line-height:1.55;color:#9ea3aa} + .experiment-card em{display:block;margin-top:10px;font-style:normal;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.55;color:var(--accent)} footer{margin-top:76px;color:#62666d;font-family:'Geist Mono',ui-monospace,monospace;font-size:10.5px;line-height:1.7} - @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} + @media(max-width:720px){article{padding:52px 18px 90px}.panel-row{grid-template-columns:1fr}.panel-key{border-right:0;border-bottom:1px solid rgba(255,255,255,.07)}.metric-grid,.evidence-grid,.experiment-grid{grid-template-columns:1fr}.source-ref{grid-template-columns:1fr;gap:6px}} """ @@ -880,11 +904,37 @@ def control_diagram(case: dict[str, Any], lang: str) -> str: """ -def metrics(case: dict[str, Any]) -> str: +def evidence_metric_cards(case: 
dict[str, Any], lang: str) -> list[tuple[str, str]]: + raw_metrics = case.get("evidence_metrics") + cards: list[tuple[str, str]] = [] + if not isinstance(raw_metrics, list): + return cards + for item in raw_metrics: + if not isinstance(item, dict): + continue + value = item.get("value") + labels = item.get("labels") + label = "" + if isinstance(labels, dict): + label = str(labels.get(lang) or labels.get("en") or "") + if not label: + label = str(item.get("label") or "") + if value is None or not label: + continue + cards.append((str(value), label)) + return cards + + +def metrics(case: dict[str, Any], lang: str) -> str: + cards = evidence_metric_cards(case, lang) + if cards: + return '
' + "".join( + f'
{esc(value)}{esc(label)}
' + for value, label in cards[:4] + ) + "
" workload = case.get("workload_signal") if not isinstance(workload, dict): return "" - cards: list[tuple[str, str]] = [] public_git = workload.get("public_git") if isinstance(workload.get("public_git"), dict) else {} whole = workload.get("whole_repository") if isinstance(workload.get("whole_repository"), dict) else {} window = workload.get("window") if isinstance(workload.get("window"), dict) else {} @@ -892,15 +942,15 @@ def metrics(case: dict[str, Any]) -> str: estimated = model.get("estimated_developer_days") if isinstance(model.get("estimated_developer_days"), dict) else {} compression = model.get("single_engineer_calendar_compression") if isinstance(model.get("single_engineer_calendar_compression"), dict) else {} if public_git.get("merged_commits") is not None: - cards.append((str(public_git["merged_commits"]), "merged PR commits")) + cards.append((str(public_git["merged_commits"]), "merged PR commits" if lang == "en" else "merged PR commits")) if window.get("hours") is not None: - cards.append((f"{window['hours']}h", "public window")) + cards.append((f"{window['hours']}h", "public window" if lang == "en" else "public window")) if whole.get("commit_count") is not None: - cards.append((str(whole["commit_count"]), "public commits")) + cards.append((str(whole["commit_count"]), "public commits" if lang == "en" else "public commits")) if estimated.get("low") and estimated.get("high"): - cards.append((f"{estimated['low']}-{estimated['high']}d", "AI-assisted baseline")) + cards.append((f"{estimated['low']}-{estimated['high']}d", "AI-assisted baseline" if lang == "en" else "AI-assisted baseline")) if compression.get("low") and compression.get("high"): - cards.append((f"{compression['low']}-{compression['high']}x", "calendar compression")) + cards.append((f"{compression['low']}-{compression['high']}x", "calendar compression" if lang == "en" else "calendar compression")) if not cards: return "" return '
' + "".join( @@ -961,7 +1011,7 @@ def render_case_page(case: dict[str, Any], lang: str, primary: bool) -> str:
{esc(ui(lang, "proof"))}

{esc(table["proof_point"])}

{esc(ui(lang, "intervention"))}

{esc(table["loopx_intervention"])}

- {metrics(case)} + {metrics(case, lang)} {evidence_block}
03

{esc(ui(lang, "behavior"))}

@@ -989,19 +1039,46 @@ def index_card(case: dict[str, Any], current: Path, lang: str) -> str: headline = localized(case, lang, "headline") table = table_for(case, lang) tags = first_items(case.get("pattern_tags"), 4) - search = " ".join([title, headline, table["proof_point"], table["loopx_intervention"], *tags]).lower() + metric_cards = evidence_metric_cards(case, lang) + metric_terms = [f"{value} {label}" for value, label in metric_cards] + search = " ".join([title, headline, table["proof_point"], table["loopx_intervention"], *tags, *metric_terms]).lower() canonical = f'{esc(ui(lang, "canonical"))}' if str(case.get("id")) == HARDWARE_CASE_ID and lang == "zh" else "" + metric_line = "" + if metric_cards: + metric_line = '
' + "".join( + f"{esc(value)} · {esc(label)}" for value, label in metric_cards[:3] + ) + "
" return f"""
{esc(case.get("date") or "")} · {esc(case.get("status") or "")}

{esc(title)}

{esc(headline)}

{esc(ui(lang, "proof"))}: {esc(table["proof_point"])}

+ {metric_line}
{badges(tags)}{canonical}
""" +def experimental_lane(lang: str) -> str: + cards = "\n".join( + f""" +
+ {esc(title)} + {esc(output)} + {esc(value)} +
""" + for title, output, value in ui(lang, "experimental_rows") + ) + return f""" +
+

{esc(ui(lang, "experimental_title"))}

+

{esc(ui(lang, "experimental_intro"))}

+
{cards} +
+
""" + + def render_index(cases: list[dict[str, Any]], lang: str) -> str: primary, appendix = ordered_cases(cases) current = index_path(lang) @@ -1025,6 +1102,7 @@ def render_index(cases: list[dict[str, Any]], lang: str) -> str:
01

{esc(ui(lang, "top_cases"))}

{primary_cards}
+ {experimental_lane(lang)}
02

{esc(ui(lang, "appendix"))}

{appendix_cards}