From 6a46daa548a63f4ab9544b050d53b8ea249e3a00 Mon Sep 17 00:00:00 2001 From: Kasper Junge Date: Tue, 9 Jun 2026 13:07:43 +0200 Subject: [PATCH] feat: add ralph-cli skill and replace Aider with Pi so agents can drive ralph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an in-repo ralph-cli agent skill (modeled on agr-cli) covering scaffold, run, RALPH.md authoring, agent selection, debugging, and the Python API, and register it in agr.toml. Replace every Aider agent suggestion across docs, examples, llms files, and the skill with Pi (pi.dev) — Pi reads piped stdin natively, so the config is just `pi -p -a` with no bash wrapper. Co-Authored-By: Claude Opus 4.8 --- README.md | 4 +- agr.lock | 4 + agr.toml | 1 + docs/cli.md | 12 +- .../2026-06-09-ralph-cli-skill-and-pi-swap.md | 142 +++++++++++ docs/index.md | 2 +- docs/llms-full.txt | 35 ++- docs/llms.txt | 6 +- examples/optimize-docs/RALPH.md | 2 +- skills/ralph-cli/SKILL.md | 227 ++++++++++++++++++ skills/ralph-cli/references/agents.md | 77 ++++++ skills/ralph-cli/references/python-api.md | 165 +++++++++++++ skills/ralph-cli/references/running.md | 84 +++++++ skills/ralph-cli/references/the-loop.md | 54 +++++ .../ralph-cli/references/troubleshooting.md | 89 +++++++ skills/ralph-cli/references/writing-ralphs.md | 103 ++++++++ tests/test_agent.py | 2 +- tests/test_console_emitter.py | 22 +- 18 files changed, 988 insertions(+), 43 deletions(-) create mode 100644 docs/diary/2026-06-09-ralph-cli-skill-and-pi-swap.md create mode 100644 skills/ralph-cli/SKILL.md create mode 100644 skills/ralph-cli/references/agents.md create mode 100644 skills/ralph-cli/references/python-api.md create mode 100644 skills/ralph-cli/references/running.md create mode 100644 skills/ralph-cli/references/the-loop.md create mode 100644 skills/ralph-cli/references/troubleshooting.md create mode 100644 skills/ralph-cli/references/writing-ralphs.md diff --git a/README.md b/README.md index 
c527f91a..bd213d79 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ ralph run grow-coverage # loops until Ctrl+C That's loop engineering in three lines: a prompt, a command for live data, and the runtime. Each iteration starts with a **fresh context window** and **current data** — ralphify runs the commands, fills in the `{{ placeholders }}`, pipes the prompt to your agent, and loops. Walk away, come back to a pile of commits. -*Works with any agent CLI. Swap `claude -p` for Codex, Aider, or your own — just change the `agent` field.* +*Works with any agent CLI. Swap `claude -p` for Codex, Pi, or your own — just change the `agent` field.* ## Install @@ -169,7 +169,7 @@ Full docs at **[ralphify.co/docs](https://ralphify.co/docs/)** — getting start ## Requirements - Python 3.11+ -- An agent CLI that accepts piped input ([Claude Code](https://docs.anthropic.com/en/docs/claude-code), Codex, Aider, or your own) +- An agent CLI that accepts piped input ([Claude Code](https://docs.anthropic.com/en/docs/claude-code), Codex, Pi, or your own) ## License diff --git a/agr.lock b/agr.lock index 1fa43c5e..6eccd1ea 100644 --- a/agr.lock +++ b/agr.lock @@ -38,6 +38,10 @@ installed-name = "code-review" path = "skills/ralphify-release" installed-name = "ralphify-release" +[[skill]] +path = "./skills/ralph-cli" +installed-name = "ralph-cli" + [[ralph]] path = "ralphs/bug-hunter" installed-name = "bug-hunter" diff --git a/agr.toml b/agr.toml index 6daa1023..c2118592 100644 --- a/agr.toml +++ b/agr.toml @@ -28,6 +28,7 @@ dependencies = [ {path = "ralphs/bug-hunter", type = "ralph"}, {path = "skills/ralphify-release", type = "skill"}, {handle = "computerlovetech/ralphs/improve-codebase", type = "ralph"}, + {path = "./skills/ralph-cli", type = "skill"}, ] [[source]] diff --git a/docs/cli.md b/docs/cli.md index 1dc2d760..dd327461 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -118,7 +118,7 @@ The compact peek panel shows the ten most recent activity lines. 
Press **shift+P | `[` / `]` | Browse to the previous / next iteration | | `q` or `P` | Exit back to the compact view | -Live streaming works with line-buffered agents such as Claude Code, OpenAI Codex, and Aider. Agents that repaint their own terminal UI (full-screen curses or TUI apps) are not supported — ralphify pipes their stdio, so they detect a non-TTY and fall back to plain output. If lines appear in bursts rather than as produced, set `PYTHONUNBUFFERED=1` (or the equivalent) in the environment where you launch `ralph`. +Live streaming works with line-buffered agents such as Claude Code, OpenAI Codex, and Pi. Agents that repaint their own terminal UI (full-screen curses or TUI apps) are not supported — ralphify pipes their stdio, so they detect a non-TTY and fall back to plain output. If lines appear in bursts rather than as produced, set `PYTHONUNBUFFERED=1` (or the equivalent) in the environment where you launch `ralph`. ### `ralph scaffold` @@ -238,7 +238,7 @@ Ralphify works with **any CLI that reads a prompt from stdin and exits when done | Agent | Stdin support | Streaming | Wrapper needed | |---|---|---|---| | [Claude Code](#claude-code) | Native (`-p`) | Yes — real-time activity tracking | No | -| [Aider](#aider) | Via bash wrapper | No | Yes (`bash -c`) | +| [Pi](#pi) | Native (`-p`) | No | No | | [Codex CLI](#codex-cli) | Native (`exec`) | No | No | | [Custom](#custom-wrapper-script) | You implement it | No | Yes (script) | @@ -254,17 +254,17 @@ agent: claude -p --dangerously-skip-permissions `-p` enables non-interactive mode (reads prompt from stdin, prints output, exits). `--dangerously-skip-permissions` skips approval prompts so the agent works autonomously — without it, the agent would hang forever waiting for approval that nobody is there to give. Install with `npm install -g @anthropic-ai/claude-code`. 
When the command starts with `claude`, ralphify automatically adds `--output-format stream-json --verbose` for activity tracking and result-text extraction. -### Aider +### Pi -[Aider](https://aider.chat) doesn't natively read prompts from stdin, so wrap it with `bash -c` and `cat -`: +[Pi](https://pi.dev) is a minimal coding-agent CLI. Its print mode reads the piped prompt from stdin natively, so no wrapper is needed: ```markdown --- -agent: bash -c 'aider --yes-always --no-auto-commits --message "$(cat -)"' +agent: pi -p -a --- ``` -`--yes-always` auto-approves changes; `--no-auto-commits` lets your prompt control commits. Add `--model claude-sonnet-4-6` (or another) to pick a model. +`-p` enables print mode (non-interactive: reads the piped prompt from stdin, prints the response, exits). `-a` (`--approve`) trusts the project-local Pi config and extensions for the run so the loop runs unattended. Install with `npm install -g --ignore-scripts @earendil-works/pi-coding-agent` or `curl -fsSL https://pi.dev/install.sh | sh`. ### Codex CLI diff --git a/docs/diary/2026-06-09-ralph-cli-skill-and-pi-swap.md b/docs/diary/2026-06-09-ralph-cli-skill-and-pi-swap.md new file mode 100644 index 00000000..74a19160 --- /dev/null +++ b/docs/diary/2026-06-09-ralph-cli-skill-and-pi-swap.md @@ -0,0 +1,142 @@ +# Diary: ralph-cli skill + replace Aider with Pi + +Two related pieces of work in one session: (1) author a new `ralph-cli` agent +skill modeled on the existing `agr-cli` skill, install it via agr; (2) replace +every Aider suggestion across the project with Pi (pi.dev). + +## Step 1: Create the ralph-cli skill + +**Author:** main + +### Prompt Context + +**Verbatim prompt:** "look in ../agr/skills/agr-cli . I want to make a similar +skill for ralphif ralph-cli. Please write it and use agr to install it." 
+**Interpretation:** Study the `agr-cli` SKILL.md + references as a template, and +produce an analogous skill that teaches an agent to operate the `ralph` CLI / +ralphify, then register it with `agr add`. +**Inferred intent:** Ship a first-class, in-repo skill so any agent working in +ralphify (or a downstream consumer) can drive `ralph` correctly — scaffold, +write RALPH.md, run loops, pick agents, debug, use the Python API. + +### What I did +Read `/Users/kasperjunge/Code/agr/skills/agr-cli/SKILL.md` to learn the +structure (frontmatter description, "When to use", mental model, numbered +workflows, boundaries, references index). Grounded the content in the real CLI +surface by running `ralph --help`, `ralph run --help`, `ralph scaffold --help` +and reading `/docs/cli.md` and the example `/ralphs/example/RALPH.md`. + +Scaffolded with `agr init ralph-cli`, moved it under `/skills/ralph-cli/`, and +wrote `SKILL.md` plus six reference files: `writing-ralphs.md`, `running.md`, +`the-loop.md`, `agents.md`, `troubleshooting.md`, `python-api.md`. Registered it +with `agr add ./skills/ralph-cli`, which added `{path = "./skills/ralph-cli", +type = "skill"}` to `/agr.toml` and installed a copy into +`/.claude/skills/ralph-cli`. + +### Why +The user explicitly asked for a sibling skill to `agr-cli`. Splitting depth into +`references/` keeps the top-level SKILL.md scannable while letting an agent load +detail on demand — the same pattern agr-cli uses. + +### What worked +`ralph --help` and the existing `docs/cli.md` gave a complete, authoritative +surface, so the skill didn't need guesswork. `agr init` + `agr add ./path` +registered the in-repo skill cleanly on the first try. + +### What didn't work +First `Write` of SKILL.md failed: "File has not been read yet" — `agr init` had +already created a scaffold SKILL.md, so it had to be `Read` before overwriting. +Re-read, then wrote successfully.
+ +### What I learned +ralphify ships ralphs as an agr resource `type = "ralph"` (visible in +`agr.toml`), distinct from `type = "skill"`. The skill's boundaries call this +out so it defers install/sync mechanics to the `agr-cli` skill and keeps its own +scope on authoring + running ralphs. + +### What was tricky +Scoping the description's trigger terms. "ralph", "RALPH.md", and "loop +engineering" are extremely common in this very repo, so the skill will activate +often here. I flagged that to the user rather than silently narrowing it. + +### What warrants review +The SKILL.md description trigger breadth (over-activation risk in-repo). And the +factual accuracy of `references/python-api.md` against the actual `ralphify` +package exports — it was derived from `docs/cli.md`, which should be canonical, +but worth a sanity check. + +### Future work +None required; the user immediately moved on to the Pi swap. + +## Step 2: Replace Aider with Pi across the project + +**Author:** main + +### Prompt Context + +**Verbatim prompt:** "remove suggestions about aider across the project. +Replace wiht Pi from pi.dev" +**Interpretation:** Every place the docs/skill/examples suggest Aider as an +agent option should instead suggest Pi (the coding agent at pi.dev), with +correct invocation. +**Inferred intent:** Standardize the recommended non-Claude/non-Codex agent on +Pi everywhere a reader might copy it. + +### What I did +`grep -rin aider` to enumerate hits, then looked up Pi's real invocation via +WebFetch/WebSearch against pi.dev and `earendil-works/pi` docs before writing +any config — established that Pi's print mode (`-p`) reads piped stdin natively +and that `-a`/`--approve` trusts project-local config for unattended runs. + +Replaced Aider with Pi in: `/README.md`, `/docs/index.md`, `/docs/cli.md` (agent +table row + the full `### Aider` → `### Pi` section), `/docs/llms.txt`, +`/docs/llms-full.txt` (taglines, table, the full Aider section incl. 
the +"specific model" variant and the standalone test snippet), +`/examples/optimize-docs/RALPH.md`, and the `ralph-cli` skill +(`SKILL.md`, `references/agents.md`, `references/running.md`). Used `sed` for the +one-line list swaps and `Edit` for the structural sections. + +In `/tests/test_agent.py` and `/tests/test_console_emitter.py`, "aider" was only +a generic non-Claude placeholder agent in fixtures, so I swapped it to "pi" +(still non-Claude, non-stream-json — behavior unchanged). Left the two mentions +in `/tasks/done/` alone — those are archived completed-task write-ups, i.e. +historical narrative, not user-facing suggestions. + +Reinstalled the skill with `agr add ./skills/ralph-cli --overwrite`. + +### Why +The user wanted Aider gone "across the project." Pi is a genuine improvement to +document because, unlike Aider, it needs no `bash -c '... "$(cat -)"'` wrapper — +the config collapses to `agent: pi -p -a`. + +### What worked +`pytest tests/test_agent.py tests/test_console_emitter.py` → 229 passed, 1 +xpassed. `mkdocs build --strict` → clean, zero warnings. Final +`grep -rin aider` showed only the intentional `tasks/done/` leftovers. + +### What didn't work +One `Edit` on `docs/llms-full.txt` failed with "File has been modified since +read" (a linter/formatter touched it between my Read and Edit). Re-read the +exact line range and re-applied — succeeded. + +### What I learned +Pi install is `npm install -g --ignore-scripts @earendil-works/pi-coding-agent` +or `curl -fsSL https://pi.dev/install.sh | sh`. Print mode merges piped stdin +into the prompt, and `--approve`/`-a` governs trust of project-local +inputs/extensions (not tool-action auto-approval — non-interactive print mode is +inherently non-prompting). I verified this rather than guessing flags. + +### What was tricky +Deciding scope of "across the project." I included the generated-but-committed +`llms*.txt` mirrors and the test fixtures, but deliberately excluded +`tasks/done/` history. 
Called that boundary out to the user. + +### What warrants review +The `pi -p -a` recommendation — confirm `-a` is the desired default for +unattended loops vs. relying on a saved trust decision. Also the `llms-full.txt` +Pi section, which I rewrote by hand since there's no generator script in the repo +(`scripts/` only has `tui_dev/`). + +### Future work +If ralphify later adds a generator for `llms.txt`/`llms-full.txt`, the +hand-edited Pi content should be reconciled with it. diff --git a/docs/index.md b/docs/index.md index 2b2e4f07..668661cb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -47,7 +47,7 @@ ralph run grow-coverage # loops until Ctrl+C Each iteration starts with a **fresh context window** and **current data** — ralphify runs the commands, fills in the `{{ placeholders }}`, pipes the prompt to your agent, and loops. -*Works with any agent CLI. Swap `claude -p` for Codex, Aider, or your own — just change the `agent` field.* +*Works with any agent CLI. Swap `claude -p` for Codex, Pi, or your own — just change the `agent` field.* [Get Started](getting-started.md){ .md-button .md-button--primary } [The ralph loops format](https://ralphloops.io/){ .md-button } diff --git a/docs/llms-full.txt b/docs/llms-full.txt index 031b6b1c..cece5966 100644 --- a/docs/llms-full.txt +++ b/docs/llms-full.txt @@ -43,7 +43,7 @@ ralph run grow-coverage # loops until Ctrl+C One directory. One command. Each iteration starts with fresh context and current data — ralphify runs the commands, fills in `{{ placeholders }}`, pipes the prompt to your agent, and loops. -Works with any agent CLI. Swap `claude -p` for Codex, Aider, or your own — just change the `agent` field. +Works with any agent CLI. Swap `claude -p` for Codex, Pi, or your own — just change the `agent` field. 
## Install @@ -586,7 +586,7 @@ This page shows how to configure the `agent` field in your RALPH.md for popular | Agent | Stdin support | Streaming | Wrapper needed | |---|---|---|---| | [Claude Code](#claude-code) | Native (`-p`) | Yes — real-time activity tracking | No | -| [Aider](#aider) | Via bash wrapper | No | Yes (`bash -c`) | +| [Pi](#pi) | Native (`-p`) | No | No | | [Codex CLI](#codex-cli) | Native (`exec`) | No | No | | [Custom](#custom-wrapper-script) | You implement it | No | Yes (script) | @@ -643,33 +643,32 @@ This enables ralphify to: - Track agent activity in real time - Extract the final result text from the agent's response -## Aider +## Pi -[Aider](https://aider.chat) is an AI pair-programming tool that works with multiple LLM providers. +[Pi](https://pi.dev) is a minimal coding-agent CLI. Its print mode reads the piped prompt from stdin natively, so no wrapper is needed. ```markdown --- -agent: bash -c 'aider --yes-always --no-auto-commits --message "$(cat -)"' +agent: pi -p -a --- ``` | Flag | Purpose | |---|---| -| `--yes-always` | Auto-approve all changes (no interactive prompts) | -| `--no-auto-commits` | Let your prompt control when commits happen | -| `--message "..."` | Pass the prompt as a message instead of stdin | +| `-p` | Print mode — non-interactive: reads the piped prompt from stdin, prints output, exits | +| `-a` / `--approve` | Trust the project-local Pi config and extensions for the run so the loop runs unattended | -Why the bash wrapper? +Install Pi: -Aider doesn't natively read prompts from stdin. The `bash -c` wrapper reads stdin with `cat -` and passes it as a `--message` argument. +```bash +npm install -g --ignore-scripts @earendil-works/pi-coding-agent +# or +curl -fsSL https://pi.dev/install.sh | sh +``` -### Aider with a specific model +Why `-p`? 
-```markdown ---- -agent: bash -c 'aider --yes-always --no-auto-commits --model claude-sonnet-4-6 --message "$(cat -)"' ---- -``` +Print mode is non-interactive — it reads the assembled prompt straight from stdin and exits when done, exactly what an autonomous loop needs. Because Pi reads stdin natively, there's no `bash -c` wrapper to write. ## Codex CLI @@ -731,7 +730,7 @@ Verify the agent works outside of ralphify first. The command depends on which a ``` ```bash - echo "Say hello and nothing else" | bash -c 'aider --yes-always --no-auto-commits --message "$(cat -)"' + echo "Say hello and nothing else" | pi -p -a ``` ```bash @@ -1935,7 +1934,7 @@ The loop also stops automatically when: When you run `ralph run` in an interactive terminal, the agent's stdout and stderr stream live to the console by default. Press `p` to silence the stream (useful for quieter loops) and press `p` again to resume it. The default is off whenever the output is not a real terminal (piped, redirected, or captured in CI), so `ralph run ... | cat` is unaffected. -Live streaming works with line-buffered agents such as Claude Code, OpenAI Codex, Aider, and any other process that writes one line at a time. When `--log-dir` is set, output is captured to the log file and also echoed after each iteration completes; live peek still works the same way in that mode. +Live streaming works with line-buffered agents such as Claude Code, OpenAI Codex, Pi, and any other process that writes one line at a time. When `--log-dir` is set, output is captured to the log file and also echoed after each iteration completes; live peek still works the same way in that mode. --- diff --git a/docs/llms.txt b/docs/llms.txt index eb6e1bed..eeb002f3 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -40,7 +40,7 @@ ralph run my-ralph # loop until Ctrl+C ralph run my-ralph -n 5 # run 5 iterations ``` -Works with any agent CLI — swap `claude -p` for Codex, Aider, or your own. 
+Works with any agent CLI — swap `claude -p` for Codex, Pi, or your own. ## Why a format @@ -52,7 +52,7 @@ Everyone writing ralph loops ends up with the same scaffolding: a markdown promp - **Live steering**: Edit `RALPH.md` while the loop runs. Changes take effect on the next iteration. - **Fresh context every cycle**: No conversation bloat or hallucinated memories. The agent reads current codebase state each time. - **Progress in git**: Every iteration commits to git. Roll back with `git reset` if needed. -- **Any agent**: Works with Claude Code, Aider, Codex CLI, or any CLI that reads stdin. +- **Any agent**: Works with Claude Code, Pi, Codex CLI, or any CLI that reads stdin. - **Shareable ralphs**: Ralphs are just directories — share via git repos, install with [agr](https://github.com/computerlovetech/agr). ## Placeholder types @@ -80,7 +80,7 @@ Embed the loop in automation pipelines. See the API docs for details. - [Getting Started](https://ralphify.co/docs/getting-started/): Install to running loop in 10 minutes - [How it Works](https://ralphify.co/docs/how-it-works/): Iteration lifecycle details -- [Using with Different Agents](https://ralphify.co/docs/agents/): Claude Code, Aider, Codex CLI setup +- [Using with Different Agents](https://ralphify.co/docs/agents/): Claude Code, Pi, Codex CLI setup - [Cookbook](https://ralphify.co/docs/cookbook/): Copy-pasteable recipes for coding, docs, research - [CLI Reference](https://ralphify.co/docs/cli/): All commands and options - [Python API](https://ralphify.co/docs/api/): Programmatic usage diff --git a/examples/optimize-docs/RALPH.md b/examples/optimize-docs/RALPH.md index 8fe5dbe6..fb5027df 100644 --- a/examples/optimize-docs/RALPH.md +++ b/examples/optimize-docs/RALPH.md @@ -62,7 +62,7 @@ content. 
Then: developers actually type - Add clear `description` and `keywords` to frontmatter — base these on actual search intent, not just the page content -- Research what terms competing tools (aider, cursor, claude-code, +- Research what terms competing tools (pi, cursor, claude-code, copilot) rank for and use relevant ones - Target long-tail keywords around specific tasks ("run AI agent in loop", "automate coding agent prompts") diff --git a/skills/ralph-cli/SKILL.md b/skills/ralph-cli/SKILL.md new file mode 100644 index 00000000..e823d046 --- /dev/null +++ b/skills/ralph-cli/SKILL.md @@ -0,0 +1,227 @@ +--- +name: ralph-cli +description: > + Build, run, and debug autonomous AI coding loops with the ralph CLI from + ralphify (https://ralphify.co). Use whenever the user mentions ralphify, + ralph, RALPH.md, "ralph loop", "loop engineering", `ralph run`, `ralph + scaffold`, or asks to: set up an autonomous agent loop, run a coding agent + (Claude Code, Codex, …) repeatedly with fresh context, write or fix a + RALPH.md frontmatter/prompt, wire commands and {{ placeholders }} into a + loop, choose or configure an agent for a loop, run loops concurrently. +--- + +# ralph CLI + +Ralphify is the open-source framework for **loop engineering**: running an AI +coding agent in an autonomous loop, piping it a prompt, capturing fresh dynamic +data each iteration, and repeating with a clean context window. The CLI is +`ralph`. This skill helps you operate it on the user's behalf — scaffold +ralphs, write and fix `RALPH.md`, run loops, pick agents, and reach for the +Python API when programmatic control is needed. + +## When to use + +Use this skill when the user wants to: + +- Scaffold a new ralph (`ralph scaffold my-task`). +- Run an autonomous loop (`ralph run my-ralph -n 5`). +- Write or fix a `RALPH.md` — frontmatter (`agent`, `commands`, `args`), + the prompt body, or `{{ placeholders }}`. +- Wire a command's output into the prompt, or pass user arguments. 
+- Pick or configure an agent (Claude Code, Codex CLI, custom wrapper). +- Debug a loop that hangs, fails every iteration, or doesn't commit. +- Run loops concurrently, or drive the loop from Python (`run_loop`, + `RunConfig`, `RunManager`, the event system). +- Or whenever a `RALPH.md` is present and the user is doing loop-harness work. + +Do NOT use this skill for: + +- Installing/sharing/syncing *skills* across tools — that's the `agr` CLI + (`agr-cli` skill). Note ralphify ships ralphs as an agr resource type, so + `agr add user/repo/ralph` installs a ralph into `.agents/ralphs/`; the + install/sync mechanics belong to `agr-cli`, the authoring and running of the + ralph itself belong here. + +## Prerequisites + +Verify ralph is installed: + +```bash +ralph --version +``` + +If missing, the standard install is `uv tool install ralphify` (or `pipx +install ralphify`). **Don't install without checking with the user first** — +they may prefer a pinned version. The CLI is `ralph`; the PyPI package is +`ralphify`. + +The loop also needs an **agent CLI** installed and authenticated (Claude Code, +Codex, Pi, …). Claude Code is the default and best-supported: `npm install +-g @anthropic-ai/claude-code`. + +## Mental model — read first + +- A **ralph** is just a directory containing a `RALPH.md` file. No project + config, no special directories. The shape follows the open + [ralph loops format](https://ralphloops.io/). +- **`RALPH.md`** = YAML frontmatter (settings) + body (the prompt). Frontmatter + fields: `agent` (required), `commands`, `args`, `credit`. +- **The loop**, each iteration: run `commands` → assemble the prompt (resolve + `{{ placeholders }}`) → pipe it to the `agent` via stdin → agent works and + exits → repeat with a **fresh context window**. +- **Fresh context every iteration** is the whole point. The agent has no memory + between iterations; the prompt + live command output *are* its memory. 
Point + it at durable state on disk (`TODO.md`, `PLAN.md`, failing tests, git log). +- **The self-healing loop**: a command like `pytest` captures failures into the + next iteration's prompt, so the agent sees what it broke and fixes it. +- The **prompt body is re-read every iteration** (edit it live); frontmatter is + read once at startup (restart to change `agent`/`commands`/`args`). + +For the full per-iteration lifecycle: [references/the-loop.md](references/the-loop.md). + +## Workflows + +### 1. Scaffold a new ralph + +```bash +ralph scaffold my-task # creates my-task/RALPH.md from a template +ralph scaffold # creates RALPH.md in the current directory +``` + +The template ships an example command (`git-log`), an example arg (`focus`), +and a body using both. Errors if `RALPH.md` already exists. Then edit the +frontmatter and body for the real task — see +[references/writing-ralphs.md](references/writing-ralphs.md). + +### 2. Run the loop + +```bash +ralph run my-ralph # run forever (Ctrl+C to stop) +ralph run my-ralph -n 5 # run 5 iterations +ralph run my-ralph -s # --stop-on-error: stop if agent exits non-zero/times out +ralph run my-ralph -d 10 # --delay 10s between iterations +ralph run my-ralph -t 300 # --timeout: kill agent after 300s/iteration +ralph run my-ralph -l ralph_logs # --log-dir: save iteration output to files +ralph run my-ralph --focus auth # pass a user arg → {{ args.focus }} +``` + +`PATH` may be a directory with a `RALPH.md`, a direct path to a `RALPH.md`, or +the name of an installed ralph in `.agents/ralphs/`. + +**Always validate a new or edited ralph with `ralph run my-ralph -n 1` first** — +one iteration surfaces setup/frontmatter/agent errors clearly before you commit +to a long run. Add `--log-dir ralph_logs` to capture output for inspection. + +Stopping: first `Ctrl+C` finishes the current iteration then stops; a second +`Ctrl+C` force-kills the agent. 
Full options and the live-peek keys (`p`, shift+`P`): +[references/running.md](references/running.md). + +### 3. Write / fix a RALPH.md + +The single source of truth. Minimal valid example: + +```markdown +--- +agent: claude -p --dangerously-skip-permissions +commands: + - name: tests + run: uv run pytest -x +args: [focus] +--- + +# Your task + +{{ commands.tests }} + +If tests are failing, fix them before new work. Focus area: {{ args.focus }}. +You are in a loop with no memory — track progress in TODO.md. +``` + +Key rules (full guide: [references/writing-ralphs.md](references/writing-ralphs.md)): + +- Only commands **referenced by a placeholder** appear in the prompt (an + unreferenced command still runs, but its output is dropped). +- `run` is parsed with `shlex.split()` and run **directly, not via a shell** — + no pipes, `>`, `&&`, or `$VAR`. Point it at a `.sh` script instead. +- Commands starting with `./` run from the ralph directory; others run from the + project root. Default per-command `timeout` is 60s. +- Placeholders: `{{ commands.<name> }}`, `{{ args.<name> }}`, and the automatic + `{{ ralph.name }}` / `{{ ralph.iteration }}` / `{{ ralph.max_iterations }}`. +- Named flags (`--focus value`) work without declaring `args`. Declaring + `args: [...]` is only needed to accept **positional** arguments. + +### 4. Pick / configure an agent + +Ralphify works with any CLI that **reads a prompt from stdin and exits when +done**. If unsure, start with Claude Code. + +```markdown +agent: claude -p --dangerously-skip-permissions # Claude Code (default, best support) +agent: codex exec --sandbox danger-full-access - # OpenAI Codex CLI +agent: pi -p -a # Pi (pi.dev) — native stdin, no wrapper +agent: ./ralph-agent.sh # custom wrapper script +``` + +`--dangerously-skip-permissions` (Claude) / auto-approve flags are required — +without them the agent hangs waiting for approval nobody is there to give.
+Details and the custom-wrapper pattern: [references/agents.md](references/agents.md). + +### 5. Debug a loop + +Checklist before digging in: + +1. `ralph run my-ralph -n 1` — validates setup, shows clear errors. +2. Test the agent standalone: `echo "Say hello" | claude -p`. +3. `--log-dir ralph_logs` to capture output, then read the log. +4. Remember commands don't support shell features — use a script. + +Common failure-cause map and frontmatter/arg/command error messages: +[references/troubleshooting.md](references/troubleshooting.md). + +### 6. Drive the loop from Python + +For concurrent runs, custom event handling, or embedding in another tool, use +the `ralphify` package: `run_loop(config, state, emitter)`, `RunConfig`, +`Command`, `RunState`, and `RunManager` for concurrency. + +```python +from pathlib import Path +from ralphify import run_loop, RunConfig, RunState, Command + +config = RunConfig( + agent="claude -p --dangerously-skip-permissions", + ralph_dir=Path("my-ralph"), + ralph_file=Path("my-ralph/RALPH.md"), + commands=[Command(name="tests", run="uv run pytest -x")], + max_iterations=3, +) +run_loop(config, RunState(run_id="my-run")) +``` + +Full API surface (events, emitters, `RunManager`): +[references/python-api.md](references/python-api.md). + +## Boundaries + +- **Always validate with `ralph run ... -n 1` before a long run**, and before + recommending a ralph the user will leave running unattended. +- **Long autonomous runs do real work** (edit files, commit, run commands). + Run them on a **dedicated branch** — never on `main` — and confirm scope with + the user before kicking off an unbounded (`ralph run` with no `-n`) loop. +- **Run concurrent loops on separate branches** to avoid git conflicts. +- **Ralphify doesn't commit for the agent.** If the user wants commits, the + *prompt* must instruct it, and the agent must have git permission. +- **Don't add `ralph_logs/`** (or other generated output) to git — gitignore + it. 
Commit `RALPH.md` and any helper `*.sh` scripts. +- **Don't push or `git tag`** as part of these workflows; let the user push. + +## References + +- [writing-ralphs.md](references/writing-ralphs.md) — RALPH.md frontmatter, commands, placeholders, prompt-writing patterns +- [running.md](references/running.md) — `ralph run` options, stopping, live peek, user arguments +- [the-loop.md](references/the-loop.md) — the six steps of an iteration, what's re-read vs fixed, the self-healing loop +- [agents.md](references/agents.md) — Claude Code, Codex, Pi, custom wrappers +- [troubleshooting.md](references/troubleshooting.md) — failure causes and all error messages +- [python-api.md](references/python-api.md) — `run_loop`, `RunConfig`, `Command`, `RunState`, events, `RunManager` + +Canonical docs: https://ralphify.co/docs/ — or run `ralph --help`. diff --git a/skills/ralph-cli/references/agents.md b/skills/ralph-cli/references/agents.md new file mode 100644 index 00000000..19217da4 --- /dev/null +++ b/skills/ralph-cli/references/agents.md @@ -0,0 +1,77 @@ +# Using different agents + +Ralphify works with **any CLI that reads a prompt from stdin and exits when +done**. Every iteration it runs `echo "<assembled prompt>" | <agent command>`. +Your agent must read the prompt from stdin, do work in the current directory, +and exit (0 = success, non-zero = failure). + +| Agent | Stdin support | Streaming | Wrapper needed | +|---|---|---|---| +| Claude Code | Native (`-p`) | Yes — real-time activity tracking | No | +| Codex CLI | Native (`exec`) | No | No | +| Pi | Native (`-p`) | No | No | +| Custom | You implement it | No | Yes (script) | + +If unsure, **start with Claude Code** — it's the default and has the deepest +integration. + +## Claude Code + +```markdown +agent: claude -p --dangerously-skip-permissions +``` + +`-p` = non-interactive (read prompt from stdin, print, exit).
+`--dangerously-skip-permissions` skips approval prompts so the agent works +autonomously — without it the agent hangs forever waiting for an approval +nobody is there to give. Install: `npm install -g @anthropic-ai/claude-code`. +When the command starts with `claude`, ralphify auto-adds +`--output-format stream-json --verbose` for activity tracking and result-text +extraction. + +## Codex CLI + +```markdown +agent: codex exec --sandbox danger-full-access - +``` + +`exec` is non-interactive mode, `--sandbox danger-full-access` grants full +filesystem access, `-` reads the prompt from stdin. + +## Pi + +[Pi](https://pi.dev) is a minimal coding-agent CLI. Print mode reads the piped +prompt from stdin natively, so no wrapper is needed: + +```markdown +agent: pi -p -a +``` + +`-p` is print mode (non-interactive: read piped stdin as the prompt, print the +response, exit). `-a` (`--approve`) trusts the project-local Pi config and +extensions for the run so the loop runs unattended. Install with `npm install +-g --ignore-scripts @earendil-works/pi-coding-agent` or +`curl -fsSL https://pi.dev/install.sh | sh`. + +## Custom wrapper script + +For anything else, write a script that reads stdin and calls your agent: + +```bash +#!/bin/bash +set -e +PROMPT=$(cat -) +my-custom-agent --input "$PROMPT" --auto-approve +``` + +```markdown +agent: ./ralph-agent.sh +``` + +General pattern for tools that take a prompt via a flag: +`bash -c '<tool> --message "$(cat -)"'`. + +**Test any agent outside ralphify first** (`echo "Say hello" | <agent +command>`), then through ralphify with `ralph run my-ralph -n 1 --log-dir +ralph_logs`. The auto-approve / skip-permissions flag is mandatory for every +agent — an unattended loop can't answer interactive prompts.
diff --git a/skills/ralph-cli/references/python-api.md b/skills/ralph-cli/references/python-api.md new file mode 100644 index 00000000..12680d91 --- /dev/null +++ b/skills/ralph-cli/references/python-api.md @@ -0,0 +1,165 @@ +# Python API + +Everything is importable from the top-level `ralphify` package. It runs the +same loop as the CLI, so anything `ralph run` does, you can do from Python. + +```python +from pathlib import Path +from ralphify import run_loop, RunConfig, RunState + +config = RunConfig( + agent="claude -p --dangerously-skip-permissions", + ralph_dir=Path("my-ralph"), + ralph_file=Path("my-ralph/RALPH.md"), + commands=[], + max_iterations=3, +) +run_loop(config, RunState(run_id="my-run")) +``` + +## `run_loop(config, state, emitter=None)` + +The main loop. Reads `RALPH.md`, runs commands, assembles prompts, pipes them to +the agent, repeats. **Blocks until the loop finishes.** + +| Param | Type | Description | +|---|---|---| +| `config` | `RunConfig` | All settings for the run | +| `state` | `RunState` | Observable state — counters, status, control methods | +| `emitter` | `EventEmitter \| None` | Event listener; `None` = `NullEmitter` (silent) | + +## `RunConfig` + +Fields mirror the CLI options. Mutable — change a field mid-run and the loop +picks it up at the next iteration boundary. 
+ +| Field | Type | Default | Description | +|---|---|---|---| +| `agent` | `str` | — | Full agent command string | +| `ralph_dir` | `Path` | — | Ralph directory | +| `ralph_file` | `Path` | — | The RALPH.md file | +| `commands` | `list[Command]` | `[]` | Commands run each iteration | +| `args` | `dict[str, str]` | `{}` | User argument values | +| `max_iterations` | `int \| None` | `None` | Max iterations (`None` = unlimited) | +| `delay` | `float` | `0` | Seconds between iterations | +| `timeout` | `float \| None` | `None` | Max seconds per iteration | +| `stop_on_error` | `bool` | `False` | Stop if agent exits non-zero/times out | +| `log_dir` | `Path \| None` | `None` | Directory for iteration log files | +| `credit` | `bool` | `True` | Append co-author trailer instruction | +| `project_root` | `Path` | `Path(".")` | Project root directory | + +## `Command` + +```python +from ralphify import Command +Command(name="tests", run="uv run pytest -x") +Command(name="integration", run="uv run pytest tests/integration", timeout=300) +``` + +`name` and `run` required; `timeout` defaults to 60s. + +## `RunState` + +Observable state for a running loop; thread-safe control methods. + +| Property | Type | Description | +|---|---|---| +| `run_id` | `str` | Unique run id | +| `status` | `RunStatus` | Lifecycle status | +| `iteration` | `int` | Current iteration (starts at 0) | +| `completed` | `int` | Successful iterations | +| `failed` | `int` | Failed iterations (includes timed out) | +| `timed_out_count` | `int` | Timed-out iterations (subset of `failed`) | +| `total` | `int` | `completed + failed` | +| `started_at` | `datetime \| None` | Start time | +| `paused` | `bool` | Currently paused | +| `stop_requested` | `bool` | Stop requested | + +Control: `state.request_stop()`, `request_pause()`, `request_resume()`. + +`RunStatus`: `PENDING`, `RUNNING`, `PAUSED`, `STOPPED`, `COMPLETED`, `FAILED`. +`FAILED` = stopped by `--stop-on-error` after a failure/timeout, or crashed. 
+ +`StopReason` (in `RUN_STOPPED.reason`): `"completed"`, `"error"`, +`"user_requested"`. + +## Event system + +The loop emits structured events at each step. Implement the `EventEmitter` +protocol (a single `emit(event)` method) to listen. + +```python +from ralphify import Event, EventType, RunConfig, RunState, run_loop + +class MyEmitter: + def emit(self, event: Event) -> None: + if event.type == EventType.ITERATION_COMPLETED: + print(f"Iter {event.data['iteration']} in {event.data['duration_formatted']}") + +run_loop(config, RunState(run_id="observed"), emitter=MyEmitter()) +``` + +Each `Event` carries `type` (`EventType`), `run_id`, `data` (dict), and +`timestamp`. `event.to_dict()` serializes it. + +### `EventType` reference + +| Group | Event | Data fields | +|---|---|---| +| Run | `RUN_STARTED` | `ralph_name`, `commands`, `max_iterations`, `timeout`, `delay` | +| Run | `RUN_STOPPED` | `reason`, `total`, `completed`, `failed`, `timed_out_count` | +| Run | `RUN_PAUSED` / `RUN_RESUMED` | — | +| Iteration | `ITERATION_STARTED` | `iteration` | +| Iteration | `ITERATION_COMPLETED` | `iteration`, `returncode`, `duration`, `duration_formatted`, `detail`, `log_file`, `result_text` | +| Iteration | `ITERATION_FAILED` | same as completed | +| Iteration | `ITERATION_TIMED_OUT` | same (`returncode` is `None`) | +| Commands | `COMMANDS_STARTED` / `COMMANDS_COMPLETED` | `iteration`, `count` | +| Prompt | `PROMPT_ASSEMBLED` | `iteration`, `prompt_length` | +| Agent | `AGENT_ACTIVITY` | `iteration`, `raw` (one stream-json line; Claude Code only) | +| Agent | `AGENT_OUTPUT_LINE` | `iteration`, `line`, `stream` (all agents) | +| Other | `LOG_MESSAGE` | `message`, `level`, `traceback` (optional) | + +### Built-in emitters + +| Emitter | Description | +|---|---| +| `NullEmitter` | Discards all events (default) | +| `QueueEmitter` | Pushes events into a `queue.Queue` | +| `FanoutEmitter` | Broadcasts each event to multiple emitters | +| `BoundEmitter` | Wraps an emitter with a 
fixed `run_id`; has `log_info`, `log_error`, `agent_output_line` | + +## Concurrent runs with `RunManager` + +Thread-safe registry for launching and controlling multiple loops. Each run +gets its own daemon thread and event queue. + +```python +from pathlib import Path +from ralphify import RunManager, RunConfig, Command + +manager = RunManager() +config = RunConfig( + agent="claude -p --dangerously-skip-permissions", + ralph_dir=Path("docs-ralph"), + ralph_file=Path("docs-ralph/RALPH.md"), + commands=[Command(name="build", run="mkdocs build --strict")], + max_iterations=5, +) +run = manager.create_run(config) +manager.start_run(run.state.run_id) + +for r in manager.list_runs(): + print(f"{r.state.run_id}: {r.state.status.value} — {r.state.completed} done") + +manager.pause_run(run.state.run_id) +manager.resume_run(run.state.run_id) +manager.stop_run(run.state.run_id) +``` + +Each run is a `ManagedRun` bundling `config`, `state`, and a `QueueEmitter`. +Register extra listeners **before** `start_run()` with +`run.add_listener(listener)` — events broadcast to both the queue and your +listeners via a `FanoutEmitter`. + +Methods: `create_run(config)`, `start_run(id)`, `stop_run(id)`, `pause_run(id)`, +`resume_run(id)`, `list_runs()`, `get_run(id)`. 
diff --git a/skills/ralph-cli/references/running.md b/skills/ralph-cli/references/running.md new file mode 100644 index 00000000..cde59d06 --- /dev/null +++ b/skills/ralph-cli/references/running.md @@ -0,0 +1,84 @@ +# Running the loop — `ralph run` + +```bash +ralph run my-ralph # run forever (Ctrl+C to stop) +ralph run my-ralph -n 5 # run 5 iterations +ralph run my-ralph -s # stop on agent error/timeout +ralph run my-ralph -d 10 # 10s delay between iterations +ralph run my-ralph -t 300 # kill agent after 300s/iteration +ralph run my-ralph -l ralph_logs # save iteration output to log files +``` + +| Arg / Option | Short | Default | Description | +|---|---|---|---| +| `PATH` | | required | Ralph directory with `RALPH.md`, a direct `RALPH.md` path, or an installed ralph name in `.agents/ralphs/` | +| `-n` | | unlimited | Max iterations | +| `--stop-on-error` | `-s` | off | Stop if the agent exits non-zero or times out | +| `--delay` | `-d` | `0` | Seconds between iterations | +| `--timeout` | `-t` | none | Max seconds per iteration (kills the agent if exceeded) | +| `--log-dir` | `-l` | none | Directory for per-iteration log files | + +The top-level CLI also has `ralph --version` / `-V`, `ralph --help`, and +`ralph --install-completion <shell>` for shell completion. + +## Validate first + +Run **one iteration** before any long run: + +```bash +ralph run my-ralph -n 1 --log-dir ralph_logs +``` + +This validates the path, frontmatter, agent availability, and command syntax, +and captures the full output for inspection — far cheaper than discovering a +broken prompt 20 iterations deep. + +## Stopping the loop + +| Action | Behavior | +|---|---| +| `Ctrl+C` (first) | Finish the current iteration gracefully, then stop | +| `Ctrl+C` (second) | Force-kill the agent process and exit immediately | + +The loop also stops when all `-n` iterations finish, or when `--stop-on-error` +is set and an iteration fails/times out.
+ +## Live peek + +In an interactive terminal the agent's stdout/stderr stream live by default +(off automatically when piped/redirected/CI, so `ralph run … | cat` is clean). + +| Key | Action | +|---|---| +| `p` | Toggle the live peek stream on/off | +| shift+`P` | Enter full-screen scrollable peek | +| `j` / `k` | (in full-screen) scroll down / up one line | +| `space` / `b` | page down / up | +| `g` / `G` | jump to top / bottom (bottom re-enables follow) | +| `[` / `]` | previous / next iteration | +| `q` or `P` | exit back to compact view | + +Live peek works with line-buffered agents (Claude Code, +Codex, Pi). Full-screen TUI/curses agents aren't supported — ralphify pipes +their stdio, so they detect a non-TTY and fall back to plain output. If output +arrives in bursts, set `PYTHONUNBUFFERED=1` in the launch environment. + +## User-argument grammar + +```bash +# Named flags work with or without args declared in frontmatter +ralph run research --dir ./my-project --focus "performance" +ralph run research --dir=./my-project --focus="performance" + +# Positional args require args: [dir, focus] in frontmatter +ralph run research ./my-project "performance" + +# Mixed — positional values skip names already given via flags +ralph run research --focus "performance" ./my-project + +# -- ends flag parsing; everything after is positional +ralph run research -- --verbose ./src +``` + +Use `--` when a positional value starts with `--`. Missing args → empty string. +Reference values with `{{ args.<name> }}` in the body or in command `run` strings. diff --git a/skills/ralph-cli/references/the-loop.md b/skills/ralph-cli/references/the-loop.md new file mode 100644 index 00000000..0f12001b --- /dev/null +++ b/skills/ralph-cli/references/the-loop.md @@ -0,0 +1,54 @@ +# How the loop works + +The lifecycle of one iteration — the format you write, and the runtime that +runs it. + +## The six steps of each iteration + +1.
**Re-read the prompt from disk.** The body (everything below the + frontmatter) is read **every iteration**, so you can edit the task while the + loop runs. Frontmatter (`agent`, `commands`, `args`) is parsed once at + startup — restart to change it. +2. **Run commands and capture output.** Each command runs in order and captures + stdout + stderr **regardless of exit code**. Commands run from the project + root by default; `./`-prefixed commands run from the ralph directory. Each + has a 60s default timeout (override per command with `timeout`). +3. **Resolve placeholders.** `{{ commands.<name> }}`, `{{ args.<name> }}`, and + `{{ ralph.* }}` are substituted. `{{ args.* }}` resolves both in the body and + in command `run` strings. +4. **Assemble the final prompt.** The resolved body becomes one text string. + HTML comments (`<!-- ... -->`) are stripped. By default a co-author trailer + instruction is appended asking the agent to add + `Co-authored-by: Ralphify <noreply@ralphify.co>` to commits — disable with + `credit: false`. +5. **Pipe the prompt to the agent** via stdin (`echo "<prompt>" | <agent command>`). + The agent works in the current directory and exits; ralphify waits. When the + agent command starts with `claude`, ralphify auto-adds + `--output-format stream-json --verbose` for structured streaming. +6. **Loop back with fresh context.** The next iteration starts at step 1 with a + clean context window and fresh command output. + +## What gets re-read vs.
fixed + +| What | When read | Why it matters | +|---|---|---| +| Prompt body | Every iteration | Edit the task live; the next iteration follows it | +| Command output | Every iteration | The agent always sees fresh data (git log, test status) | +| Frontmatter (`agent`, `commands`, `args`) | Once at startup | Restart to change | +| User arguments | Once at startup | Constant for the run | + +## The self-healing feedback loop + +When iteration 1 breaks a test, iteration 2's `{{ commands.tests }}` shows it: + +``` +FAILED tests/test_auth.py::test_login - AssertionError: expected 200, got 401 +====================== 1 failed, 4 passed in 1.45s ======================= + +If tests are failing, fix them before starting new work. +``` + +The agent sees the failure plus the instruction to fix it first. The agent +breaks something, the command captures it, the agent fixes it next iteration. +This is why feeding a test/lint/build command back into the prompt is the +single highest-leverage thing you can do in a ralph. diff --git a/skills/ralph-cli/references/troubleshooting.md b/skills/ralph-cli/references/troubleshooting.md new file mode 100644 index 00000000..646c2a1f --- /dev/null +++ b/skills/ralph-cli/references/troubleshooting.md @@ -0,0 +1,89 @@ +# Troubleshooting + +Quick checklist before digging in: + +1. `ralph run my-ralph -n 1` — validates setup, shows clear errors. +2. Test the agent standalone: `echo "Say hello" | claude -p`. +3. `--log-dir ralph_logs` to capture output, then read the log. +4. Commands don't support shell features (pipes, `&&`) — use a wrapper script. + +## Setup + +- **"is not a directory, RALPH.md file, or installed ralph"** — the path + doesn't resolve. `ralph run` accepts a directory containing `RALPH.md`, a + direct `RALPH.md` path, or an installed ralph name in `.agents/ralphs/`. +- **"Missing or empty 'agent' field"** — add `agent: claude -p + --dangerously-skip-permissions` to the frontmatter. 
+- **"Agent command 'claude' not found on PATH"** — the agent CLI isn't + installed / on PATH. Verify with `claude --version`. + +## Loop behavior + +- **Agent hangs / no output** — run the agent directly (`echo "Say hello" | + claude -p`). If it hangs there, it's the agent CLI. If it works standalone, + add `--timeout 300` to kill stalled iterations. +- **Agent exits non-zero every iteration** — capture with `-n 1 --log-dir + ralph_logs` and read the log. Common causes: missing agent auth, a prompt + asking for a failing command, or a prompt too large for the context window. +- **Agent runs but doesn't commit** — ralphify doesn't commit for the agent. + Add explicit commit instructions to the prompt and ensure the agent has git + permission (`--dangerously-skip-permissions` for Claude Code). +- **Loop runs too fast / no useful work** — the prompt is too vague or has no + concrete task source. Tell the agent it's in a loop with no memory; point it + at `TODO.md`, `PLAN.md`, or failing tests. +- **Output not streaming** — press `p` to toggle live peek. Streaming is off in + non-TTY environments (piped/CI) by design; use `--log-dir`. If bursty, set + `PYTHONUNBUFFERED=1`. + +## Frontmatter errors + +- **"Invalid YAML in frontmatter"** — missing colon, bad indentation, or an + unquoted special char (`:`, `#`, `{`, `[`). Quote risky values. +- **"Frontmatter must be a YAML mapping"** — use `key: value` pairs, not a bare + string or list. +- **"Each command must have 'name' and 'run' fields"** — both required, + non-empty strings. +- **"Malformed 'agent' field"** — usually an unmatched quote in `agent`. +- **"'credit' must be true or false"** — only YAML booleans, not `"yes"`/`0`. +- **"... name contains invalid characters"** — command/arg names: letters, + digits, hyphens, underscores only. +- **"Duplicate arg name" / "Duplicate command name"** — names must be unique. 
+- **"'commands' must be a list" / "'args' must be a list of strings"** — use + list syntax (`args: [focus]`; `commands:` as a list of `{name, run}`). Quote + `args` items that look like numbers/booleans. +- **"Command '...' has invalid timeout"** — `timeout` must be a positive number. + +## Argument errors + +- **"Positional argument '...' requires args declared in frontmatter"** — add + an `args` field, or use `--name value` flag syntax (works without declaration). +- **"Too many positional arguments"** — more positional values than `args` + declares. +- **"Flag '--...' requires a value"** — a `--name` flag passed without a value. + +## Command errors + +- **"Command '...' has invalid syntax"** — malformed shell syntax in `run`, + usually an unmatched quote. Use a script for complex quoting. +- **"Command '...' binary not found"** — not installed / on PATH. If it's in a + virtualenv, prefix with `uv run`. +- **Pipes/redirections not working** — `run` is `shlex`-split and run directly; + `|`, `2>&1`, `&&`, `$VAR` won't work. Wrap in a `.sh` script. +- **Output truncated** — the command hit its 60s timeout. Raise `timeout: 300` + or speed up the command. +- **Command output missing from prompt** — check the placeholder name matches + the command `name` exactly, that the command produces output, and that you + used `commands` (plural). + +## Common questions + +- **Run multiple loops in parallel?** Yes — on **separate branches** to avoid + git conflicts. For programmatic control, use the Python `RunManager`. +- **What to commit?** `RALPH.md` and helper `*.sh` scripts. Gitignore + `ralph_logs/`. +- **Edit RALPH.md while running?** Yes — the body is re-read each iteration; + frontmatter needs a restart. +- **Disable the co-author credit?** Set `credit: false` in frontmatter. + +If it's not here: `ralph run my-ralph -n 1` to validate, `--log-dir` to +capture, or file an issue at github.com/computerlovetech/ralphify/issues. 
diff --git a/skills/ralph-cli/references/writing-ralphs.md b/skills/ralph-cli/references/writing-ralphs.md new file mode 100644 index 00000000..306048cc --- /dev/null +++ b/skills/ralph-cli/references/writing-ralphs.md @@ -0,0 +1,103 @@ +# Writing a RALPH.md + +A `RALPH.md` is the single configuration + prompt file for a ralph: YAML +frontmatter for settings, the body for the prompt text. + +```markdown +--- +agent: claude -p --dangerously-skip-permissions +commands: + - name: tests + run: uv run pytest -x + - name: git-log + run: git log --oneline -10 +args: [dir, focus] +--- + +# Prompt body + +{{ commands.tests }} + +{{ commands.git-log }} + +Your instructions here. Reference args with {{ args.dir }}. +``` + +## Frontmatter fields + +| Field | Type | Required | Description | +|---|---|---|---| +| `agent` | string | yes | Full agent command the prompt is piped to (see `agents.md`) | +| `commands` | list | no | Commands run each iteration; each has `name` and `run` | +| `args` | list of strings | no | Declared user-argument names. Letters, digits, `-`, `_` only | +| `credit` | bool | no | Append co-author trailer instruction to prompt (default `true`) | + +## Commands + +| Field | Type | Default | Description | +|---|---|---|---| +| `name` | string | required | Identifier used in `{{ commands.<name> }}`. Letters, digits, `-`, `_` only | +| `run` | string | required | Command run each iteration (supports `{{ args.<name> }}`) | +| `timeout` | number | `60` | Max seconds before the command is killed | + +- Commands run **in order**, each iteration. +- Output (stdout + stderr) is captured **regardless of exit code** — a failing + `pytest -x` is exactly what you want in the prompt. +- `run` is parsed with `shlex.split()` and run **directly, not through a + shell**. Pipes (`|`), redirections (`>`, `2>&1`), chaining (`&&`), and shell + variables (`$VAR`) do NOT work. Point `run` at a script instead: + `run: ./my-script.sh`.
+- Commands starting with `./` run from the **ralph directory**; all others run + from the **project root** (current working directory of `ralph run`). + +## Placeholders + +| Syntax | Resolves to | +|---|---| +| `{{ commands.<name> }}` | Output of the named command | +| `{{ args.<name> }}` | Value of the named user argument | +| `{{ ralph.name }}` | Ralph directory name | +| `{{ ralph.iteration }}` | Current iteration number (1-based) | +| `{{ ralph.max_iterations }}` | Total iterations if `-n` set, else empty | + +- `ralph.*` placeholders are automatic — no frontmatter needed. +- **Only commands referenced by a `{{ commands.* }}` placeholder appear in the + prompt.** An unreferenced command still *runs* each iteration (useful for side + effects), but its output is excluded. +- Unmatched placeholders resolve to an empty string. +- `{{ args.* }}` is resolved both in the body and inside command `run` strings. + +## Prompt-writing patterns + +The agent starts every iteration with **no memory** — only the assembled prompt +and fresh command output. Good prompts make that explicit and anchor the agent +to durable state: + +- **State the loop contract**: "You are running in a loop with no memory of + previous iterations. Read TODO.md to see what's left." +- **Give one concrete task source**: a `TODO.md`/`PLAN.md`, failing tests, an + issue list, or a command whose output names the next thing to do. A vague + prompt with no task source makes the loop spin without useful work. +- **Feed back live signal**: include `{{ commands.tests }}` (or lint, build, + typecheck) and tell the agent to fix failures *before* new work. This is the + self-healing loop. +- **Instruct commits explicitly** if you want them — ralphify never commits for + the agent. "Commit each chunk of work with a descriptive message." +- **HTML comments are stripped** from the assembled prompt — use `<!-- ... -->` + for notes to yourself that the agent never sees.
+- Edit the body freely while the loop runs; the next iteration picks it up. + +## User arguments + +Named flags (`--focus value` or `--focus=value`) work **without** any +frontmatter declaration. The `args` field is only needed to accept **positional** +arguments — it maps positions to names. + +```bash +ralph run research --dir ./proj --focus performance # named flags, no args: needed +ralph run research ./proj performance # positional, needs args: [dir, focus] +ralph run research -- --verbose ./src # -- ends flag parsing +``` + +Missing args resolve to an empty string. See `running.md` for the full argument +grammar. diff --git a/tests/test_agent.py b/tests/test_agent.py index f3358448..92754f2f 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -33,7 +33,7 @@ def test_claude_absolute_path(self): assert _supports_stream_json(["/usr/local/bin/claude", "-p"]) is True def test_non_claude_binary(self): - assert _supports_stream_json(["aider", "--yes"]) is False + assert _supports_stream_json(["pi", "--yes"]) is False def test_empty_command(self): assert _supports_stream_json([]) is False diff --git a/tests/test_console_emitter.py b/tests/test_console_emitter.py index ab6ff13e..85b48b35 100644 --- a/tests/test_console_emitter.py +++ b/tests/test_console_emitter.py @@ -199,7 +199,7 @@ def test_run_started_shows_ralph_name(self): _make_event( EventType.RUN_STARTED, ralph_name="my-ralph", - agent="aider", + agent="pi", timeout=0, commands=0, max_iterations=None, @@ -216,7 +216,7 @@ def test_run_started_shows_timeout(self): _make_event( EventType.RUN_STARTED, ralph_name="test", - agent="aider", + agent="pi", timeout=120, commands=0, max_iterations=None, @@ -232,7 +232,7 @@ def test_run_started_shows_command_count(self): _make_event( EventType.RUN_STARTED, ralph_name="test", - agent="aider", + agent="pi", timeout=0, commands=3, max_iterations=None, @@ -248,7 +248,7 @@ def test_run_started_singular_command(self): _make_event( EventType.RUN_STARTED, 
ralph_name="test", - agent="aider", + agent="pi", timeout=0, commands=1, max_iterations=None, @@ -266,7 +266,7 @@ def test_run_started_shows_max_iterations(self): _make_event( EventType.RUN_STARTED, ralph_name="test", - agent="aider", + agent="pi", timeout=0, commands=0, max_iterations=5, @@ -282,7 +282,7 @@ def test_run_started_no_info_line_when_no_config(self): _make_event( EventType.RUN_STARTED, ralph_name="test", - agent="aider", + agent="pi", timeout=0, commands=0, max_iterations=None, @@ -301,7 +301,7 @@ def test_run_started_combines_info(self): _make_event( EventType.RUN_STARTED, ralph_name="test", - agent="aider", + agent="pi", timeout=60, commands=2, max_iterations=3, @@ -322,7 +322,7 @@ def test_startup_hint_shown_when_peek_on_by_default(self): _make_event( EventType.RUN_STARTED, ralph_name="my-ralph", - agent="aider", + agent="pi", timeout=0, commands=0, max_iterations=None, @@ -356,7 +356,7 @@ def test_startup_hint_raw_for_non_claude(self): _make_event( EventType.RUN_STARTED, ralph_name="my-ralph", - agent="aider --yes", + agent="pi --yes", timeout=0, commands=0, max_iterations=None, @@ -374,7 +374,7 @@ def test_no_startup_hint_when_peek_off(self): _make_event( EventType.RUN_STARTED, ralph_name="my-ralph", - agent="aider", + agent="pi", timeout=0, commands=0, max_iterations=None, @@ -1510,7 +1510,7 @@ def test_claude_full_path(self): assert _is_claude_command("/usr/local/bin/claude -p") is True def test_not_claude(self): - assert _is_claude_command("aider --yes") is False + assert _is_claude_command("pi --yes") is False def test_empty(self): assert _is_claude_command("") is False