diff --git a/.jaiph/async.jh b/.jaiph/async.jh index c7348434..8abcd209 100755 --- a/.jaiph/async.jh +++ b/.jaiph/async.jh @@ -3,13 +3,17 @@ const prompt_text = "Say: Greetings! I am [model name]." workflow cursor_say_hello(name) { - config { agent.backend = "cursor" } + config { + agent.backend = "cursor" + } const response = prompt "${prompt_text}" log response } workflow claude_say_hello(name) { - config { agent.backend = "claude" } + config { + agent.backend = "claude" + } const response = prompt "${prompt_text}" log response } diff --git a/.jaiph/engineer.jh b/.jaiph/engineer.jh index 3e3e5781..e2dc4c11 100755 --- a/.jaiph/engineer.jh +++ b/.jaiph/engineer.jh @@ -8,14 +8,14 @@ import "jaiphlang/queue" as queue import "jaiphlang/artifacts" as artifacts import "./docs_parity.jh" as docs import "./ensure_ci_passes.jh" as ci -import "./git.jh" as git +import "jaiphlang/git" as git config { - agent.backend = "cursor" - agent.default_model = "gpt-5.3-codex" - agent.cursor_flags = "--force" - # agent.backend = "claude" - # agent.claude_flags = "--permission-mode bypassPermissions" + # agent.backend = "cursor" + # agent.default_model = "composer-2" + # agent.cursor_flags = "--force" + agent.backend = "claude" + agent.claude_flags = "--permission-mode bypassPermissions" } const code_philosophy = """ @@ -286,7 +286,7 @@ workflow default(name) { run docs.update_from_task(task) run queue.remove_completed_task(task_header) - const patch_file = run git.patch(task) - const target_path = run artifacts.save(patch_file, patch_file) - return target_path + const patch_file = run git.commit(task) + run artifacts.save(patch_file) + return patch_file } diff --git a/.jaiph/libs/jaiphlang/artifacts.jh b/.jaiph/libs/jaiphlang/artifacts.jh index e23b64d0..fb68a0cb 100644 --- a/.jaiph/libs/jaiphlang/artifacts.jh +++ b/.jaiph/libs/jaiphlang/artifacts.jh @@ -10,27 +10,57 @@ # import "jaiphlang/artifacts" as artifacts # # workflow default() { -# run artifacts.save("./build/output.bin", 
"build-output.bin") +# # Single file: +# run artifacts.save("./build/output.bin") +# +# # Or several files: newline-separated list of paths. +# const paths = """ +# a.txt +# b/nested.txt +# """ +# run artifacts.save(paths) # } # script save_script = ``` set -euo pipefail ARTIFACTS_DIR="${JAIPH_ARTIFACTS_DIR:?JAIPH_ARTIFACTS_DIR is not set}" - src="$1" - dest_name="$2" - if [[ ! -f "$src" ]]; then - printf 'artifacts save: file not found: %s\n' "$src" >&2 + paths_list="$1" + out="" + + while IFS= read -r line || [[ -n "${line-}" ]]; do + [[ -z "${line//[[:space:]]/}" ]] && continue + src="${line#"${line%%[![:space:]]*}"}" + src="${src%"${src##*[![:space:]]}"}" + if [[ ! -f "$src" ]]; then + printf 'artifacts save: file not found: %s\n' "$src" >&2 + exit 1 + fi + if [[ "$src" = /* ]]; then + relpath="$(basename -- "$src")" + else + relpath="${src#./}" + fi + dest="${ARTIFACTS_DIR}/${relpath}" + mkdir -p "$(dirname -- "$dest")" + cp -- "$src" "$dest" + if [[ -n "$out" ]]; then + out+=$'\n' + fi + out+="$dest" + done <<<"$paths_list" + + if [[ -z "$out" ]]; then + printf 'artifacts save: no paths in list\n' >&2 exit 1 fi - dest="${ARTIFACTS_DIR}/${dest_name}" - mkdir -p "$(dirname "$dest")" - cp -- "$src" "$dest" - printf '%s' "$dest" + printf '%s' "$out" ``` -# Copies the file at `local_path` into the artifacts directory under `name`. -# Returns the absolute path of the saved artifact. -export workflow save(local_path, name) { - return run save_script(local_path, name) +# `paths` is a single file path or a newline-separated list of file paths. +# Each file is copied under the same relative name as in the list +# (leading `./` stripped; absolute paths use basename only). +# Returns the absolute destination paths, one per line, in the same order. 
+export workflow save(paths) { + return run save_script(paths) } diff --git a/.jaiph/git.jh b/.jaiph/libs/jaiphlang/git.jh similarity index 50% rename from .jaiph/git.jh rename to .jaiph/libs/jaiphlang/git.jh index 2450aa5b..8cf01eea 100755 --- a/.jaiph/git.jh +++ b/.jaiph/libs/jaiphlang/git.jh @@ -1,23 +1,33 @@ #!/usr/bin/env jaiph -script git_inside_worktree = `git rev-parse --is-inside-work-tree >/dev/null 2>&1` +script git_inside_worktree = `git rev-parse --is-inside-work-tree 2>&1` script git_porcelain_empty = `test -z "$(git status --porcelain)"` script git_porcelain_nonempty = `test -n "$(git status --porcelain)"` +script git_mark_workspace_safe = `git config --global --add safe.directory "$(pwd)"` + +# format-patch emits real diff to stdout only with --stdout; otherwise git writes *.patch files and stdout is only the path. $1 is the destination patch file; it is quoted so a name containing spaces or glob characters is not word-split by the shell. +script git_create_patch_from_commit = `git config --global --add safe.directory "$(pwd)" && git format-patch -1 HEAD --stdout > "$1"` + rule in_git_repo() { + run git_mark_workspace_safe() run git_inside_worktree() catch (err) { fail "not inside a git repository" } } rule branch_clean() { - run git_porcelain_empty() + run git_porcelain_empty() catch (err) { + fail "git working tree is not clean" + } } rule has_changes() { - run git_porcelain_nonempty() + run git_porcelain_nonempty() catch (err) { + fail "git working tree has no changes" + } } rule is_clean() { @@ -26,55 +36,43 @@ rule is_clean() { } workflow commit(task) { - ensure in_git_repo() - - ensure has_changes() catch (err) { - log "No changes to commit — skipping." - return "" + config { + agent.backend = "cursor" + agent.cursor_flags = "--force" + agent.default_model = "auto" } - prompt """ - Commit the current repository changes now. + ensure in_git_repo() + ensure has_changes() + const response = prompt """ + Please commit current changes and respond with a commit message and + suggested patch file name (excluding extension). + Requirements for commit message: 1.
Write a commit message - first line in imperative mood, under 72 chars. 2. Start with the common prefix like 'Feat:', 'Fix:', 'Refactor:' etc. 3. Write a body paragraph with more details about the change. - + Requirements for commit: 1. Review current git changes (git diff --stat, git status). 2. Stage all relevant changes with git add. 3. Create exactly one commit. 4. Do not push. 5. Remove files that are not relevant to the commit and not git ignored. - + Changes were made for the following task: ${task} """ + returns "{ message: string, patch_file_name: string }" + + const patch_file_name = "${response.patch_file_name}.patch" + + run git_create_patch_from_commit(patch_file_name) + + return patch_file_name } -# Writes a unified diff (HEAD vs working tree, excluding `.jaiph/`) to `dest`. -# Returns `dest` (relative path). `task` is reserved for callers / future naming. -script write_tree_patch = ``` - set -euo pipefail - dest="$1" - mkdir -p "$(dirname "$dest")" - diff_out="$(git diff HEAD -- . ':!.jaiph/' 2>/dev/null || true)" - if [[ -z "${diff_out}" ]]; then - git add -N . -- ':!.jaiph/' 2>/dev/null || true - diff_out="$(git diff HEAD -- . ':!.jaiph/' 2>/dev/null || true)" - git reset HEAD -- . 
2>/dev/null || true - fi - if [[ -n "${diff_out}" ]]; then - printf '%s\n' "${diff_out}" > "$dest" - else - : > "$dest" - fi - printf '%s' "$dest" -``` - -workflow patch(task) { - ensure in_git_repo() - const dest = ".jaiph/tmp/engineer-workspace.patch" - return run write_tree_patch(dest) +workflow default(task) { + return run commit(task) } diff --git a/.jaiph/main.jh b/.jaiph/main.jh index 5f3db9fd..aaf143f7 100755 --- a/.jaiph/main.jh +++ b/.jaiph/main.jh @@ -7,7 +7,7 @@ import "./engineer.jh" as implement import "./architect_review.jh" as architect -import "./git.jh" as git +import "jaiphlang/git" as git workflow default() { ensure git.is_clean() diff --git a/.jaiph/qa.jh b/.jaiph/qa.jh index f4d8166d..af08b4a7 100755 --- a/.jaiph/qa.jh +++ b/.jaiph/qa.jh @@ -1,7 +1,7 @@ #!/usr/bin/env jaiph import "./ensure_ci_passes.jh" as ci -import "./git.jh" as git +import "jaiphlang/git" as git config { agent.backend = "claude" @@ -9,35 +9,35 @@ config { } script read_contributing_docs = ``` -test -f docs/contributing.md || { - echo "docs/contributing.md not found (run from repo root)" >&2 - return 1 -} -awk ' -/^---$/ { - if (sec == 0) { sec = 1; next } - if (sec == 1) { sec = 2; next } -} -sec == 1 { next } -sec == 2 { print } -' docs/contributing.md + test -f docs/contributing.md || { + echo "docs/contributing.md not found (run from repo root)" >&2 + return 1 + } + awk ' + /^---$/ { + if (sec == 0) { sec = 1; next } + if (sec == 1) { sec = 2; next } + } + sec == 1 { next } + sec == 2 { print } + ' docs/contributing.md ``` -script read_txtar_format_spec = `cat compiler-tests/README.md` +script read_txtar_format_spec = `cat test-fixtures/compiler-txtar/README.md` script read_txtar_fixture_names = ``` -for f in compiler-tests/*.txt; do - echo "=== FILE: $f ===" - grep '^=== ' "$f" | sed 's/^=== / /' -done + for f in test-fixtures/compiler-txtar/*.txt; do + echo "=== FILE: $f ===" + grep '^=== ' "$f" | sed 's/^=== / /' + done ``` script read_txtar_fixtures = ``` -for f in 
compiler-tests/*.txt; do - echo "========== $f ==========" - cat "$f" - echo "" -done + for f in test-fixtures/compiler-txtar/*.txt; do + echo "========== $f ==========" + cat "$f" + echo "" + done ``` script new_qa_gap_report_path = `echo ".jaiph/tmp/qa_gap_report_$(date +%Y-%m-%d_%H-%M-%S).md"` @@ -45,103 +45,103 @@ script new_qa_gap_report_path = `echo ".jaiph/tmp/qa_gap_report_$(date +%Y-%m-%d script write_gap_report_pointer = `printf '%s\n' "$1" > .jaiph/tmp/qa_gap_report_active.txt` script gap_report_nonempty = ``` -p="$(cat .jaiph/tmp/qa_gap_report_active.txt 2>/dev/null)" || return 1 -[ -z "$p" ] && return 1 -test -s "$p" + p="$(cat .jaiph/tmp/qa_gap_report_active.txt 2>/dev/null)" || return 1 + [ -z "$p" ] && return 1 + test -s "$p" ``` script read_gap_report = ``` -p="$(cat .jaiph/tmp/qa_gap_report_active.txt)" -cat "$p" + p="$(cat .jaiph/tmp/qa_gap_report_active.txt)" + cat "$p" ``` script save_gap_report_file = `printf '%s\n' "$1" > "$2"` const analyze_gaps_prompt = """ - - You are a QA engineer analyzing the Jaiph codebase for test coverage gaps. - - - ${contributing_docs} - - - - ${txtar_format} - - - - ${txtar_names} - - - Use the testing philosophy and layer locations from contributing_docs above. - The test layers are: - - 1. **Compiler tests (txtar)** — language-agnostic fixtures in compiler-tests/*.txt. - Each test case is a .jh source with an expected outcome (ok or error). - These test parse + validate without execution. This is the PRIMARY layer - for compiler error paths and valid-source acceptance. - - 2. **Module unit tests** — colocated src/**/*.test.ts for internal API testing - (AST structure, helper functions, display formatting). - - 3. **Compiler acceptance** — src/transpile/*.acceptance.test.ts for cross-module - and integration scenarios. - - 4. **Runtime e2e** — e2e/tests/*.sh for full execution (compile + run + verify output). - - 5. **CLI display** — src/cli/run/display.test.ts and progress tree rendering tests. 
- - - Your task: - - 1. Read every source file in src/parse/*.ts. For each fail() call, check if - the error is covered by either: - - A txtar test case in compiler-tests/*.txt (check existing_txtar_tests above) - - A colocated TS test in src/parse/*.test.ts - List fail() calls with no coverage in either layer. These are COMPILER gaps — - they should be fixed with txtar test cases. - - 2. Read src/transpile/validate.ts, src/transpile/validate-ref-resolution.ts, - src/transpile/validate-string.ts, src/transpile/validate-prompt-schema.ts, - and src/transpile/shell-jaiph-guard.ts. For each jaiphError() call, check - coverage in txtar fixtures and TS tests. List uncovered paths. - These are COMPILER gaps — txtar test cases. - - 3. Read e2e/tests/*.sh file names and compare against the feature list in - docs/grammar.md and docs/cli.md. List features with no runtime e2e test. - Also check if all e2e test files are registered in e2e/test_all.sh. - These are RUNTIME gaps — e2e bash tests. - - 4. Read src/cli/run/display.ts and src/cli/run/progress.ts. For each - formatting branch (start line, end line, prompt label, async label, - depth indentation, TTY vs non-TTY), check test coverage in - display.test.ts. List gaps. Focus on response tree rendering edge cases. - These are DISPLAY gaps — TS unit tests. - - 5. Read src/cli/commands/*.ts. List error paths and edge cases with no - test coverage. These are CLI gaps — TS unit tests or e2e tests. - - Output a structured gap report. 
For each gap, specify which test layer - should be used: - - ## Gap: [short description] - - File: [source file with untested code path] - - Code path: [specific branch/function/line range] - - Why it matters: [what bug this could hide] - - Test layer: **txtar** | **unit** | **acceptance** | **e2e** - - Priority: [high | medium | low] - - For **txtar** gaps, also include the suggested test case in txtar format: - \`\`\` - === suggested test name - # @expect error E_CODE "substring" - --- input.jh - - \`\`\` - - Sort by priority descending. Be thorough but skip trivial paths. - - + + You are a QA engineer analyzing the Jaiph codebase for test coverage gaps. + + + ${contributing_docs} + + + + ${txtar_format} + + + + ${txtar_names} + + + Use the testing philosophy and layer locations from contributing_docs above. + The test layers are: + + 1. **Compiler tests (txtar)** — language-agnostic fixtures in test-fixtures/compiler-txtar/*.txt. + Each test case is a .jh source with an expected outcome (ok or error). + These test parse + validate without execution. This is the PRIMARY layer + for compiler error paths and valid-source acceptance. + + 2. **Module unit tests** — colocated src/**/*.test.ts for internal API testing + (AST structure, helper functions, display formatting). + + 3. **Compiler acceptance** — src/transpile/*.acceptance.test.ts for cross-module + and integration scenarios. + + 4. **Runtime e2e** — e2e/tests/*.sh for full execution (compile + run + verify output). + + 5. **CLI display** — src/cli/run/display.test.ts and progress tree rendering tests. + + + Your task: + + 1. Read every source file in src/parse/*.ts. For each fail() call, check if + the error is covered by either: + - A txtar test case in test-fixtures/compiler-txtar/*.txt (check existing_txtar_tests above) + - A colocated TS test in src/parse/*.test.ts + List fail() calls with no coverage in either layer. These are COMPILER gaps — + they should be fixed with txtar test cases. + + 2.
Read src/transpile/validate.ts, src/transpile/validate-ref-resolution.ts, + src/transpile/validate-string.ts, src/transpile/validate-prompt-schema.ts, + and src/transpile/shell-jaiph-guard.ts. For each jaiphError() call, check + coverage in txtar fixtures and TS tests. List uncovered paths. + These are COMPILER gaps — txtar test cases. + + 3. Read e2e/tests/*.sh file names and compare against the feature list in + docs/grammar.md and docs/cli.md. List features with no runtime e2e test. + Also check if all e2e test files are registered in e2e/test_all.sh. + These are RUNTIME gaps — e2e bash tests. + + 4. Read src/cli/run/display.ts and src/cli/run/progress.ts. For each + formatting branch (start line, end line, prompt label, async label, + depth indentation, TTY vs non-TTY), check test coverage in + display.test.ts. List gaps. Focus on response tree rendering edge cases. + These are DISPLAY gaps — TS unit tests. + + 5. Read src/cli/commands/*.ts. List error paths and edge cases with no + test coverage. These are CLI gaps — TS unit tests or e2e tests. + + Output a structured gap report. For each gap, specify which test layer + should be used: + + ## Gap: [short description] + - File: [source file with untested code path] + - Code path: [specific branch/function/line range] + - Why it matters: [what bug this could hide] + - Test layer: **txtar** | **unit** | **acceptance** | **e2e** + - Priority: [high | medium | low] + + For **txtar** gaps, also include the suggested test case in txtar format: + \`\`\` + === suggested test name + # @expect error E_CODE "substring" + --- input.jh + + \`\`\` + + Sort by priority descending. Be thorough but skip trivial paths. 
+ + """ workflow analyze_gaps() { @@ -220,7 +220,7 @@ workflow write_tests() { ${gap_report} -""" + """ } script mkdir_tmp_jaiph_qa = `mkdir -p .jaiph/tmp` diff --git a/.jaiph/sandbox.jh b/.jaiph/sandbox.jh new file mode 100755 index 00000000..4cbc212c --- /dev/null +++ b/.jaiph/sandbox.jh @@ -0,0 +1,9 @@ +#!/usr/bin/env jaiph + +import "jaiphlang/artifacts" as artifacts + +workflow default() { + run `echo "Hello, world!" > output.txt`() + const path = run artifacts.save("./output.txt") + return path +} \ No newline at end of file diff --git a/.jaiph/simplifier.jh b/.jaiph/simplifier.jh index fafe9505..96356475 100644 --- a/.jaiph/simplifier.jh +++ b/.jaiph/simplifier.jh @@ -1,7 +1,7 @@ #!/usr/bin/env jaiph import "./ensure_ci_passes.jh" as ci -import "./git.jh" as git +import "jaiphlang/git" as git config { agent.backend = "claude" diff --git a/CHANGELOG.md b/CHANGELOG.md index eeb070f8..add83fb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Unreleased +- **Cleanup — remove `JAIPH_TEST_MODE` event suppression from production runtime code:** `RuntimeEventEmitter.emitStep` / `emitLog` no longer read `this.env.JAIPH_TEST_MODE` to decide whether to write `__JAIPH_EVENT__` lines to stderr. A construction-time `suppressLiveEvents?: boolean` option replaces the per-call env check: `NodeWorkflowRuntime` accepts it in its options and forwards it to `RuntimeEventEmitter`. `node-test-runner.ts` passes `suppressLiveEvents: true` when constructing the in-process runtime for `test_run_workflow` steps so `node --test` reporter output stays clean. `JAIPH_TEST_MODE: "1"` is still set in the test runner's env — but only for `prompt.ts`'s mock-mode selection, not event emission. No other production caller constructs `NodeWorkflowRuntime` directly, so the spawned `node-workflow-runner.js` child defaults to `suppressLiveEvents: false` and live events stream to stderr exactly as before. 
Durable `appendRunSummaryLine` writes to `run_summary.jsonl` are unchanged in either mode. Existing in-process unit tests under `node-workflow-runtime.artifacts.test.ts` pass the new option through their `NodeWorkflowRuntime` constructions. + +- **Repo — `node-workflow-runtime.ts` split:** The 1915-LoC `src/runtime/kernel/node-workflow-runtime.ts` god file is split into the orchestrator plus three focused sibling modules under `src/runtime/kernel/`. No behavior changes — pure relocation; existing tests pass unchanged (helpers re-imported from their new location where needed). + - **`runtime-arg-parser.ts`** — every stateless free helper that used to live above the `NodeWorkflowRuntime` class (`interpolate`, `parseInlineCaptureCall`, `commaArgsToInterpolated`, `parseArgsRaw`, `parseInlineScriptAt`, `parseManagedArgAt`, `parseArgTokens`, `stripOuterQuotes`, `parsePromptSchema`, `sanitizeName`, `nowIso`), the `BARE_IDENT_RE` / `MAX_EMBED` / `MAX_RECURSION_DEPTH` constants, and the `ParsedArgToken` / `PromptSchemaField` types. Direct unit tests added in `runtime-arg-parser.test.ts`. + - **`runtime-event-emitter.ts`** — `RuntimeEventEmitter` owns `emitWorkflow`, `emitStep`, `emitPromptStepStart`, `emitPromptStepEnd`, `emitPromptEvent`, `emitLog`, plus the monotonic step and prompt sequence counters. Constructed with `{ runId, runDir, env, getFrameStack, getAsyncIndices }`. No more direct `process.stderr.write(__JAIPH_EVENT__ …)` scattered through the runtime. + - **`runtime-mock.ts`** — `executeMockBodyDef` and `executeMockShellBody` move here as exported functions taking `{ ref, args, env, cwd, executeStepsBack }` (the last is a callback so steps-kind mocks dispatch back into the runtime). The `require("node:child_process")` call that shadowed ESM imports inside `executeMockShellBody` is gone — replaced by a top-of-file `import`. 
+ - The orchestrator (`node-workflow-runtime.ts`) keeps the `NodeWorkflowRuntime` class, workflow/step orchestration (`runDefault`, `runNamedWorkflow`, `executeSteps`, `executeStep`, `runRecoverBody`, `runPromptStep`, frame and scope management), async-handle bookkeeping (`getAsyncIndices`, `getFrameStack`), and heartbeat (`startHeartbeat`, `stopHeartbeat`, `writeHeartbeat`). Dependency direction is one-way (orchestrator → helpers/emitter/mock); no circular imports. + +- **Breaking — Inbox dispatch is always sequential** — The optional parallel inbox mode is removed: there is no `run.inbox_parallel` config key, no `JAIPH_INBOX_PARALLEL` environment variable (it is ignored), and no `JAIPH_INBOX_PARALLEL_LOCKED` shim. Route targets for a queued message always run **one after another** in declaration order on the `channel` line, inside `NodeWorkflowRuntime`’s `drainWorkflowQueue`. Using `run.inbox_parallel = …` in a `config { … }` block is `E_PARSE: unknown config key: run.inbox_parallel`. Docs and E2E now match sequential-only semantics; unit tests cover the unknown key and parity of dispatch event order with and without the old env var set. + +- **Fix — CLI failure footer:** `Output of failed step` and the footer `out:` / `err:` paths now resolve from the **last** non-zero `STEP_END` in `run_summary.jsonl` (append order), not the first. The first failure line could be a recovered `catch`/`ensure` attempt, a stray record, or unrelated noise; the last failure matches the terminal step (the one the progress tree marks as failed). **`src/cli/shared/errors.test.ts`** covers multiple non-zero `STEP_END` lines. +- **Fix — Docker default image tag:** `curl` / `docs/install` copied only `dist/src` into `~/.local/bin/.jaiph`, so the CLI could not read `package.json` and defaulted the sandbox image to `ghcr.io/jaiphlang/jaiph-runtime:nightly` even for stable installs. 
The installer now copies `package.json` beside `src/`, and `resolveDefaultDockerImageTag` checks both the installer layout and the npm `dist/src/runtime` layout. +- **Repo — Test directory consolidation:** Consolidated the five-way test directory split (`src/**/*.test.ts`, `test/`, `tests/`, `compiler-tests/`, `golden-ast/`) into three test "places" plus two clearly named support directories. File moves: + - `src/compiler-test-runner.ts` → `test-infra/compiler-test-runner.ts` + - `src/golden-ast-runner.ts` → `test-infra/golden-ast-runner.ts` + - `compiler-tests/` → `test-fixtures/compiler-txtar/` (README preserved) + - `golden-ast/` → `test-fixtures/golden-ast/` + - `tests/e2e-samples/landing-page.spec.ts` → `e2e/playwright/landing-page.spec.ts` + - `tests/e2e-samples/docs-site.ts` → `e2e/playwright/docs-site.ts` + - `test/run-summary-jsonl.test.ts`, `test/signal-lifecycle.test.ts`, `test/tty-running-timer.test.ts` → `integration/` + - `test/sample-build.test.ts` (2814 LoC) split into 7 focused files under `integration/sample-build/` (each ≤500 LoC) plus a shared `helpers.ts` + - `test/fixtures/`, `test/expected/` → `test-fixtures/sample-build/` + - `test/` and `tests/` directories removed. + + Final layout: `src/**/*.test.ts` (unit, colocated), `integration/` (integration tests), `e2e/` (shell + Playwright), `test-fixtures/` (compiler-txtar, golden-ast, sample-build), `test-infra/` (test runners). `package.json` scripts and `tsconfig.json` updated. No test logic, assertions, or fixtures changed. + # 0.9.3 ## Summary @@ -15,7 +41,7 @@ - **Breaking — Runtime config:** `runtime.docker_timeout` renamed to `runtime.docker_timeout_seconds` to make the unit explicit. The old key produces an `E_PARSE` migration message. `DockerRunConfig.timeout` renamed to `timeoutSeconds` internally. 
- **Docker:** Default container execution timeout is **3600** seconds (one hour), up from 300, via `resolveDockerConfig` / `runtime.docker_timeout_seconds` when not overridden by `JAIPH_DOCKER_TIMEOUT` or in-file config. - **Docker:** `reportResult` fallback — when `discoverDockerRunDir` cannot match the expected `run_id`, the CLI now prints the sandbox runs root and the expected `run_id` instead of emitting just "Workflow execution failed." Paired with a rewritten `76_docker_failure_parity.sh` E2E that compares full normalized output between Docker and no-sandbox modes for both script-step and rule-match failures. -- **Library:** `jaiphlang/artifacts` provides `save(local_path, name)` via a named `save_script` and drops the unpublished git-oriented helpers (`save_patch`, `apply_patch`) and standalone `artifacts.sh`. +- **Library:** `jaiphlang/artifacts` provides a single `save(paths)` workflow that accepts either a file path or a newline-separated list of paths; the destination relpath is derived from the source path (leading `./` stripped; absolute sources use `basename` only). Replaces the prior two-argument `save(local_path, name)` and drops the unpublished git-oriented helpers (`save_patch`, `apply_patch`) and standalone `artifacts.sh`. See [Libraries](docs/libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox). - **Language:** `return ` — bare identifiers are now accepted in return position. `return response` is sugar for `return "${response}"`, resolved against the same scope rules used for `${ident}` interpolation and bare-identifier call arguments (`const`, capture, or parameter). Unknown identifiers (`return missing_name` where `missing_name` is not in scope) produce a precise `E_VALIDATE` unknown-identifier error naming the missing binding. 
Previously, bare identifiers in return position fell through to the catch-all "inline shell steps are forbidden" diagnostic, which was incorrect — the user was not writing a shell statement, and the suggested fix (explicit script block) did not solve the problem. Both `return response` and `return "${response}"` are valid and equivalent; existing interpolated return forms are unchanged. Parser updated in all return-position paths (top-level workflow body, brace blocks, catch/recover bodies). Unit tests cover bare-identifier returns from `const`, parameters, and catch bindings; compiler tests cover acceptance and unknown-identifier rejection; E2E test covers end-to-end propagation. - **Language:** Immutable binding enforcement — `const`, parameter, capture, and `script` names are now immutable. Rebinding a parameter via `const`, declaring duplicate `const` names in the same scope, or colliding a `script` name with an existing immutable binding are all rejected at compile time with `E_VALIDATE: cannot rebind immutable name "…"`. The error names the conflicting binding and where it was first bound. Existing files that shadowed parameters (e.g. `workflow default(x) { const x = … }`) must use distinct names. `examples/say_hello.jh` migrated as a reference. - **Language:** `return run \`…\`(args)` and `log run \`…\`(args)` — inline scripts wrapped with explicit `run` now work in value positions (`return`, `log`, `logerr`). Bare inline scripts without `run` remain rejected at compile time with clear errors. Parser, validator, emitter, formatter, and runtime all updated. E2E and unit tests cover zero-arg and argument forms plus rejection paths. 
diff --git a/QUEUE.md b/QUEUE.md index f52504a5..72264987 100644 --- a/QUEUE.md +++ b/QUEUE.md @@ -13,123 +13,38 @@ Process rules: *** -## Cleanup — consolidate the 5-way test directory split #dev-ready +## Performance — investigate and fix slow installation **Goal** -Today there are five different places that contain "tests": `src/**/*.test.ts` (66 unit tests, adjacent to source), `test/` (4 integration files including a 2427-LoC `sample-build.test.ts`), `tests/e2e-samples/` (a single Playwright file), `compiler-tests/` (txtar fixtures), `golden-ast/` (fixtures + expected). Plus runners `src/compiler-test-runner.ts` and `src/golden-ast-runner.ts` mixed into the production source tree. A new contributor cannot tell where a new test belongs without reading the whole layout. Fix the structure in one pass. - -**Context (read before starting)** - -* The current `package.json` `test` script enumerates the test sources explicitly; this gives us a precise inventory of what is wired in: - ``` - dist/test/*.test.js - dist/src/**/*.test.js - dist/src/**/*.acceptance.test.js - dist/src/compiler-test-runner.js - dist/src/golden-ast-runner.js - ``` - Any move must update this script and keep the same test set running. Adding tests is out of scope; this is purely reorganization. -* `src/compiler-test-runner.ts` and `src/golden-ast-runner.ts` are compiled and shipped in `dist/`, but they are test infrastructure (they consume fixtures, produce assertions). They should not live in `src/`. -* `compiler-tests/README.md` already documents the txtar format — preserve that doc next to the fixtures it describes. +`jaiph install` (and related dependency or bootstrap steps) feels unreasonably slow; find the dominant cost and improve it without weakening reproducibility (lockfile, shallow clone behavior, etc.). 
**Scope** -* **Move test infrastructure out of `src/`**: - - `src/compiler-test-runner.ts` → `test-infra/compiler-test-runner.ts` - - `src/golden-ast-runner.ts` → `test-infra/golden-ast-runner.ts` - - `tsconfig.json` and `package.json` `test` script updated to reference the new locations. -* **Rename and group fixture directories**: - - `compiler-tests/` → `test-fixtures/compiler-txtar/` (preserves the README inside). - - `golden-ast/` → `test-fixtures/golden-ast/` (preserves the `fixtures/` and `expected/` subdirs underneath). - - Update path references in `test-infra/compiler-test-runner.ts` and `test-infra/golden-ast-runner.ts`. -* **Fold the singleton Playwright test**: - - `tests/e2e-samples/landing-page.spec.ts` → `e2e/playwright/landing-page.spec.ts`. - - Update `playwright.config.ts` and the `test:samples` npm script accordingly. - - Delete the now-empty `tests/` directory. -* **Triage `test/` (4 files, 2960 LoC)**: - - `test/run-summary-jsonl.test.ts` (178 LoC), `test/signal-lifecycle.test.ts` (220 LoC), `test/tty-running-timer.test.ts` (135 LoC) — keep in a renamed `integration/` directory. They are integration-flavored, not unit, and don't have an obvious adjacent home. - - `test/sample-build.test.ts` (2427 LoC) — split. Read the file, group its tests by which subsystem they actually exercise, and move each group either next to that subsystem (`src/.../.integration.test.ts`) or into `integration/sample-build/.test.ts`. Aim for no resulting file over ~600 LoC. The split is the work; it is not optional. - - Move `test/expected/` and `test/fixtures/` to `test-fixtures/sample-build/` if any test still references them after the split. 
-* **Final layout** (target): - ``` - src/**/*.test.ts # unit, adjacent (unchanged) - src/**/*.acceptance.test.ts # acceptance, adjacent (unchanged) - integration/**/*.test.ts # integration tests (was `test/`, after split) - test-fixtures/compiler-txtar/ # was `compiler-tests/` - test-fixtures/golden-ast/ # was `golden-ast/` - test-fixtures/sample-build/ # if any sample-build fixtures survive the split - test-infra/compiler-test-runner.ts # was `src/compiler-test-runner.ts` - test-infra/golden-ast-runner.ts # was `src/golden-ast-runner.ts` - e2e/ # shell + .jh (unchanged) - e2e/playwright/landing-page.spec.ts # was `tests/e2e-samples/` - ``` - Three test "places" instead of five (`src/`-adjacent, `integration/`, `e2e/`); plus two clearly named support directories (`test-fixtures/`, `test-infra/`). -* Update `package.json` `test`, `test:compiler`, `test:golden-ast`, `test:samples`, `test:acceptance`, `test:ci`, `test:e2e` scripts to reference the new paths. Verify by running `npm test` end-to-end. - -**Non-goals** - -* Do not change any test's logic, assertions, or fixtures' contents. The goal is layout, not behavior. -* Do not change the unit-tests-adjacent-to-source convention. That part works. -* Do not delete any test (other than ones absorbed into the `sample-build.test.ts` split, where the original file goes away after redistribution). +* Profile or instrument the install path (git clone, lockfile I/O, post-install) and document the top 1–3 contributors to latency. +* Implement targeted fixes (e.g. avoid redundant work, reduce subprocess churn, cache safely) and verify wall-clock improvement on a cold and warm run where applicable. **Acceptance criteria** -* `npm test` passes with the same test count (or higher, if the `sample-build` split surfaces previously-bundled cases as separate tests). Test count must not decrease. -* No file in `src/` is named `*-test-runner.ts`. Test infrastructure lives only in `test-infra/`. 
-* No file under `integration/` exceeds ~600 LoC after the `sample-build` split. -* The repo root no longer has both `test/` and `tests/`. (`tests/` is deleted after folding.) -* `package.json` test scripts reference the new paths and the same test set runs in CI. -* Commit message documents the file-move map (old → new) so reviewers can sanity-check that nothing was lost. +* A short note in the commit or PR description states what was slow and what changed, with before/after rough timings on the same machine. +* `jaiph install` behavior remains correct: same lockfile semantics and failure modes for bad URLs or missing refs. +* `npm test` passes. *** -## Refactor — split `src/runtime/kernel/node-workflow-runtime.ts` (1901 LoC) #dev-ready +## Performance — investigate and fix slow workflow start (initial 2–4 s lag) **Goal** -`src/runtime/kernel/node-workflow-runtime.ts` is a 1901-LoC god file: ~280 LoC of free arg-parsing helpers above the class, then ~1620 LoC of `NodeWorkflowRuntime` spanning workflow orchestration, step execution, prompt step lifecycle, event emission, mock execution, frame stack management, and heartbeat I/O. Reading or modifying any one concern requires holding all of them in head. Split along clean seams so each concern is in a focused module. - -**Context (read before starting)** - -* This file is actively touched by the `Handle` task. If that task is in flight, **rebase on it before splitting** — do not do this work in parallel without coordinating, or the merge will be miserable. -* The class has stateful internals (`runId`, `runDir`, `summaryFile`, `heartbeatTimer`, `frameStack`, `asyncIndices`, `env`, `cwd`, `graph`, `mockBodies`). The split must keep state in the class and move stateless helpers out, or pass state explicitly into the extracted modules. Do not invent a second source of truth. 
-* Free helpers above the class (`interpolate`, `parseInlineCaptureCall`, `commaArgsToInterpolated`, `parseArgsRaw`, `parseInlineScriptAt`, `parseManagedArgAt`, `parseArgTokens`, `stripOuterQuotes`, `parsePromptSchema`, `BARE_IDENT_RE`, `MAX_EMBED`, `MAX_RECURSION_DEPTH`, `sanitizeName`, `nowIso`) — all stateless. Safe to extract. -* Methods that are pure event emission (`emitWorkflow`, `emitStep`, `emitPromptStepStart`, `emitPromptStepEnd`, `emitPromptEvent`, `emitLog`) all call `appendRunSummaryLine` and `process.stderr.write`. They depend on the class only for `runId`, `summaryFile`, and `getAsyncIndices()`. Can move to a module that takes those as constructor args. -* Mock execution methods (`executeMockBodyDef`, `executeMockShellBody`) are largely self-contained and could move to a sibling module. +When starting workflows (e.g. `jaiph run` / first step), users observe a 2–4 second delay before useful work; reduce that lag or explain and eliminate unnecessary startup work (JIT, imports, process spawn, discovery). **Scope** -Extract three new sibling modules under `src/runtime/kernel/`: - -* **`runtime-arg-parser.ts`** — every stateless free helper currently above the `NodeWorkflowRuntime` class: - - `interpolate`, `parseInlineCaptureCall`, `commaArgsToInterpolated`, `parseArgsRaw`, `parseInlineScriptAt`, `parseManagedArgAt`, `parseArgTokens`, `stripOuterQuotes`, `parsePromptSchema`, `sanitizeName`, `nowIso` - - The `BARE_IDENT_RE`, `MAX_EMBED`, `MAX_RECURSION_DEPTH` constants - - The `ParsedArgToken`, `PromptSchemaField` types if they are not used elsewhere in the class - - **Required**: extracted helpers must have unit tests (some already do indirectly via runtime tests; new direct tests live in `runtime-arg-parser.test.ts`). -* **`runtime-event-emitter.ts`** — a small class `RuntimeEventEmitter` constructed with `{ runId, asyncIndicesGetter, env }`, exposing `emitWorkflow`, `emitStep`, `emitPromptStepStart`, `emitPromptStepEnd`, `emitPromptEvent`, `emitLog`. 
The runtime constructs one and delegates. No more direct `process.stderr.write(__JAIPH_EVENT__ ...)` scattered through the runtime. -* **`runtime-mock.ts`** — `executeMockBodyDef` and `executeMockShellBody` move here as exported functions taking `{ ref, args, env, cwd, executeStepsBack }` (the last is a callback so the mock can dispatch back into the runtime for `kind: "steps"` mocks). Removes the `require("node:child_process")` and `require("node:fs")` calls that currently shadow ESM imports inside the class body — that is a code smell that should die in this task. - -After the split, `node-workflow-runtime.ts` keeps only: -* The `NodeWorkflowRuntime` class -* Workflow/step orchestration (`runDefault`, `runNamedWorkflow`, `executeSteps`, `executeStep`, frame and scope management) -* The async-handle bookkeeping (`getAsyncIndices`, `getFrameStack`) -* Heartbeat (`startHeartbeat`, `stopHeartbeat`, `writeHeartbeat`) - -Target size for `node-workflow-runtime.ts` after split: ~1000–1200 LoC. Still large, but a single coherent concern (the orchestrator). - -**Non-goals** - -* Do not change behavior. Every existing test must still pass without modification. -* Do not redesign the event format, the mock contract, or the arg-parser's accepted syntax. This is a relocation task only. -* Do not split further than the three new modules listed. Over-decomposition is its own problem; this task is calibrated for one round of splitting. -* Do not touch `node-workflow-runner.ts` (the CLI shim) or `run-step-exec.ts` (subprocess plumbing) — those are already correctly sized and out of scope. +* Reproduce the lag with a minimal `.jh` workflow; trace Node startup, module load, and runtime init (`NodeWorkflowRuntime` and friends). +* Address fixable costs (e.g. defer heavy work, lazy imports, avoid redundant file scans) without changing user-visible workflow semantics. **Acceptance criteria** -* `src/runtime/kernel/node-workflow-runtime.ts` is between 1000 and 1200 LoC after the split. 
-* `src/runtime/kernel/runtime-arg-parser.ts`, `runtime-event-emitter.ts`, `runtime-mock.ts` exist and own their respective concerns. -* `runtime-arg-parser.test.ts` exists with direct unit tests for the extracted helpers. -* `npm test` passes with no test changes other than possibly importing helpers from their new location. -* No `require("node:...")` calls inside class methods (they are replaced by top-of-file `import` statements as part of the mock extraction). -* The new modules have no circular imports back into `node-workflow-runtime.ts`. Dependency direction is one-way: orchestrator → helpers/emitter/mock. +* Documented repro (command + minimal file) and what was measured (time to first event / first step). +* Measurable reduction in the cold-start path on a representative case, or a clear justification if the lag is irreducible (e.g. external subprocess). +* `npm test` passes. *** diff --git a/design/2026-05-12-agent-proxy.md b/design/2026-05-12-agent-proxy.md new file mode 100644 index 00000000..a5d428e7 --- /dev/null +++ b/design/2026-05-12-agent-proxy.md @@ -0,0 +1,213 @@ +# agent-proxy — design doc + +*Phantom Token credential proxy for the jaiph Docker sandbox. Container holds only a placeholder; real credentials live on the host and never cross the sandbox boundary.* + +**Status:** design — ready for implementation +**Date (UTC):** 2026-05-12 + +## Problem + +jaiph's sandbox (`src/runtime/docker.ts`) deliberately drops every host credential channel: `SSH_*`, `GITHUB_TOKEN`, `GIT_*`, and anything outside the `JAIPH_/ANTHROPIC_/CURSOR_/CLAUDE_` env allowlist. Host `~/.ssh`, `~/.gitconfig`, and `~/.claude` are not mounted. Network egress is allowed by default. + +Container CLIs like `claude` therefore see no credentials and prompt `Not logged in`. Naively forwarding an API key as an env var would re-introduce exactly the exfiltration surface the sandbox was designed to remove — a prompt-injection attack on the agent could dump `process.env` at any time. 
+ +```mermaid +flowchart LR + subgraph HOST["macOS / Linux host"] + KC[(Keychain / libsecret /<br/>~/.claude/.credentials.json)] + end + subgraph CT["jaiph sandbox container"] + CC[claude CLI] + end + CC -. blocked .-> KC + CC -->|HTTPS allowed| API[api.anthropic.com] + API -.->|401 no auth| CC + linkStyle 0,2 stroke:#a40000,stroke-dasharray: 4 3; +``` + +## Design — Phantom Token proxy + +The container is given a *placeholder* credential (`ANTHROPIC_API_KEY=placeholder`) and a base URL pointing at a host proxy (`ANTHROPIC_BASE_URL=http://<address>:3001`). The proxy strips the placeholder on every request and injects the real credential — API key or OAuth bearer — pulled from the host token store, before forwarding to `api.anthropic.com`. + +This is the **Phantom Token Pattern** (same shape as [NanoClaw](https://jonno.nz/posts/nanoclaw-architecture-masterclass-in-doing-less/)'s credential proxy). The container literally never holds the real secret — even a prompt-injection-driven env dump exfiltrates only the string `placeholder`. Secrets live in a plain in-memory object on the proxy, never in `process.env`. + +### Request flow — strip and inject + +```mermaid +sequenceDiagram + autonumber + participant CC as claude (in container) + participant Proxy as agent-proxy (host:3001) + participant TS as Host token store + participant API as api.anthropic.com + + CC->>Proxy: POST /v1/messages\nx-api-key: "placeholder" + Proxy->>Proxy: strip placeholder header + Proxy->>TS: read real credential + TS-->>Proxy: API key OR OAuth bearer (refresh if expired) + Proxy->>API: POST /v1/messages\nx-api-key: "sk-ant-..." OR Authorization: Bearer ... + API-->>Proxy: 200 stream (SSE) + Proxy-->>CC: 200 stream (SSE, passthrough) +``` + +### Lifecycle & discovery + +Stopped by default. The runtime starts the daemon on first sandbox launch, every concurrent sandbox reuses the same instance, and the daemon self-exits 15s after the last keepalive. 
Discovery is a single file `~/.jaiph/agent-proxy.json` carrying `{pid, address, port}`; the runtime reads it, verifies the PID is alive, and respawns if not. + +```mermaid +sequenceDiagram + autonumber + participant R as jaiph runtime + participant F as ~/.jaiph/agent-proxy.json + participant P as agent-proxy + participant CT as sandbox container + R->>F: read pid + port + alt missing or pid dead + R->>P: spawn (port 3001) + P->>F: write {pid, address, port} + end + R->>P: GET /healthz + P-->>R: 200 ok + R->>CT: launch with ANTHROPIC_BASE_URL=<br/><address>:<port> + loop while sandbox alive + R->>P: POST /keepalive + P-->>R: 204 + end + Note over P: idle 15s → exit, rm agent-proxy.json +``` + +### Healthcheck + +`GET /healthz` verifies token store reachable, credential valid (or refreshable), `api.anthropic.com` reachable. The runtime calls it before launching the container so auth failures surface up front, not deep inside a model request. + +### Cross-platform token source (OAuth mode) + +| Platform | Source | Reader | +|---|---|---| +| macOS | Keychain item `Claude Code-credentials` | `security find-generic-password -w` | +| Linux (GNOME/KDE) | libsecret via Secret Service | `secret-tool lookup ...` | +| Linux (headless) | `~/.claude/.credentials.json` | read file (with refresh) | + +> API-key mode skips this entirely — the runtime passes `JAIPH_PROXY_API_KEY` to the daemon at spawn, which loads it into the in-memory `SECRET` object and scrubs `process.env`. + +### Cross-platform bind address + +`ensure.ts` resolves both sides; the daemon is platform-agnostic. + +| Platform | Proxy binds to | Container reaches via | +|---|---|---| +| macOS / WSL2 | `127.0.0.1:3001` | `host.docker.internal:3001` (built-in) | +| Linux | `<docker0 IP>:3001` | that same IP — resolved at sandbox launch | + +## Codebase layout + +All new code in one directory under `src/agent-proxy/`; matches the jaiph code philosophy (short files, ≤3 files per feature). + +``` +src/agent-proxy/ +├── index.ts # daemon entry: HTTP server, /healthz, /keepalive, idle-exit +├── secrets.ts # cross-platform credential loader (API key + OAuth) +└── ensure.ts # runtime-side: read state, spawn if dead, probe, return endpoint + +e2e/tests/agent_proxy_*.bats # phantom-token, lifecycle, healthcheck, concurrency... 
+src/runtime/docker.ts # one new call site (see Wiring) +~/.jaiph/agent-proxy.json # discovery state file shared runtime ↔ proxy +``` + +### File responsibilities + +| File | Runs in | Responsibility | Public API | +|---|---|---|---| +| `index.ts` | spawned daemon | HTTP server, header strip + inject, write state file, idle-exit timer | CLI entry (no exports) | +| `secrets.ts` | spawned daemon | Read credential from Keychain / libsecret / file / env; refresh OAuth on expiry | `loadSecret()`, `refreshIfExpired()` | +| `ensure.ts` | jaiph runtime (host process) | Compute bind address, read state file, spawn daemon if not alive, probe `/healthz`, return endpoint | `ensureProxy(): Promise<{address, port}>`, `heartbeat()` | + +## Reference implementation + +Sketch of `src/agent-proxy/index.ts`. Daemon is platform-agnostic; the runtime tells it where to bind via env. Secrets loaded into `SECRET` (never `process.env`); placeholder stripped on every request. + +```ts +// src/agent-proxy/index.ts — daemon entry; started on demand by the runtime +import http from "node:http"; +import https from "node:https"; +import fs from "node:fs"; +import path from "node:path"; +import os from "node:os"; +import { loadSecret } from "./secrets.js"; + +const STATE = path.join(os.homedir(), ".jaiph", "agent-proxy.json"); +const BIND = process.env.JAIPH_PROXY_BIND || "127.0.0.1"; +const PORT = Number(process.env.JAIPH_PROXY_PORT) || 3001; +const IDLE_MS = 15_000; + +const SECRET = loadSecret(); // { mode: "apiKey" | "oauth", apiKey?, oauthToken? 
} + +function inject(headers) { + const h = { ...headers, host: "api.anthropic.com" }; + delete h["x-api-key"]; // strip placeholder + delete h["authorization"]; + if (SECRET.mode === "apiKey") h["x-api-key"] = SECRET.apiKey; + else h["authorization"] = `Bearer ${SECRET.oauthToken}`; + return h; +} + +let lastBeat = Date.now(); + +const server = http.createServer((req, res) => { + if (req.url === "/healthz") { return res.end("ok"); } + if (req.url === "/keepalive") { lastBeat = Date.now(); res.statusCode = 204; return res.end(); } + + const up = https.request({ + host: "api.anthropic.com", path: req.url, method: req.method, + headers: inject(req.headers), + }, upRes => { res.writeHead(upRes.statusCode, upRes.headers); upRes.pipe(res); }); + req.pipe(up); +}); + +server.listen(PORT, BIND, () => { + const { address, port } = server.address(); + fs.mkdirSync(path.dirname(STATE), { recursive: true }); + fs.writeFileSync(STATE, JSON.stringify({ pid: process.pid, address, port })); +}); + +setInterval(() => { + if (Date.now() - lastBeat > IDLE_MS) { + fs.rmSync(STATE, { force: true }); + process.exit(0); + } +}, 1000); +``` + +> Elided: OAuth refresh, error mapping, request-body re-streaming for retries, libsecret reader. + +## End-to-end tests + +`e2e/tests/agent_proxy_*.bats`, run as part of `npm run test:e2e`: + +- **Phantom token:** assert container env contains only `ANTHROPIC_API_KEY=placeholder`; capture outbound traffic from container, assert real key/token never appears. +- **Lifecycle:** launching a sandbox spawns the proxy and creates `agent-proxy.json`; stopping heartbeats causes exit + file removal within ~16s. +- **Concurrency:** two sandboxes launched in parallel share one proxy — port and PID unchanged across both. +- **Healthcheck:** `/healthz` returns 200 with a valid credential and 503 once the token is revoked / API key cleared. +- **Auth-mode switch:** proxy started in API-key mode and OAuth mode each pass an end-to-end model call from the container. 
+- **Token refresh:** force-expire the OAuth access token — the next request transparently refreshes via the host without the container noticing. +- **Streaming:** SSE response from `/v1/messages` arrives chunked in the container; no buffering at the proxy. +- **Platform matrix:** macOS (Keychain + 127.0.0.1), Linux GNOME (libsecret + docker0), Linux headless (file + docker0) all green in CI. + +## Wiring into the runtime + +`src/runtime/docker.ts` gains one call before container launch and one heartbeat loop alongside the existing sandbox lifecycle. No env allowlist change — `ANTHROPIC_API_KEY` and `ANTHROPIC_BASE_URL` already match the `ANTHROPIC_*` prefix. + +```ts +// src/runtime/docker.ts (sketch of the new call sites) +import { ensureProxy, heartbeat } from "../agent-proxy/ensure.js"; + +const { address, port } = await ensureProxy(); // spawns daemon if needed + +dockerArgs.push( + "--env", "ANTHROPIC_API_KEY=placeholder", + "--env", `ANTHROPIC_BASE_URL=http://${address}:${port}`, +); + +const beat = setInterval(() => heartbeat(), 5_000); // keepalive while container runs +container.on("exit", () => clearInterval(beat)); +``` diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index a023350a..0301b67c 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -10,7 +10,8 @@ GEM eventmachine (>= 0.12.9) http_parser.rb (~> 0) eventmachine (1.2.7) - ffi (1.17.3) + ffi (1.17.3-arm64-darwin) + ffi (1.17.3-x86_64-linux-gnu) forwardable-extended (2.6.0) http_parser.rb (0.8.1) i18n (1.14.8) @@ -67,6 +68,7 @@ GEM PLATFORMS arm64-darwin-23 + arm64-darwin-25 x86_64-linux DEPENDENCIES diff --git a/docs/architecture.md b/docs/architecture.md index 141f5735..8b8a9e2d 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -45,14 +45,18 @@ All orchestration — local `jaiph run`, `jaiph test`, and **Docker `jaiph run`* - **`emitScriptsForModule`** parses, runs **`validateReferences`**, and **`buildScriptFiles`** — the only compile path for `jaiph run` / `jaiph test` — 
**persists only atomic `script` files** under `scripts/`. Inline scripts (`` run `body`(args) ``) are also emitted as `scripts/__inline_` with deterministic hash-based names. There is no workflow-level bash emission. - **Node Workflow Runtime (`src/runtime/kernel/node-workflow-runtime.ts`)** - - `NodeWorkflowRuntime` interprets the AST directly: walks workflow steps, manages scope/variables, delegates prompt and script execution to kernel helpers, handles channels/inbox/dispatch, emits events, and writes run artifacts. + - `NodeWorkflowRuntime` interprets the AST directly: walks workflow steps, manages scope/variables, delegates prompt and script execution to kernel helpers, handles channels/inbox/dispatch, owns the frame stack and heartbeat, and writes run artifacts. + - Three sibling modules under `src/runtime/kernel/` carry concerns that used to live inline in the runtime file. Dependency direction is one-way (orchestrator → helpers/emitter/mock); no circular imports back. + - **`runtime-arg-parser.ts`** — stateless interpolation and call-argument parsing (`interpolate`, `parseInlineCaptureCall`, `commaArgsToInterpolated`, `parseArgsRaw`, `parseInlineScriptAt`, `parseManagedArgAt`, `parseArgTokens`, `stripOuterQuotes`, `parsePromptSchema`, `sanitizeName`, `nowIso`) plus shared constants and the `ParsedArgToken` / `PromptSchemaField` types. Direct unit tests live in `runtime-arg-parser.test.ts`. + - **`runtime-event-emitter.ts`** — `RuntimeEventEmitter` owns the `__JAIPH_EVENT__` stderr stream and `run_summary.jsonl` writes for workflow/step/prompt/log events, plus the monotonic step and prompt sequence counters. Constructed with `{ runId, runDir, env, getFrameStack, getAsyncIndices, suppressLiveEvents? }`; the runtime delegates all event emission to it. 
The optional `suppressLiveEvents` flag (forwarded from `NodeWorkflowRuntime`'s `suppressLiveEvents` option) skips the live stderr write while leaving the durable `run_summary.jsonl` append intact — used by in-process callers like the test runner that share stderr with `node --test` reporter output. The CLI's spawned `node-workflow-runner` child does not set it, so production runs stream events to stderr as before. + - **`runtime-mock.ts`** — `executeMockBodyDef` and `executeMockShellBody` for `*.test.jh` workflow/rule/script mocks. Shell-kind mocks run `bash -c`; steps-kind mocks dispatch back into the runtime via an `executeStepsBack` callback so the body runs against the full step interpreter. - `buildRuntimeGraph()` (`graph.ts`) loads reachable modules with **`parsejaiph` only** (import closure); it does **not** run `validateReferences`. Cross-module refs are resolved from that graph at runtime. - **Node Test Runner (`src/runtime/kernel/node-test-runner.ts`)** - Executes `*.test.jh` test blocks using `NodeWorkflowRuntime` with mock support (mock prompts, mock workflow/rule/script bodies). Pure Node harness — no Bash test transpilation. - **JS kernel (`src/runtime/kernel/`)** - - Prompt execution (`prompt.ts`), managed subprocess execution (`run-step-exec.ts`), streaming parse (`stream-parser.ts`), schema (`schema.ts`), mocks (`mock.ts`), **`emit.ts`** (live `__JAIPH_EVENT__` + `run_summary.jsonl`), **`workflow-launch.ts`** (spawn contract). + - Prompt execution (`prompt.ts`), streaming parse (`stream-parser.ts`), schema (`schema.ts`), mocks (`mock.ts`), **`emit.ts`** (live `__JAIPH_EVENT__` + `run_summary.jsonl`), **`workflow-launch.ts`** (spawn contract). Script subprocesses are launched directly from `NodeWorkflowRuntime`. - **Formatter (`src/format/emit.ts`)** - `jaiph format` rewrites `.jh` / `.test.jh` files into canonical style. Pure AST→text emitter; no side-effects beyond file writes. 
@@ -103,7 +107,7 @@ The runtime persists step captures and the event timeline under a UTC-dated hier run_summary.jsonl # durable event timeline ``` -Step sequence numbers are monotonic and unique per run: `NodeWorkflowRuntime` allocates them in memory when opening each step’s capture files (`%06d-.out|.err`). The standalone module `kernel/seq-alloc.ts` is a **file-backed** allocator (and CLI `node seq-alloc.js`) for tooling or non-kernel callers; the Node workflow runtime does **not** rely on a `.seq` file in the run directory for ordinary execution. +Step sequence numbers are monotonic and unique per run: `NodeWorkflowRuntime` allocates them in memory when opening each step’s capture files (`%06d-.out|.err`). There is no `.seq` file in the run directory. ## Channels and hooks in context @@ -111,7 +115,7 @@ Channels are validated at compile time (`validateReferences` / send RHS rules) a ## Test runner integration (`*.test.jh` in the kernel) -**How** `jaiph test` wires into the same stack as `jaiph run`: `*.test.jh` files are parsed in the CLI; `runTestFile()` drives blocks in-process. **`buildRuntimeGraph(testFile)`** is called **once per `runTestFile` invocation** and the resulting graph is reused across all blocks and `test_run_workflow` steps (the import closure is constant for a given test file within a single process run). Each `test_run_workflow` step resolves mocks against that cached graph, then constructs `NodeWorkflowRuntime` with `mockBodies` / mock prompt env. Mock prompts, workflows, rules, and scripts are supported through the runtime's mock infrastructure. +**How** `jaiph test` wires into the same stack as `jaiph run`: `*.test.jh` files are parsed in the CLI; `runTestFile()` drives blocks in-process. **`buildRuntimeGraph(testFile)`** is called **once per `runTestFile` invocation** and the resulting graph is reused across all blocks and `test_run_workflow` steps (the import closure is constant for a given test file within a single process run). 
Each `test_run_workflow` step resolves mocks against that cached graph, then constructs `NodeWorkflowRuntime` with `mockBodies` / mock prompt env, passing **`suppressLiveEvents: true`** so the in-process runtime's `__JAIPH_EVENT__` stderr writes are skipped (durable `run_summary.jsonl` writes are unaffected). Without this flag, every workflow event would print to the test process's stderr and swamp `node --test` reporter output. Mock prompts, workflows, rules, and scripts are supported through the runtime's mock infrastructure. Before that, the CLI prepares script executables via **`buildScripts(testFileAbs, tmpDir, workspaceRoot)`** — the same **`buildScripts`** helper as `jaiph run`, with the **test file as the entrypoint**. That walks the test module and its **import closure** (transitive `import` edges), runs **`validateReferences`** / **`emitScriptsForModule`** per reachable file, and writes `scripts/` so imported workflows have paths under `JAIPH_SCRIPTS`. Unrelated `*.jh` files elsewhere in the repo are not compiled unless imported. Authoring rules, fixtures, and mock syntax for `*.test.jh` are documented in [Testing](testing.md), not here. diff --git a/docs/cli.md b/docs/cli.md index 132aabde..ddd7c6a0 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -163,7 +163,7 @@ log response ### Failed run summary (stderr) -On non-zero exit, the CLI may print a footer with the path to `run_summary.jsonl`, `out:` / `err:` artifact paths, and `Output of failed step:` plus a trimmed excerpt. These are resolved from the **first** `STEP_END` object in the summary with `status` != 0, using `out_content` / `err_content` when present and otherwise the `out_file` / `err_file` fields. If no failed `STEP_END` is found, the CLI falls back to a run-directory artifact heuristic. +On non-zero exit, the CLI may print a footer with the path to `run_summary.jsonl`, `out:` / `err:` artifact paths, and `Output of failed step:` plus a trimmed excerpt. 
These are resolved from the **last** `STEP_END` object in the summary with `status` != 0, using `out_content` / `err_content` when present and otherwise the `out_file` / `err_file` fields (last matches terminal failure after `catch`/`ensure` retries and stray earlier failures). If no failed `STEP_END` is found, the CLI falls back to a run-directory artifact heuristic. In Docker mode, artifact paths recorded by the container use container-internal prefixes (`/jaiph/run/…`). The CLI remaps these to host paths and discovers the run directory from the bind-mounted runs directory by matching the `JAIPH_RUN_ID` in each `run_summary.jsonl` when the container meta file is inaccessible. This run-id-based lookup is safe under concurrent `jaiph run` invocations sharing the same runs directory. The failure summary therefore displays identically to local (no-sandbox) runs — same structure, same host-resolvable paths, same "Output of failed step" excerpt. See [Sandboxing — Path remapping](sandboxing.md#path-remapping). @@ -185,7 +185,7 @@ If a stream stays empty for a step, the runtime may omit that artifact file. Any ### Run summary (`run_summary.jsonl`) {#run-summary-jsonl} -Each run directory also contains `run_summary.jsonl`: one JSON object per line, appended in execution order. It is the canonical append-only record of runtime events (lifecycle, logs, inbox flow, and step boundaries). Tooling can tail the file by byte offset and process new lines idempotently; parallel inbox dispatch may reorder some events relative to wall-clock time, but each line is written atomically under the same lock used for concurrent writers (see [Inbox — Lock behavior](inbox.md#lock-behavior)). +Each run directory also contains `run_summary.jsonl`: one JSON object per line, appended in execution order. It is the canonical append-only record of runtime events (lifecycle, logs, inbox flow, and step boundaries). Tooling can tail the file by byte offset and process new lines idempotently. 
For a single run, lines follow execution order; inbox routes always drain **sequentially**, so inbox lifecycle events stay aligned with dispatch order. Summary lines are still appended atomically under a lock shared with other concurrent writers on the same run directory (for example `run async` branches appending step events). **Versioning.** Every object includes `event_version` (currently `1`). New fields may be added; consumers should tolerate unknown keys. @@ -194,10 +194,10 @@ Each run directory also contains `run_summary.jsonl`: one JSON object per line, **Correlation rules:** - **`run_id`:** same across all lines in a given run's file. -- **Workflow boundaries:** for each workflow name, `WORKFLOW_START` count equals `WORKFLOW_END` count. With `JAIPH_INBOX_PARALLEL=true`, lifecycle lines may interleave — use per-name counts, not a global stack. +- **Workflow boundaries:** for each workflow name, `WORKFLOW_START` count equals `WORKFLOW_END` count. - **Steps:** `STEP_START` and `STEP_END` share the same `id`. Use `parent_id`, `seq`, and `depth` to rebuild the tree. - **Inbox:** one `INBOX_ENQUEUE` per `send` with a unique `inbox_seq` (zero-padded, e.g. `001`). Each routed target gets one `INBOX_DISPATCH_START` and one `INBOX_DISPATCH_COMPLETE` sharing the same `inbox_seq`, `channel`, `target`, and `sender`. -- **Ordering under parallel inbox:** lines are valid JSONL (one object per line, atomic append). Wall-clock `ts` order may diverge from append order between concurrent branches. +- **Ordering:** lines are valid JSONL (one object per line, atomic append). Inbox dispatch is sequential; `ts` order matches dispatch order for inbox lifecycle events on a single run. **Event taxonomy (schema `event_version` 1):** @@ -432,7 +432,6 @@ These variables apply to `jaiph run` and workflow execution. Variables marked ** - `JAIPH_DEBUG` — set to `true` for debug tracing. - `JAIPH_RECURSION_DEPTH_LIMIT` — maximum recursion depth for workflows and rules (default: **256**). 
Exceeding this limit produces a runtime error. -- `JAIPH_INBOX_PARALLEL` — set to `true` for parallel dispatch of inbox route targets (overrides in-file `run.inbox_parallel`). See [Inbox](inbox.md). - `NO_COLOR` — disables colored output. **Non-TTY heartbeat:** diff --git a/docs/configuration.md b/docs/configuration.md index edce69b0..85dbf640 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -7,7 +7,7 @@ redirect_from: # Configuration -When you need the same workflow sources to behave differently on different machines, you separate **what the graph does** (rules, `prompt` / `script` / `run`, channels) from **operational knobs**: which LLM backend to use, where to write run logs, how inbox dispatch behaves, and how the CLI chooses host vs. Docker. Jaiph keeps the language stable and pushes those choices into **configuration** — in-file `config` blocks, environment variables, and defaults in the tool. +When you need the same workflow sources to behave differently on different machines, you separate **what the graph does** (rules, `prompt` / `script` / `run`, channels) from **operational knobs**: which LLM backend to use, where to write run logs and debug output, and how the CLI chooses host vs. Docker. Jaiph keeps the language stable and pushes those choices into **configuration** — in-file `config` blocks, environment variables, and defaults in the tool. Inbox dispatch order is defined by the language (sequential drain of route targets — see [Inbox & Dispatch](inbox.md)); it is not a configuration toggle. All execution is interpreted by the Node workflow runtime (`NodeWorkflowRuntime`): the AST, managed scripts, prompts, channels, inbox, and `.jaiph/runs` artifacts (see [Architecture](architecture.md)). Configuration only adjusts that stack; it does not change the workflow language or the compile graph. @@ -19,7 +19,7 @@ All execution is interpreted by the Node workflow runtime (`NodeWorkflowRuntime` Jaiph provides three configuration mechanisms. 
When the same key is set in more than one place, the highest-priority source wins: -1. **Environment variables** — highest priority. Includes `JAIPH_AGENT_*`, `JAIPH_RUNS_DIR`, `JAIPH_DEBUG`, `JAIPH_INBOX_PARALLEL`, `JAIPH_DOCKER_ENABLED`, other `JAIPH_DOCKER_*`, and `JAIPH_UNSAFE` (for Docker on/off, see [Sandboxing — Enabling Docker](sandboxing.md#enabling-docker)). Docker **enablement** is only controlled here — there is no `runtime.*` in-file key for that (removed; using it is a parse error with a migration message). +1. **Environment variables** — highest priority. Includes `JAIPH_AGENT_*`, `JAIPH_RUNS_DIR`, `JAIPH_DEBUG`, `JAIPH_DOCKER_ENABLED`, other `JAIPH_DOCKER_*`, and `JAIPH_UNSAFE` (for Docker on/off, see [Sandboxing — Enabling Docker](sandboxing.md#enabling-docker)). Docker **enablement** is only controlled here — there is no `runtime.*` in-file key for that (removed; using it is a parse error with a migration message). 2. **In-file `config { ... }` blocks** — at module scope and optionally inside a `workflow` body. 3. **Built-in defaults** — lowest priority, used when nothing else sets a value. @@ -134,7 +134,6 @@ These control runtime behavior unrelated to the agent. |-----|------|---------|--------------|-------------| | `run.logs_dir` | string | `.jaiph/runs` | `JAIPH_RUNS_DIR` | Step log directory. Relative paths are joined with the workspace root; absolute paths are used as-is. | | `run.debug` | boolean | `false` | `JAIPH_DEBUG` | Enables debug tracing for the run. | -| `run.inbox_parallel` | boolean | `false` | `JAIPH_INBOX_PARALLEL` | Dispatch inbox route targets concurrently. See [Inbox — Parallel dispatch](inbox.md#parallel-dispatch). | | `run.recover_limit` | integer | `10` | _(no env override)_ | Maximum number of retry attempts for `run … recover` loops before the step fails. See [Language — `recover`](language.md#recover--repair-and-retry-loop). | ### Module keys @@ -180,7 +179,7 @@ These configure Docker sandboxing. 
Unlike agent and run keys, runtime keys are r For **agent and run keys**, resolution order (highest wins): -1. **Environment** — `JAIPH_AGENT_*`, `JAIPH_RUNS_DIR`, `JAIPH_DEBUG`, `JAIPH_INBOX_PARALLEL`. When set, these lock the value for the entire process (see [Locked variables](#locked-variables)). +1. **Environment** — `JAIPH_AGENT_*`, `JAIPH_RUNS_DIR`, `JAIPH_DEBUG`. When set, these lock the value for the entire process (see [Locked variables](#locked-variables)). 2. **Workflow-level `config`** — overrides module values for the duration of that workflow. 3. **Module-level `config`** — applies to workflows that don't define their own block. 4. **Built-in defaults.** @@ -191,7 +190,7 @@ For **Docker enablement**, the `jaiph run` driver uses **`JAIPH_DOCKER_ENABLED` When `jaiph run` builds the runner environment, any of these environment variables already present in `process.env` gets a matching `${NAME}_LOCKED` flag set to `"1"`: -`JAIPH_AGENT_MODEL`, `JAIPH_AGENT_COMMAND`, `JAIPH_AGENT_BACKEND`, `JAIPH_AGENT_TRUSTED_WORKSPACE`, `JAIPH_AGENT_CURSOR_FLAGS`, `JAIPH_AGENT_CLAUDE_FLAGS`, `JAIPH_RUNS_DIR`, `JAIPH_DEBUG`, `JAIPH_INBOX_PARALLEL` +`JAIPH_AGENT_MODEL`, `JAIPH_AGENT_COMMAND`, `JAIPH_AGENT_BACKEND`, `JAIPH_AGENT_TRUSTED_WORKSPACE`, `JAIPH_AGENT_CURSOR_FLAGS`, `JAIPH_AGENT_CLAUDE_FLAGS`, `JAIPH_RUNS_DIR`, `JAIPH_DEBUG` Locked values cannot be overridden by module-level or workflow-level config — they are authoritative for the entire process. This is how environment variables always win in the precedence chain. 
@@ -320,7 +319,6 @@ Quick reference for all in-file keys and their environment variable equivalents: | `agent.claude_flags` | `JAIPH_AGENT_CLAUDE_FLAGS` | | `run.logs_dir` | `JAIPH_RUNS_DIR` | | `run.debug` | `JAIPH_DEBUG` | -| `run.inbox_parallel` | `JAIPH_INBOX_PARALLEL` | | `run.recover_limit` | _(no env override)_ | | `runtime.docker_image` | `JAIPH_DOCKER_IMAGE` | | `runtime.docker_network` | `JAIPH_DOCKER_NETWORK` | diff --git a/docs/contributing.md b/docs/contributing.md index 589848b5..a53f3fac 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -48,14 +48,14 @@ For day-to-day work on the compiler and CLI you usually stay inside the clone: i | `npm install` | Installs TypeScript and types (dev dependencies). | | `npm run build` | Runs `tsc`, then copies **`src/runtime`** → **`dist/src/runtime`** (kernel JS for the compiled CLI) and **`runtime/overlay-run.sh`** → **`dist/src/runtime/overlay-run.sh`** (Docker overlay entrypoint). | | `npm run build:standalone` | `npm run build`, then copies **`dist/src/runtime`** → **`dist/runtime`** and runs **`bun build --compile`** on `src/cli.ts` → **`dist/jaiph`**. Requires [Bun](https://bun.sh). Ship the **`dist/`** tree (binary plus the runtime directory) for a self-contained layout. | -| `npm test` | **`npm run clean`**, then **`npm run build`**, then the Node.js test runner with **`JAIPH_UNSAFE=true`**, **`NODE_OPTIONS`** including **`--enable-source-maps`** and a large heap limit, on `dist/test/*.test.js`, every file under `dist/src/` matching `*.test.js` or `*.acceptance.test.js` (via `find`), `dist/src/compiler-test-runner.js` (txtar compiler tests), and `dist/src/golden-ast-runner.js` (golden AST tests). | -| `npm run test:compiler` | **`npm run build`**, then **`node --test`** on `dist/src/compiler-test-runner.js` — runs txtar-based compiler test fixtures from `compiler-tests/`. 
| -| `npm run test:golden-ast` | **`npm run build`**, then **`node --test`** on `dist/src/golden-ast-runner.js` — runs golden AST tests from `golden-ast/`. Use `UPDATE_GOLDEN=1 npm run test:golden-ast` to regenerate goldens after intentional parser changes. | +| `npm test` | **`npm run clean`**, then **`npm run build`**, then the Node.js test runner with **`JAIPH_UNSAFE=true`**, **`NODE_OPTIONS`** including **`--enable-source-maps`** and a large heap limit, on every file under `dist/integration/` matching `*.test.js`, every file under `dist/src/` matching `*.test.js` or `*.acceptance.test.js` (via `find`), `dist/test-infra/compiler-test-runner.js` (txtar compiler tests), and `dist/test-infra/golden-ast-runner.js` (golden AST tests). | +| `npm run test:compiler` | **`npm run build`**, then **`node --test`** on `dist/test-infra/compiler-test-runner.js` — runs txtar-based compiler test fixtures from `test-fixtures/compiler-txtar/`. | +| `npm run test:golden-ast` | **`npm run build`**, then **`node --test`** on `dist/test-infra/golden-ast-runner.js` — runs golden AST tests from `test-fixtures/golden-ast/`. Use `UPDATE_GOLDEN=1 npm run test:golden-ast` to regenerate goldens after intentional parser changes. | | `npm run test:acceptance:compiler` | **`npm run build`**, then **`node --test`** on only `dist/src/**/*.acceptance.test.js` — compiler acceptance tests without the full unit suite or E2E. | | `npm run test:acceptance:runtime` | **`bash ./e2e/test_all.sh`** only — same E2E driver as below **without** an implicit rebuild; ensure `dist/` is up to date before running. | | `npm run test:acceptance` | **`npm run test:acceptance:compiler`** then **`npm run test:acceptance:runtime`**. | | `npm run test:e2e` | **`npm run build`**, then **`bash ./e2e/test_all.sh`**. Prefer this when you want a fresh `dist/` before E2E. By default this exercises the **Docker** sandbox when `JAIPH_UNSAFE` is unset. 
For a faster host-only run (no container), use **`JAIPH_UNSAFE=true npm run test:e2e`**. | -| `npm run test:samples` | **`npx playwright test`** — Playwright suite for the docs landing page (`tests/e2e-samples/`). Uses `http://127.0.0.1:4000` (see `playwright.config.ts`); starts Jekyll via `webServer` or reuses one already on that port. Requires Playwright (`npx playwright install chromium` once). | +| `npm run test:samples` | **`npx playwright test`** — Playwright suite for the docs landing page (`e2e/playwright/`). Uses `http://127.0.0.1:4000` (see `playwright.config.ts`); starts Jekyll via `webServer` or reuses one already on that port. Requires Playwright (`npx playwright install chromium` once). | | `npm run test:ci` | `npm test` followed by `npm run test:e2e` — useful before pushing when you want the full local picture. | Run a single Node test file after a build with e.g. `node --test dist/src/parse/parse-core.test.js`. The `dist/` paths mirror the source layout under `src/`. @@ -98,9 +98,9 @@ Jaiph uses several test layers. Each layer catches a different class of bug. 
Use | **Module tests** | `src/**/*.test.ts` (colocated) | Bugs in pure functions (event parsing, param formatting, path resolution, config merging) | The function is self-contained, takes input and returns output, no I/O | | **Compiler acceptance tests** | `src/transpile/*.acceptance.test.ts` (colocated) | Cross-module compiler behavior: validation errors, resolution, and other cases that need a temp project tree or subprocess | You need a deterministic error string, multi-file `buildScripts`, or behavior that does not fit a tiny golden snippet | | **Compiler golden tests** | `src/transpile/compiler-golden.test.ts` (colocated) | Regressions in the parser, validation messages, and scripts-only extraction (`buildScriptFiles` in `emit-script.ts`) — expectations are inline in the test file | You changed the parser, validator, or script extraction and need to lock an exact error string, extracted script shape, or corpus behavior | -| **Compiler tests (txtar)** | `compiler-tests/*.txt` | Parse and validate outcomes — success, parse errors, validation errors — using language-agnostic txtar fixtures (hundreds of `===` cases across the four `*.txt` files) | You want a portable test case that can be reused by alternative compiler implementations; the test is a `.jh` input paired with an expected outcome | -| **Golden AST tests** | `golden-ast/fixtures/*.jh` + `golden-ast/expected/*.json` | Parse tree shape for successful parses — serialized to deterministic JSON with locations stripped (9 fixtures: e.g. 
imports, brace-if, log, match and match-multiline, params, prompt-capture, run-ensure, script-defs) | You changed the parser and need to verify the AST structure hasn't drifted; txtar tests only check pass/fail, goldens lock in the actual tree shape | -| **Cross-cutting tests** | `test/*.test.ts` | Process-level integration behavior: signal handling, TTY rendering, run summary structure, sample builds | The test spans multiple modules or requires subprocess/PTY harnesses | +| **Compiler tests (txtar)** | `test-fixtures/compiler-txtar/*.txt` | Parse and validate outcomes — success, parse errors, validation errors — using language-agnostic txtar fixtures (hundreds of `===` cases across the four `*.txt` files) | You want a portable test case that can be reused by alternative compiler implementations; the test is a `.jh` input paired with an expected outcome | +| **Golden AST tests** | `test-fixtures/golden-ast/fixtures/*.jh` + `test-fixtures/golden-ast/expected/*.json` | Parse tree shape for successful parses — serialized to deterministic JSON with locations stripped (9 fixtures: e.g. imports, brace-if, log, match and match-multiline, params, prompt-capture, run-ensure, script-defs) | You changed the parser and need to verify the AST structure hasn't drifted; txtar tests only check pass/fail, goldens lock in the actual tree shape | +| **Integration tests** | `integration/*.test.ts`, `integration/sample-build/*.test.ts` | Process-level integration behavior: signal handling, TTY rendering, run summary structure, sample builds | The test spans multiple modules or requires subprocess/PTY harnesses | | **E2E tests** | `e2e/tests/*.sh` | Runtime behavior — does the workflow actually execute correctly end-to-end? | The behavior involves the CLI launcher, Node runtime, process lifecycle, or file artifacts | ### Key principles @@ -108,16 +108,16 @@ Jaiph uses several test layers. Each layer catches a different class of bug. Use 1. 
**Compile-time validation vs graph loading.** `buildScripts` / `emitScriptsForModule` run **`validateReferences`** before any script files are written. **`buildRuntimeGraph()`** only parses modules and follows imports — it does **not** re-run that validation. Lock compile errors in the compiler/validator tests; the runtime graph is the wrong layer for that (see [Architecture — Transpiler / Node workflow runtime](architecture.md#core-components)). 2. **Tests are behavior contracts.** E2E tests and acceptance tests define what the product does. Default approach: change production code to satisfy tests, not the other way around. 3. **Modify existing tests only with a strong reason:** intentional product behavior change, incorrect test expectation, or removal of an obsolete feature. Any such change should be minimal and paired with a clear rationale. -4. **Golden tests are the compiler's safety net.** After transpiler changes, run `npm test`. Failures in `src/transpile/compiler-golden.test.ts` usually mean updating an explicit expected string or fixture in that file — there is no separate dump script; align expectations with intentional emitter changes and re-run `npm test`. **Golden AST tests** (`golden-ast/`) complement this by locking in the parse tree shape — if those fail, regenerate with `UPDATE_GOLDEN=1 npm run test:golden-ast` and review the diff. +4. **Golden tests are the compiler's safety net.** After transpiler changes, run `npm test`. Failures in `src/transpile/compiler-golden.test.ts` usually mean updating an explicit expected string or fixture in that file — there is no separate dump script; align expectations with intentional emitter changes and re-run `npm test`. **Golden AST tests** (`test-fixtures/golden-ast/`) complement this by locking in the parse tree shape — if those fail, regenerate with `UPDATE_GOLDEN=1 npm run test:golden-ast` and review the diff. 5. 
**E2E tests assert two things independently:** what the user sees (CLI tree output via `e2e::expect_stdout`) and what the runtime persists (artifact files via `e2e::expect_out`, `e2e::expect_file`). A bug could break one without the other. 6. **Prefer the narrowest test layer.** A pure function bug should be caught by a unit test, not an E2E test. E2E tests are expensive to run and hard to debug — reserve them for integration-level behavior. ### TypeScript test layout - **Module tests** — live next to the source they validate under `src/` (e.g. `src/parse/parse-core.test.ts`, `src/cli/run/display.test.ts`, `src/transpile/compiler-golden.test.ts`). Names are `*.test.ts` or `*.acceptance.test.ts`. -- **Cross-cutting tests** — span multiple modules or need subprocess/PTY harnesses; they stay in `test/` (see [Cross-cutting tests in `test/`](#cross-cutting-tests-in-test)). +- **Integration tests** — span multiple modules or need subprocess/PTY harnesses; they live in `integration/` (see [Integration tests](#integration-tests)). - **E2E** — bash scripts in `e2e/tests/*.sh`, driven by `e2e/test_all.sh`. -- **`npm test`** discovers colocated files under `src/` and everything in `test/`; see the [Developing in the repository](#developing-in-the-repository) table for the exact command. +- **`npm test`** discovers colocated files under `src/`, integration tests under `integration/`, and test infrastructure in `test-infra/`; see the [Developing in the repository](#developing-in-the-repository) table for the exact command. ### Module test layout (colocated) @@ -140,18 +140,24 @@ find src -type f \( -name '*.test.ts' -o -name '*.acceptance.test.ts' \) | sort When adding a new source module or extending an existing one, create or extend the corresponding `*.test.ts` in the same directory. For kernel internals, the compile path, and artifact contracts, see [Architecture](architecture.md). 
-### Cross-cutting tests in `test/` +### Integration tests -Tests that span multiple modules, require subprocess/PTY harnesses, or exercise process-level behavior remain in `test/`. These do not belong to a single module: +Tests that span multiple modules, require subprocess/PTY harnesses, or exercise process-level behavior live in `integration/`. These do not belong to a single module: | Test file | Kind | What it covers | |-----------|------|----------------| -| `sample-build.test.ts` | Integration | Cross-module build/transpile/run-tree behavior using real compiler and CLI components | -| `run-summary-jsonl.test.ts` | Integration | Runs the CLI on a small workflow and asserts structure and fields of `run_summary.jsonl` under `.jaiph/runs/` | -| `signal-lifecycle.test.ts` | Acceptance | After SIGINT/SIGTERM, verifies `jaiph run` exits within a time bound and leaves no stale child processes | -| `tty-running-timer.test.ts` | Acceptance | In a TTY, verifies the “RUNNING workflow” line updates over time (requires Python 3 PTY harness) | - -Shared test data (`test/fixtures/`, `test/expected/`) also remains in `test/`. 
+| `integration/sample-build/build.test.ts` | Integration | Build/transpile behavior — `buildScripts`, `buildScriptFiles`, script extraction | +| `integration/sample-build/cli-tree.test.ts` | Integration | CLI tree output rendering for sample workflows | +| `integration/sample-build/run-core.test.ts` | Integration | Core runtime execution — workflow runs, step sequencing, artifacts | +| `integration/sample-build/run-prompt-agent.test.ts` | Integration | Prompt and agent interaction in sample workflows | +| `integration/sample-build/recover-handle.test.ts` | Integration | `recover` / `Handle` async behavior in sample workflows | +| `integration/sample-build/test-advanced.test.ts` | Integration | Advanced test harness behavior — mocks, channels, edge cases | +| `integration/sample-build/test-framework.test.ts` | Integration | Test framework basics — `mock prompt`, `expect_*`, test block lifecycle | +| `integration/run-summary-jsonl.test.ts` | Integration | Runs the CLI on a small workflow and asserts structure and fields of `run_summary.jsonl` under `.jaiph/runs/` | +| `integration/signal-lifecycle.test.ts` | Acceptance | After SIGINT/SIGTERM, verifies `jaiph run` exits within a time bound and leaves no stale child processes | +| `integration/tty-running-timer.test.ts` | Acceptance | In a TTY, verifies the “RUNNING workflow” line updates over time (requires Python 3 PTY harness) | + +The `integration/sample-build/` directory also has a shared `helpers.ts` module used by the sample-build tests. Shared test fixtures (`.jh` source files and expected output) live in `test-fixtures/sample-build/`. ## CI pipeline @@ -188,7 +194,7 @@ The Jekyll project lives entirely inside `docs/` — `Gemfile`, `_config.yml`, l ### Landing-page sample verification (Playwright) -After the Jekyll smoke-check, the CI job also verifies that code samples shown on the landing page match real CLI behavior. 
This uses Playwright (Chromium) with a test suite in `tests/e2e-samples/landing-page.spec.ts`. +After the Jekyll smoke-check, the CI job also verifies that code samples shown on the landing page match real CLI behavior. This uses Playwright (Chromium) with a test suite in `e2e/playwright/landing-page.spec.ts`. The test does two things: diff --git a/docs/getting-started.md b/docs/getting-started.md index 6b923a82..b1620d34 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -9,9 +9,9 @@ redirect_from: ## Overview -**Jaiph** is a workflow system for building agent-style pipelines. You write `.jh` sources (and optional `*.test.jh` test modules) that combine **prompts**, **rules**, **scripts**, and **workflows**. The project ships a **TypeScript CLI** and a **JavaScript kernel** under the Node workflow runtime: the same AST is **parsed and validated** at prepare time, **script** bodies are written as files under `scripts/`, and **execution** is direct AST interpretation in process—there is no separate workflow shell binary (see [Architecture](architecture.md) for boundaries, pipelines, and contracts such as `__JAIPH_EVENT__` and `.jaiph/runs/`). +**Jaiph** is a workflow system for building agent-style pipelines. You write `.jh` sources (and optional `*.test.jh` test modules) that combine **prompts**, **rules**, **scripts**, and **workflows**. -This page is a **map**: it does not teach syntax end-to-end; it points to install steps, language references, and runtime behavior. +This page is a **map**: it does not teach syntax end-to-end; it points to install steps, language references, and runtime behavior. For how the tool fits together, see [Architecture](architecture.md). 
## Setup diff --git a/docs/grammar.md b/docs/grammar.md index 3ae513c4..c4c1f140 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -904,7 +904,7 @@ config_block = "config" "{" { config_line } "}" ; config_line = config_key "=" config_value ; config_key = "agent.default_model" | "agent.command" | "agent.backend" | "agent.trusted_workspace" | "agent.cursor_flags" | "agent.claude_flags" | "run.logs_dir" | "run.debug" - | "run.inbox_parallel" | "run.recover_limit" | "runtime.docker_image" | "runtime.docker_network" + | "run.recover_limit" | "runtime.docker_image" | "runtime.docker_network" | "runtime.docker_timeout_seconds" | "module.name" | "module.version" | "module.description" ; config_value = string | "true" | "false" | integer | string_array ; diff --git a/docs/inbox.md b/docs/inbox.md index ac762d96..7cea5fd9 100644 --- a/docs/inbox.md +++ b/docs/inbox.md @@ -60,9 +60,8 @@ channel name, and sender bound to its declared parameters `message`, `chan`, and - **Inbox is an event bus, not a filesystem watcher.** Delivery is driven by an explicit **drain** after the orchestrator workflow's steps finish — no `inotifywait`, no `fswatch`, no polling for new files. -- **Sequential by default, parallel opt-in.** For each queued message, route - targets run **in list order** unless `run.inbox_parallel = true` or - `JAIPH_INBOX_PARALLEL=true` (see [Parallel dispatch](#parallel-dispatch)). +- **Sequential dispatch.** For each queued message, route targets run **in list + order** (declaration order on the `channel` line), one completion at a time. - **Inbox is scoped per run.** Message files live under that run's **`inbox/`** directory; they are not a separate mailbox outside `.jaiph/runs`. 
- **Channels are compile-checked.** Unknown channels, bad route targets, and @@ -168,8 +167,7 @@ workflow default() { ``` **Multiple targets on one declaration** are comma-separated — they share one -route and dispatch in **declaration order** (or concurrently when parallel -dispatch is on): +route and dispatch in **declaration order**, sequentially: ```jh channel findings -> analyst, reviewer @@ -251,11 +249,9 @@ handling and `drainWorkflowQueue`. same queue and are processed in subsequent iterations. - For each message, look up targets for `channel` on **that** workflow's context. If there is no route, **skip** (silent drop). - - If there are targets, invoke each target, binding message, channel, and - sender to the target's 3 declared parameters — **sequentially** in - target-list order by default, or **all targets concurrently** via - `Promise.all` when `JAIPH_INBOX_PARALLEL=true` - (see [Ordering guarantees](#ordering-guarantees)). + - If there are targets, invoke each target **sequentially** in target-list + order, binding message, channel, and sender to the target's 3 declared + parameters (see [Ordering and sequence ids](#ordering-and-sequence-ids)). 6. Pop the workflow context and return. There is no `E_DISPATCH_DEPTH` / `JAIPH_INBOX_MAX_DISPATCH_DEPTH` check in @@ -269,61 +265,21 @@ There is no `E_DISPATCH_DEPTH` / `JAIPH_INBOX_MAX_DISPATCH_DEPTH` check in - **Sender identity** is the **current workflow name** from the context that performed the send (e.g. `researcher`), stable across modules. -## Parallel dispatch +### Ordering and sequence ids -When `run.inbox_parallel = true` is set in a `config` block (module or workflow -scope) or the environment sets `JAIPH_INBOX_PARALLEL=true`, **all targets listed -for a single message** are dispatched concurrently (via `Promise.all` in -`drainWorkflowQueue`) instead of awaiting each target in order. +Messages are handled **one at a time** in queue order (FIFO). 
For each message, +targets run **strictly in list order** on the `channel` line; the next message is +not processed until all targets for the current message have finished (success, or +fail-fast on the first non-zero exit). -**Precedence** matches the rest of Jaiph agent/run settings: an explicit -environment value wins over in-file config. See [Configuration — Defaults and precedence](configuration.md#defaults-and-precedence). - -```jh -config { - run.inbox_parallel = true -} - -channel findings -> analyst, reviewer # analyst and reviewer run in parallel - -workflow default() { - run producer() -} -``` - -### Ordering guarantees - -Messages are handled **one at a time** in queue order (FIFO). **Parallel mode** -only parallelizes **targets for the same message**; the next message is not -started until the current message's targets have all finished (`Promise.all` -completes). Within **sequential** mode, targets for that message run strictly in -list order. - -- **Non-determinism:** With `JAIPH_INBOX_PARALLEL=true`, the order in which - concurrent targets finish is undefined; only the per-message barrier is - guaranteed before the next message runs. - **Sequence ids:** Monotonic per run in the runtime (`inboxSeq`); message filenames use the same padded counter. -### Failure propagation - -In parallel mode, all targets for a message are awaited together. If any target -exits non-zero, the owning workflow fails after all concurrent targets complete -(analogous to `Promise.all` failure semantics vs sequential fail-fast). - -### Rollback - -To revert to sequential dispatch, remove `run.inbox_parallel = true` from config -or set `JAIPH_INBOX_PARALLEL=false` in the environment. Sequential mode is the -default. - ## Error semantics - **Undefined channel reference:** validation error `Channel "<name>" is not defined`. -- **Dispatched workflow exits non-zero:** the owning workflow fails. In - **sequential** mode the first failing target stops further targets for that - message.
In **parallel** mode all targets for that message are awaited, then - the run fails if any failed. +- **Dispatched workflow exits non-zero:** the owning workflow fails; the first + failing target stops further targets for that message (fail-fast). - **No route for a channel:** the message file and queue entry still exist, but dispatch **skips** that message (silent drop). This is intentional for optional subscribers; use a dedicated workflow if missing handlers should be an error. @@ -340,17 +296,17 @@ Routed receivers get three dispatch values bound to their declared parameters: | 2nd declared parameter | Channel name (e.g. `findings`) | | 3rd declared parameter | Sender name (the **workflow name** that performed the send) | -The environment variables `JAIPH_DISPATCH_CHANNEL` and `JAIPH_DISPATCH_SENDER` -are **not** set by `NodeWorkflowRuntime`; receivers get channel and sender via -their declared parameter names. +Receivers get channel and sender via their declared parameter names — +no environment-variable plumbing. - **`run_summary.jsonl`:** `NodeWorkflowRuntime` appends `INBOX_ENQUEUE`, `INBOX_DISPATCH_START`, and `INBOX_DISPATCH_COMPLETE` via `appendRunSummaryLine` (see [CLI — Run summary](cli.md#run-summary-jsonl)). `INBOX_DISPATCH_COMPLETE` includes `elapsed_ms`. For `INBOX_ENQUEUE` from `jaiph run`, the line includes `channel`, `sender`, and - `inbox_seq`. The full message body is always available on disk at - `inbox/NNN-<channel>.txt`. + `inbox_seq`. When a route consumes the channel, the full message body + is also written to `inbox/NNN-<channel>.txt` for audit; sends to + unrouted channels stay in the JSONL summary only. - **Calling a receiver with explicit args:** the CLI’s `jaiph run` only starts the file’s `default` workflow; extra CLI arguments are passed to `default` (see [CLI — `jaiph run`](cli.md#jaiph-run)).
There is no `jaiph run diff --git a/docs/install b/docs/install index 85815f8f..a6be8c1a 100755 --- a/docs/install +++ b/docs/install @@ -108,6 +108,7 @@ print_step "Installing runtime to ${LIB_DIR}..." rm -rf "${LIB_DIR}" mkdir -p "${LIB_DIR}" cp -R "${tmp_dir}/src/dist/src" "${LIB_DIR}/src" +cp "${tmp_dir}/src/package.json" "${LIB_DIR}/package.json" cp "${tmp_dir}/src/docs/jaiph-skill.md" "${SKILL_TARGET}" print_step "Installing binary to ${TARGET}..." diff --git a/docs/jaiph-skill.md b/docs/jaiph-skill.md index e0104001..a92166d6 100644 --- a/docs/jaiph-skill.md +++ b/docs/jaiph-skill.md @@ -160,7 +160,7 @@ Conventions: - Inside a workflow, `run` targets a workflow or script (local or `alias.name`), not a raw shell command. Call scripts with `run`, never `fn args` or `$(fn ...)`. - Inside a rule, use `ensure` for **rules** and `run` for **scripts only** — not `prompt`, `send`, or `run async`. - Treat rules as non-mutating checks; perform filesystem or agent mutations in **workflows**. Script steps from rules use the same managed subprocess path as workflows. Details: [Sandboxing](sandboxing.md). -- **Parallelism:** `run async ref([args...])` for managed async with implicit join. For concurrent **bash**, use `&` and the shell builtin `wait` inside a **`script`** and call it with `run`. Do not call Jaiph internals from background subprocesses unless you understand `run.inbox_parallel` locking. +- **Parallelism:** `run async ref([args...])` for managed async with implicit join. For concurrent **bash**, use `&` and the shell builtin `wait` inside a **`script`** and call it with `run`. Do not call Jaiph internals from background subprocesses unless you understand how isolation and logging interact with the runtime. - **Shell conditions:** Express conditionals with `run` to a **script** and handle failure with `catch`, or use `if` / `match` for value branching. Short-circuit brace groups remain valid **inside `script`** bodies: `cmd || { ... }`. 
- **No shell redirection around managed calls:** `run foo() > file`, `run foo() | cmd`, `run foo() &` are all `E_PARSE` errors — shell operators (`>`, `>>`, `|`, `&`) are not supported adjacent to `run` or `ensure` steps. Move shell pipelines and redirections into a **`script`** block and call it with `run`. - **Script reuse:** Prefer `import script "./tool.py" as tool` (or a sibling `.jh` module) instead of maintaining ad-hoc bash outside the compiler. Avoid informal workspace-level shared-bash directories that bypass the module graph. diff --git a/docs/libraries.md b/docs/libraries.md index 31148f09..2f6d2bb8 100644 --- a/docs/libraries.md +++ b/docs/libraries.md @@ -78,9 +78,15 @@ Copies files from the **workspace** (or sandbox overlay) into the run’s `artif import "jaiphlang/artifacts" as artifacts workflow default() { - # Copy a file into the artifacts directory under a chosen name. - # Returns the absolute path of the saved artifact. - const path = run artifacts.save("./build/output.bin", "build-output.bin") + # Single file: + const path = run artifacts.save("./build/output.bin") + + # Or several files at once — newline-separated list of paths: + const paths = """ + a.txt + b/nested.txt + """ + const dests = run artifacts.save(paths) } ``` @@ -88,4 +94,4 @@ workflow default() { | Workflow | Description | |----------|-------------| -| `save(local_path, name)` | Requires `local_path` to be a **file**. Copies to `${JAIPH_ARTIFACTS_DIR}/${name}` (creates parent dirs). Returns the absolute destination path. | +| `save(paths)` | `paths` is a single file path or a **newline-separated** list of file paths. Each file is copied to `${JAIPH_ARTIFACTS_DIR}/…` using the same relative path (`./` prefix stripped; absolute sources use `basename` only). Returns the absolute destination path(s), one per line, in order. Fails if the list is empty or any file is missing. 
| diff --git a/docs/sandboxing.md b/docs/sandboxing.md index 8a1b201e..1e56ebfb 100644 --- a/docs/sandboxing.md +++ b/docs/sandboxing.md @@ -160,7 +160,7 @@ If `/dev/fuse` is missing on the host, the CLI uses **copy mode**: before launch **Workspace immutability contract** -- Docker runs cannot directly modify the host workspace. In overlay mode the host checkout is bind-mounted read-only and writes land in a tmpfs upper layer that is discarded on container exit. In copy mode the container writes to a separate host-side clone of the workspace (`/.sandbox-/`), which is removed on container exit unless explicitly kept for debugging. In both modes the only persistence channel from a Docker run to the host is the run-artifacts directory (`/jaiph/run` → host `.jaiph/runs`). Non-Docker (local) runs are unaffected by this contract. -**Workspace patch export** -- To capture workspace changes as a patch, run `git diff` (or your own exporter) inside the workflow, write the result to a file under the workspace, then call `artifacts.save(local_path, name)` so the patch lands in the run’s `artifacts/` tree on the host. Callers choose when and what to record. The published GHCR runtime image includes `git` if you use it from a script step. See [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox). +**Workspace patch export** -- To capture workspace changes as a patch, run `git diff` (or your own exporter) inside the workflow, write the result to a file under the workspace, then call `artifacts.save(local_path)` so the patch lands in the run’s `artifacts/` tree on the host. Callers choose when and what to record. The published GHCR runtime image includes `git` if you use it from a script step. See [Libraries — `jaiphlang/artifacts`](libraries.md#jaiphlangartifacts--publishing-files-out-of-the-sandbox). **Network** -- `"default"` omits `--network`, which uses Docker's default bridge network (outbound access allowed). 
`"none"` passes `--network none` and fully disables networking -- use this for workflows that should not make external calls. Any other value (e.g. a custom Docker network name) is passed through as-is. Set `runtime.docker_network` in config or `JAIPH_DOCKER_NETWORK` in the environment. diff --git a/docs/testing.md b/docs/testing.md index 73b70921..3d7aa366 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -232,13 +232,13 @@ For each workflow run inside a test block, the harness builds the runtime enviro | Variable | Value | |---|---| -| `JAIPH_TEST_MODE` | `1` | +| `JAIPH_TEST_MODE` | `1` (selects mock prompt dispatch in `prompt.ts`) | | `JAIPH_WORKSPACE` | Project root (from `detectWorkspaceRoot`) | | `JAIPH_RUNS_DIR` | Per test block, `…/tmp/jaiph-test-block-*/.jaiph/runs` (ephemeral) | | `JAIPH_SCRIPTS` | Directory containing extracted `script` files from `buildScripts` (temp) | | `JAIPH_MOCK_RESPONSES_FILE` or `JAIPH_MOCK_DISPATCH_SCRIPT` | Set by the runner when using inline or block `mock prompt` (do not set manually) | -You do not set `JAIPH_TEST_MODE` yourself; the harness manages it. +You do not set `JAIPH_TEST_MODE` yourself; the harness manages it. Its only purpose is to route prompt steps to the mock dispatcher in `prompt.ts`. It no longer controls `__JAIPH_EVENT__` stderr suppression — the test runner now passes `suppressLiveEvents: true` directly to the in-process `NodeWorkflowRuntime` constructor so test reporter output stays clean. Durable `run_summary.jsonl` writes are unaffected; production runs (`jaiph run` via the spawned `node-workflow-runner` child) do not set the flag and stream events to stderr as before. ## Organizing tests @@ -263,7 +263,7 @@ test "default workflow prints greeting" { Compiler tests verify parse and validate outcomes using a language-agnostic txtar format. Unlike the TypeScript-embedded tests in `src/`, these fixtures are plain text files that can be reused by alternative implementations (e.g. a Rust compiler). 
-Test fixture files live in `compiler-tests/` as `.txt` files. Each file contains multiple test cases separated by `===` delimiters: +Test fixture files live in `test-fixtures/compiler-txtar/` as `.txt` files. Each file contains multiple test cases separated by `===` delimiters: ``` === test name here @@ -309,7 +309,7 @@ The entry file is determined by priority: `main.jh` if present, otherwise `input npm run test:compiler ``` -The runner discovers all `.txt` files in `compiler-tests/`, parses them, writes virtual files to a temp directory per case, runs `parsejaiph` + `validateReferences`, and asserts the expected outcome. Results are reported per test case via `node:test`. Compiler tests are also included in `npm test`. +The runner (`test-infra/compiler-test-runner.ts`) discovers all `.txt` files in `test-fixtures/compiler-txtar/`, parses them, writes virtual files to a temp directory per case, runs `parsejaiph` + `validateReferences`, and asserts the expected outcome. Results are reported per test case via `node:test`. Compiler tests are also included in `npm test`. 
### Fixture files @@ -317,10 +317,10 @@ Test cases are organized by error type and single-vs-multi-module: | File | Cases | What it covers | |------|-------|----------------| -| `compiler-tests/valid.txt` | 119 | Success cases — source compiles without error (single-module) | -| `compiler-tests/parse-errors.txt` | 274 | `E_PARSE` error cases — syntax and grammar violations | -| `compiler-tests/validate-errors.txt` | 88 | `E_VALIDATE`, `E_IMPORT_NOT_FOUND`, `E_SCHEMA` error cases (single-module) | -| `compiler-tests/validate-errors-multi-module.txt` | 20 | Validation errors requiring imports (multi-file) | +| `test-fixtures/compiler-txtar/valid.txt` | 119 | Success cases — source compiles without error (single-module) | +| `test-fixtures/compiler-txtar/parse-errors.txt` | 274 | `E_PARSE` error cases — syntax and grammar violations | +| `test-fixtures/compiler-txtar/validate-errors.txt` | 88 | `E_VALIDATE`, `E_IMPORT_NOT_FOUND`, `E_SCHEMA` error cases (single-module) | +| `test-fixtures/compiler-txtar/validate-errors-multi-module.txt` | 20 | Validation errors requiring imports (multi-file) | (Counts are one `# @expect` per test case; re-count after large fixture changes.) @@ -332,7 +332,7 @@ The initial cases were extracted from TypeScript test files across `src/parse/*. - Test names should be descriptive and unique within a file. - Keep test cases minimal — only include what is necessary to trigger the expected outcome. -The format is documented in detail in `compiler-tests/README.md`. +The format is documented in detail in `test-fixtures/compiler-txtar/README.md`. ## Golden AST tests @@ -340,7 +340,7 @@ Golden AST tests verify that the parser produces the expected tree shape for suc ### How it works -Each `.jh` fixture in `golden-ast/fixtures/` is parsed and serialized to deterministic JSON (locations and file paths stripped, keys sorted). The result is compared against a checked-in `.json` golden file in `golden-ast/expected/`. 
+Each `.jh` fixture in `test-fixtures/golden-ast/fixtures/` is parsed and serialized to deterministic JSON (locations and file paths stripped, keys sorted). The result is compared against a checked-in `.json` golden file in `test-fixtures/golden-ast/expected/`. - **Txtar tests** = error messages and "this compiles." - **Golden AST tests** = parse tree shape for successful parses. @@ -366,17 +366,17 @@ Review the diff to confirm the changes are expected, then commit the updated `.j ### Adding a new fixture -1. Create a small, focused `.jh` file in `golden-ast/fixtures/` (one concern per file). -2. Run `UPDATE_GOLDEN=1 npm run test:golden-ast` to generate `golden-ast/expected/.json`. +1. Create a small, focused `.jh` file in `test-fixtures/golden-ast/fixtures/` (one concern per file). +2. Run `UPDATE_GOLDEN=1 npm run test:golden-ast` to generate `test-fixtures/golden-ast/expected/.json`. 3. Review the generated JSON and commit both files. ## Stress and soak testing -For concurrency-sensitive behavior (for example parallel inbox dispatch), the repository includes shell-based E2E scenarios that go beyond single native tests: +For concurrency-sensitive behavior (for example inbox stress with many sends and route targets, or `run async` with interleaved managed steps), the repository includes shell-based E2E scenarios that go beyond single native tests: -- High volume and fan-out to exercise locking and dispatch under concurrent writes. +- High volume and fan-out to exercise locking and dispatch under concurrent writes to the same run directory. - Soak loops to flush out intermittent failures. -- Order-insensitive checks (counts, uniqueness) when parallel work makes ordering non-deterministic. +- Order-insensitive checks (counts, uniqueness) when concurrent work makes ordering non-deterministic for the surface under test (for example async branch completion in the progress tree). 
See `e2e/tests/91_inbox_dispatch.sh`, `e2e/tests/93_inbox_stress.sh`, and `e2e/tests/94_parallel_shell_steps.sh` for examples. @@ -405,7 +405,7 @@ Similarly, every `.jh` and `.test.jh` file under `examples/` must be accounted f ## Landing-page sample verification -The project includes a Playwright-based test (`tests/e2e-samples/landing-page.spec.ts`) that verifies landing-page code samples stay in sync with real CLI behavior. Run it with `npm run test:samples`. See [Contributing — Landing-page sample verification](contributing.md#landing-page-sample-verification-playwright) for details. +The project includes a Playwright-based test (`e2e/playwright/landing-page.spec.ts`) that verifies landing-page code samples stay in sync with real CLI behavior. Run it with `npm run test:samples`. See [Contributing — Landing-page sample verification](contributing.md#landing-page-sample-verification-playwright) for details. ## Limitations (v1) diff --git a/e2e/lib/common.sh b/e2e/lib/common.sh index 2fadb35c..8b7dd080 100644 --- a/e2e/lib/common.sh +++ b/e2e/lib/common.sh @@ -418,17 +418,20 @@ e2e::prepare_shared_context() { fi mkdir -p "${JAIPH_E2E_BIN_DIR}" "${JAIPH_E2E_WORK_DIR}" + # Agent/nested jaiph sessions export many JAIPH_* variables (including *_LOCKED). + # E2E must start from a clean contract; `unset` individual keys is insufficient. + local _jaiph_var + while IFS= read -r _jaiph_var; do + case "${_jaiph_var}" in + JAIPH_E2E_* | JAIPH_REPO_URL | JAIPH_REPO_REF) continue ;; + esac + unset "${_jaiph_var}" 2>/dev/null || true + done < <(compgen -e | grep '^JAIPH_' || true) + export PATH="${JAIPH_E2E_BIN_DIR}:${PATH}" export JAIPH_BIN_DIR="${JAIPH_E2E_BIN_DIR}" # Docker sandbox is opt-in (beta); keep it disabled for e2e tests. export JAIPH_DOCKER_ENABLED="${JAIPH_DOCKER_ENABLED:-false}" - # Keep e2e deterministic by removing user/machine agent overrides. 
- unset JAIPH_AGENT_MODEL - unset JAIPH_AGENT_COMMAND - unset JAIPH_AGENT_BACKEND - unset JAIPH_AGENT_TRUSTED_WORKSPACE - unset JAIPH_AGENT_CURSOR_FLAGS - unset JAIPH_AGENT_CLAUDE_FLAGS if [[ -z "${JAIPH_REPO_URL:-}" ]]; then export JAIPH_REPO_URL="${E2E_REPO_ROOT}" diff --git a/tests/e2e-samples/docs-site.ts b/e2e/playwright/docs-site.ts similarity index 100% rename from tests/e2e-samples/docs-site.ts rename to e2e/playwright/docs-site.ts diff --git a/tests/e2e-samples/landing-page.spec.ts b/e2e/playwright/landing-page.spec.ts similarity index 100% rename from tests/e2e-samples/landing-page.spec.ts rename to e2e/playwright/landing-page.spec.ts diff --git a/e2e/test_all.sh b/e2e/test_all.sh index 657fb502..114c6a7a 100755 --- a/e2e/test_all.sh +++ b/e2e/test_all.sh @@ -81,8 +81,13 @@ TEST_SCRIPTS=( "e2e/tests/126_file_shorthand_routing.sh" "e2e/tests/127_cli_edge_cases.sh" "e2e/tests/128_examples_format_check.sh" + "e2e/tests/128_if_statement.sh" + "e2e/tests/129_artifacts_lib.sh" "e2e/tests/130_run_recover_loop.sh" + "e2e/tests/131_tty_async_progress.sh" + "e2e/tests/132_return_log_inline_script.sh" "e2e/tests/133_return_bare_identifier.sh" + "e2e/tests/134_script_imports.sh" ) PASS_COUNT=0 diff --git a/e2e/tests/112_interpreter_tags.sh b/e2e/tests/112_interpreter_tags.sh index 1c64c1bf..913bee34 100755 --- a/e2e/tests/112_interpreter_tags.sh +++ b/e2e/tests/112_interpreter_tags.sh @@ -108,7 +108,7 @@ if run_out="$(e2e::run "bad_tag.jh" 2>&1)"; then e2e::fail "expected compile error for unknown tag, but run succeeded" else # nondeterministic: error includes absolute file path prefix which varies - e2e::assert_contains "${run_out}" 'script:lang syntax is no longer supported' "unknown tag produces actionable error" + e2e::assert_contains "${run_out}" 'unsupported top-level statement: script:golang' "unknown tag produces parse error" fi # ---------- script:node with manual shebang: compile error ---------- diff --git a/e2e/tests/129_artifacts_lib.sh 
b/e2e/tests/129_artifacts_lib.sh index 2169a2b9..5eb39bcf 100755 --- a/e2e/tests/129_artifacts_lib.sh +++ b/e2e/tests/129_artifacts_lib.sh @@ -22,7 +22,7 @@ e2e::file "artifacts_e2e.jh" <<'EOF' import "jaiphlang/artifacts" as artifacts workflow default() { - const save_path = run artifacts.save("./build_output.txt", "saved-output.txt") + const save_path = run artifacts.save("./build_output.txt") log save_path } EOF @@ -36,8 +36,8 @@ e2e::assert_contains "${artifacts_out}" "PASS" "output contains PASS" run_dir="$(e2e::run_dir "artifacts_e2e.jh")" artifacts_dir="${run_dir}artifacts" -e2e::assert_file_exists "${artifacts_dir}/saved-output.txt" "saved artifact exists" -saved_content="$(<"${artifacts_dir}/saved-output.txt")" +e2e::assert_file_exists "${artifacts_dir}/build_output.txt" "saved artifact exists" +saved_content="$(<"${artifacts_dir}/build_output.txt")" e2e::assert_equals "${saved_content}" "build-output-content" "saved artifact content matches source" e2e::pass "artifacts save" diff --git a/e2e/tests/134_script_imports.sh b/e2e/tests/134_script_imports.sh new file mode 100755 index 00000000..a97d2f79 --- /dev/null +++ b/e2e/tests/134_script_imports.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +source "${ROOT_DIR}/e2e/lib/common.sh" +trap e2e::cleanup EXIT + +HAS_PYTHON3=0 +command -v python3 >/dev/null 2>&1 && HAS_PYTHON3=1 + +# --------------------------------------------------------------------------- +e2e::section "import script: shell script via run" +# --------------------------------------------------------------------------- + +e2e::prepare_test_env "script_import_shell" + +e2e::file "greet.sh" <<'EOF' +#!/usr/bin/env bash +echo "hello from imported shell" +EOF +chmod +x "${JAIPH_E2E_TEST_DIR}/greet.sh" + +e2e::file "main_shell.jh" <<'EOF' +import script "./greet.sh" as greet + +workflow default() { + run greet() +} +EOF + +shell_out="$(e2e::run "main_shell.jh")" + +e2e::expect_stdout "${shell_out}" <<'EOF' + +Jaiph: Running main_shell.jh + +workflow default + ▸ script greet + ✓ script greet (