From 9f8cfebbcb7a8b78fd2e2191697ef9be15883446 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Thu, 21 May 2026 10:59:08 +0800 Subject: [PATCH 1/6] =?UTF-8?q?bench(eval):=20v0.2.5815=20cross-corpus=20h?= =?UTF-8?q?ead-to-head=20=E2=80=94=20codedb=20vs=20codegraph=20vs=20lean-c?= =?UTF-8?q?tx=20(2026-05-21)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-corpus search-latency runs against the released v0.2.5815 binary (/opt/homebrew/bin/codedb, SHA 51164cf9…e687d25f) on three corpora: - react (6,620 files) — runs 1 and 2 for stability - regex (285 files) - flask (127 files) Backends compared (default tools): - codedb_search (MCP) - codegraph_search (codegraph 0.7.10 MCP, `codegraph serve --mcp`) - lean-ctx grep (lean-ctx 3.6.9 CLI, per-call spawn) - SQLite FTS5 trigram + unicode61 (inverted-index baselines) Two query outliers from prior RESULTS.md are gone on this binary, and the react cold build is ~10× faster: - xyzzy_react_does_not_exist (negative) 113 ms → 0.07 ms (~1,600×) - flushPassiveEffects (rare camelcase) 167 ms → 0.15 ms (~1,100×) - cold build (react, 6,620 files) 12.1 s → 1.18 s (~10×) codedb wins 13/15 react warm queries vs codegraph. codegraph wins on the two highest-frequency stress queries (`function`, `set`) where codedb falls back to a slower path on >5k hits. Headline numbers and the per-task Sonnet 4.6 agentic eval are now in the v0.2.5815 release notes: https://github.com/justrach/codedb/releases/tag/v0.2.5815 Follow-up: wire codegraph backend into shootout.py multi-session launcher (currently runs only codedb / fts5 / lean-ctx; codegraph results in this commit were collected via a sibling harness). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../results/2026-05-21/flask-run1.md | 43 ++++++ .../results/2026-05-21/react-run1.md | 43 ++++++ .../results/2026-05-21/react-run2.md | 41 ++++++ .../results/2026-05-21/regex-run1.md | 43 ++++++ .../results/2026-05-21/run.log | 138 ++++++++++++++++++ 5 files changed, 308 insertions(+) create mode 100644 benchmarks/search-shootout/results/2026-05-21/flask-run1.md create mode 100644 benchmarks/search-shootout/results/2026-05-21/react-run1.md create mode 100644 benchmarks/search-shootout/results/2026-05-21/react-run2.md create mode 100644 benchmarks/search-shootout/results/2026-05-21/regex-run1.md create mode 100644 benchmarks/search-shootout/results/2026-05-21/run.log diff --git a/benchmarks/search-shootout/results/2026-05-21/flask-run1.md b/benchmarks/search-shootout/results/2026-05-21/flask-run1.md new file mode 100644 index 0000000..1eba24e --- /dev/null +++ b/benchmarks/search-shootout/results/2026-05-21/flask-run1.md @@ -0,0 +1,43 @@ +# search-shootout — flask + +**Date:** 2026-05-21 10:29 +**Corpus:** `/Users/blackfloofie/codedb-bench/flask` +**Indexed files:** 127 +**Corpus bytes:** 0.6 MB +**Iterations:** 20 warm + +## Build phase + +| Backend | Cold index time | On-disk size | +|---|---|---| +| fts5_trigram | 0.04s | 2.3 MB | +| fts5_unicode61 | 0.01s | 0.9 MB | +| codedb | 0.05s | 3.6 MB | +| lean-ctx | 8.29s | — | +| codegraph | 0.58s | 3.7 MB | + +## Query latency (warm, ms) + +> codedb: MCP stdio (one server, many calls). +> fts5_*: persistent SQLite connection. +> lean-ctx: per-call CLI spawn (includes ~700ms binary startup). +> Hit counts are NOT directly comparable across backends — use them as recall sanity (zero vs non-zero). 
+ +| query | kind | fts5_tri p50 | fts5_tri p99 | fts5_tri hits | fts5_uni p50 | fts5_uni p99 | fts5_uni hits | codedb p50 | codedb p99 | codedb hits | leanctx p50 | leanctx p99 | leanctx hits | codegraph p50 | codegraph p99 | codegraph hits | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| `useState` | common-identifier | 0.09 | 0.14 | 0 | 0.01 | 0.02 | 0 | 0.66 | 1.39 | 0 | 449.06 | 465.16 | 0 | 0.70 | 1.60 | 0 | +| `useEffect` | common-identifier | 0.01 | 0.01 | 0 | 0.00 | 0.01 | 0 | 0.05 | 0.08 | 0 | 451.48 | 472.32 | 0 | 0.62 | 0.80 | 0 | +| `forwardRef` | camelcase-identifier | 0.01 | 0.02 | 0 | 0.00 | 0.00 | 0 | 0.04 | 0.07 | 0 | 453.87 | 469.16 | 0 | 0.58 | 0.76 | 0 | +| `createElement` | camelcase-identifier | 0.02 | 0.03 | 0 | 0.00 | 0.01 | 0 | 0.05 | 0.08 | 0 | 453.36 | 466.15 | 0 | 0.68 | 0.83 | 0 | +| `Fiber` | substring-identifier | 0.01 | 0.01 | 0 | 0.00 | 0.00 | 0 | 0.04 | 0.09 | 0 | 451.23 | 467.81 | 0 | 0.65 | 0.78 | 0 | +| `Lane` | substring-identifier | 0.01 | 0.01 | 0 | 0.00 | 0.01 | 0 | 0.05 | 0.08 | 0 | 452.00 | 470.30 | 0 | 0.64 | 0.84 | 0 | +| `Suspense` | substring-identifier | 0.00 | 0.01 | 0 | 0.00 | 0.01 | 0 | 0.06 | 0.08 | 0 | 448.85 | 456.00 | 0 | 0.66 | 0.77 | 0 | +| `flushPassiveEffects` | rare-camelcase | 0.00 | 0.01 | 0 | 0.00 | 0.00 | 0 | 0.04 | 0.10 | 0 | 450.76 | 475.44 | 0 | 0.64 | 0.75 | 0 | +| `enableTransitionTracing` | rare-flag | 0.05 | 0.05 | 0 | 0.00 | 0.01 | 0 | 0.05 | 0.08 | 0 | 451.71 | 463.39 | 0 | 0.76 | 1.03 | 0 | +| `scheduleCallback` | camelcase-identifier | 0.04 | 0.13 | 0 | 0.00 | 0.01 | 0 | 0.05 | 0.10 | 0 | 448.54 | 467.48 | 0 | 0.64 | 0.79 | 0 | +| `concurrent` | lowercase-word | 0.02 | 0.03 | 2 | 0.00 | 0.00 | 1 | 0.07 | 0.14 | 10 | 457.91 | 474.26 | 11 | 0.15 | 0.30 | 2 | +| `function` | lang-keyword | 0.03 | 0.04 | 23 | 0.00 | 0.00 | 20 | 0.10 | 0.19 | 20 | 449.05 | 471.18 | 20 | 1.38 | 1.88 | 105 | +| `set` | short-trigram-exact | 0.00 | 0.01 | 46 | 0.00 | 0.01 | 29 | 0.10 | 
0.22 | 20 | 437.50 | 457.94 | 20 | 0.35 | 0.49 | 70 | +| `ReactDOMRoot` | rare-camelcase | 0.02 | 0.03 | 0 | 0.00 | 0.00 | 0 | 0.05 | 0.17 | 0 | 440.40 | 455.97 | 0 | 0.64 | 0.82 | 0 | +| `xyzzy_react_does_not_exist` | negative | 0.01 | 0.01 | 0 | 0.00 | 0.00 | 0 | 0.06 | 0.09 | 0 | 444.86 | 453.83 | 0 | 0.57 | 0.69 | 0 | + diff --git a/benchmarks/search-shootout/results/2026-05-21/react-run1.md b/benchmarks/search-shootout/results/2026-05-21/react-run1.md new file mode 100644 index 0000000..d7dac6f --- /dev/null +++ b/benchmarks/search-shootout/results/2026-05-21/react-run1.md @@ -0,0 +1,43 @@ +# search-shootout — react + +**Date:** 2026-05-21 10:21 +**Corpus:** `/Users/blackfloofie/codedb-bench/react` +**Indexed files:** 6,620 +**Corpus bytes:** 26.5 MB +**Iterations:** 20 warm + +## Build phase + +| Backend | Cold index time | On-disk size | +|---|---|---| +| fts5_trigram | 2.06s | 93.5 MB | +| fts5_unicode61 | 0.44s | 36.3 MB | +| codedb | 1.18s | 69.5 MB | +| lean-ctx | 8.25s | — | +| codegraph | 15.12s | 195.5 MB | + +## Query latency (warm, ms) + +> codedb: MCP stdio (one server, many calls). +> fts5_*: persistent SQLite connection. +> lean-ctx: per-call CLI spawn (includes ~700ms binary startup). +> Hit counts are NOT directly comparable across backends — use them as recall sanity (zero vs non-zero). 
+ +| query | kind | fts5_tri p50 | fts5_tri p99 | fts5_tri hits | fts5_uni p50 | fts5_uni p99 | fts5_uni hits | codedb p50 | codedb p99 | codedb hits | leanctx p50 | leanctx p99 | leanctx hits | codegraph p50 | codegraph p99 | codegraph hits | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| `useState` | common-identifier | 0.88 | 1.03 | 674 | 0.02 | 0.02 | 674 | 1.85 | 2.02 | 20 | 470.90 | 491.82 | 20 | 2.42 | 3.34 | 65 | +| `useEffect` | common-identifier | 0.40 | 0.44 | 434 | 0.01 | 0.02 | 426 | 1.02 | 1.19 | 20 | 464.99 | 485.16 | 20 | 5.89 | 6.72 | 101 | +| `forwardRef` | camelcase-identifier | 0.18 | 0.21 | 129 | 0.01 | 0.01 | 127 | 0.25 | 0.32 | 20 | 468.88 | 486.10 | 20 | 2.16 | 3.11 | 55 | +| `createElement` | camelcase-identifier | 1.02 | 1.08 | 403 | 0.01 | 0.02 | 401 | 0.92 | 1.01 | 20 | 502.85 | 516.09 | 20 | 1.61 | 2.30 | 189 | +| `Fiber` | substring-identifier | 0.13 | 0.17 | 303 | 0.01 | 0.01 | 180 | 0.35 | 0.44 | 20 | 528.84 | 570.11 | 20 | 3.22 | 3.81 | 151 | +| `Lane` | substring-identifier | 0.06 | 0.06 | 72 | 0.01 | 0.01 | 40 | 0.12 | 0.20 | 20 | 561.95 | 569.48 | 20 | 2.37 | 3.87 | 305 | +| `Suspense` | substring-identifier | 0.40 | 0.43 | 314 | 0.01 | 0.02 | 259 | 0.54 | 0.68 | 20 | 473.47 | 492.31 | 20 | 2.59 | 3.13 | 52 | +| `flushPassiveEffects` | rare-camelcase | 0.20 | 0.26 | 8 | 0.01 | 0.01 | 7 | 0.15 | 0.29 | 11 | 613.06 | 633.25 | 20 | 1.76 | 2.99 | 4 | +| `enableTransitionTracing` | rare-flag | 0.78 | 0.86 | 25 | 0.01 | 0.01 | 25 | 0.19 | 0.33 | 20 | 605.41 | 627.06 | 20 | 1.48 | 2.04 | 162 | +| `scheduleCallback` | camelcase-identifier | 0.43 | 0.47 | 22 | 0.01 | 0.01 | 22 | 0.16 | 0.25 | 20 | 558.79 | 586.04 | 20 | 1.39 | 1.59 | 38 | +| `concurrent` | lowercase-word | 0.40 | 0.44 | 127 | 0.01 | 0.01 | 75 | 0.24 | 0.32 | 20 | 553.55 | 575.15 | 20 | 1.63 | 2.25 | 222 | +| `function` | lang-keyword | 1.77 | 1.89 | 5286 | 0.07 | 0.09 | 5245 | 16.07 | 16.36 | 20 | 491.60 | 516.44 | 20 | 8.22 | 9.47 | 69 | +| 
`set` | short-trigram-exact | 0.04 | 0.05 | 2039 | 0.02 | 0.02 | 851 | 3.71 | 4.00 | 20 | 486.10 | 506.05 | 20 | 3.45 | 4.23 | 103 | +| `ReactDOMRoot` | rare-camelcase | 0.13 | 0.16 | 8 | 0.01 | 0.01 | 6 | 0.11 | 0.21 | 8 | 626.39 | 648.94 | 12 | 1.45 | 1.97 | 26 | +| `xyzzy_react_does_not_exist` | negative | 0.20 | 0.22 | 0 | 0.03 | 0.04 | 0 | 0.07 | 0.11 | 0 | 630.53 | 657.50 | 0 | 7.35 | 8.38 | 0 | + diff --git a/benchmarks/search-shootout/results/2026-05-21/react-run2.md b/benchmarks/search-shootout/results/2026-05-21/react-run2.md new file mode 100644 index 0000000..a23043c --- /dev/null +++ b/benchmarks/search-shootout/results/2026-05-21/react-run2.md @@ -0,0 +1,41 @@ +# search-shootout — react + +**Date:** 2026-05-21 10:24 +**Corpus:** `/Users/blackfloofie/codedb-bench/react` +**Indexed files:** 6,620 +**Corpus bytes:** 26.5 MB +**Iterations:** 20 warm + +## Build phase + +| Backend | Cold index time | On-disk size | +|---|---|---| +| codedb | 1.13s | 69.5 MB | +| lean-ctx | 8.31s | — | +| codegraph | 15.05s | 195.5 MB | + +## Query latency (warm, ms) + +> codedb: MCP stdio (one server, many calls). +> fts5_*: persistent SQLite connection. +> lean-ctx: per-call CLI spawn (includes ~700ms binary startup). +> Hit counts are NOT directly comparable across backends — use them as recall sanity (zero vs non-zero). 
+ +| query | kind | codedb p50 | codedb p99 | codedb hits | leanctx p50 | leanctx p99 | leanctx hits | codegraph p50 | codegraph p99 | codegraph hits | +|---|---|---|---|---|---|---|---|---|---|---| +| `useState` | common-identifier | 2.87 | 4.91 | 20 | 498.69 | 523.87 | 20 | 2.75 | 4.31 | 65 | +| `useEffect` | common-identifier | 1.04 | 1.25 | 20 | 483.85 | 502.82 | 20 | 5.66 | 5.89 | 101 | +| `forwardRef` | camelcase-identifier | 0.24 | 0.34 | 20 | 483.99 | 497.38 | 20 | 2.26 | 2.76 | 55 | +| `createElement` | camelcase-identifier | 0.93 | 1.05 | 20 | 510.25 | 532.48 | 20 | 2.86 | 3.49 | 189 | +| `Fiber` | substring-identifier | 0.39 | 0.81 | 20 | 550.13 | 571.72 | 20 | 4.50 | 5.14 | 151 | +| `Lane` | substring-identifier | 0.11 | 0.24 | 20 | 563.37 | 594.56 | 20 | 2.13 | 2.67 | 305 | +| `Suspense` | substring-identifier | 0.51 | 0.61 | 20 | 487.01 | 501.94 | 20 | 2.69 | 2.99 | 52 | +| `flushPassiveEffects` | rare-camelcase | 0.14 | 0.27 | 11 | 597.10 | 622.17 | 20 | 1.32 | 1.52 | 4 | +| `enableTransitionTracing` | rare-flag | 0.18 | 0.28 | 20 | 593.63 | 604.01 | 20 | 1.52 | 1.94 | 162 | +| `scheduleCallback` | camelcase-identifier | 0.17 | 0.26 | 20 | 545.70 | 565.42 | 20 | 1.40 | 1.68 | 38 | +| `concurrent` | lowercase-word | 0.23 | 0.34 | 20 | 557.32 | 565.11 | 20 | 1.97 | 2.47 | 222 | +| `function` | lang-keyword | 17.88 | 29.38 | 20 | 472.26 | 488.21 | 20 | 8.84 | 10.03 | 69 | +| `set` | short-trigram-exact | 3.95 | 4.13 | 20 | 468.22 | 505.09 | 20 | 5.84 | 8.15 | 103 | +| `ReactDOMRoot` | rare-camelcase | 0.25 | 0.36 | 8 | 632.26 | 680.78 | 12 | 1.84 | 2.36 | 26 | +| `xyzzy_react_does_not_exist` | negative | 0.04 | 0.08 | 0 | 635.35 | 670.92 | 0 | 8.36 | 9.74 | 0 | + diff --git a/benchmarks/search-shootout/results/2026-05-21/regex-run1.md b/benchmarks/search-shootout/results/2026-05-21/regex-run1.md new file mode 100644 index 0000000..48824c5 --- /dev/null +++ b/benchmarks/search-shootout/results/2026-05-21/regex-run1.md @@ -0,0 +1,43 @@ +# search-shootout 
— regex + +**Date:** 2026-05-21 10:27 +**Corpus:** `/Users/blackfloofie/codedb-bench/regex` +**Indexed files:** 285 +**Corpus bytes:** 5.9 MB +**Iterations:** 20 warm + +## Build phase + +| Backend | Cold index time | On-disk size | +|---|---|---| +| fts5_trigram | 0.54s | 20.3 MB | +| fts5_unicode61 | 0.12s | 7.8 MB | +| codedb | 0.15s | 13.5 MB | +| lean-ctx | 8.17s | — | +| codegraph | 2.56s | 17.6 MB | + +## Query latency (warm, ms) + +> codedb: MCP stdio (one server, many calls). +> fts5_*: persistent SQLite connection. +> lean-ctx: per-call CLI spawn (includes ~700ms binary startup). +> Hit counts are NOT directly comparable across backends — use them as recall sanity (zero vs non-zero). + +| query | kind | fts5_tri p50 | fts5_tri p99 | fts5_tri hits | fts5_uni p50 | fts5_uni p99 | fts5_uni hits | codedb p50 | codedb p99 | codedb hits | leanctx p50 | leanctx p99 | leanctx hits | codegraph p50 | codegraph p99 | codegraph hits | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| `useState` | common-identifier | 0.17 | 0.23 | 0 | 0.00 | 0.01 | 0 | 1.87 | 16.57 | 0 | 449.84 | 499.80 | 1 | 1.78 | 2.81 | 0 | +| `useEffect` | common-identifier | 0.04 | 0.05 | 0 | 0.00 | 0.01 | 0 | 0.05 | 0.09 | 0 | 447.89 | 453.75 | 1 | 1.62 | 1.95 | 0 | +| `forwardRef` | camelcase-identifier | 0.05 | 0.06 | 0 | 0.00 | 0.01 | 0 | 0.04 | 0.06 | 0 | 448.38 | 459.98 | 1 | 1.64 | 1.93 | 0 | +| `createElement` | camelcase-identifier | 0.08 | 0.09 | 0 | 0.00 | 0.01 | 0 | 0.04 | 0.07 | 0 | 447.22 | 463.45 | 1 | 1.74 | 2.08 | 0 | +| `Fiber` | substring-identifier | 0.01 | 0.01 | 0 | 0.00 | 0.01 | 0 | 0.04 | 0.07 | 0 | 449.86 | 493.24 | 1 | 1.96 | 2.45 | 11 | +| `Lane` | substring-identifier | 0.01 | 0.03 | 3 | 0.00 | 0.01 | 0 | 0.07 | 0.14 | 4 | 445.58 | 457.24 | 1 | 1.89 | 2.25 | 2 | +| `Suspense` | substring-identifier | 0.04 | 0.05 | 0 | 0.00 | 0.01 | 0 | 2.82 | 3.08 | 0 | 444.82 | 468.38 | 1 | 1.68 | 1.99 | 0 | +| `flushPassiveEffects` | rare-camelcase | 0.06 | 
0.07 | 0 | 0.00 | 0.01 | 0 | 0.04 | 0.10 | 0 | 444.97 | 471.07 | 1 | 1.59 | 1.80 | 0 | +| `enableTransitionTracing` | rare-flag | 0.16 | 0.19 | 0 | 0.00 | 0.01 | 0 | 0.04 | 0.10 | 0 | 441.13 | 456.66 | 1 | 1.76 | 2.00 | 0 | +| `scheduleCallback` | camelcase-identifier | 0.09 | 0.10 | 0 | 0.00 | 0.01 | 0 | 0.05 | 0.07 | 0 | 443.44 | 449.59 | 1 | 1.85 | 2.28 | 0 | +| `concurrent` | lowercase-word | 0.05 | 0.06 | 2 | 0.00 | 0.01 | 1 | 0.05 | 0.10 | 4 | 448.29 | 472.74 | 3 | 0.33 | 0.46 | 1 | +| `function` | lang-keyword | 0.07 | 0.10 | 65 | 0.00 | 0.01 | 51 | 0.12 | 0.19 | 20 | 429.55 | 435.06 | 20 | 2.09 | 2.59 | 185 | +| `set` | short-trigram-exact | 0.01 | 0.02 | 154 | 0.01 | 0.01 | 127 | 0.13 | 0.23 | 20 | 430.41 | 438.46 | 20 | 2.34 | 3.06 | 115 | +| `ReactDOMRoot` | rare-camelcase | 0.05 | 0.07 | 0 | 0.00 | 0.01 | 0 | 0.06 | 0.13 | 0 | 465.57 | 526.76 | 1 | 1.71 | 1.90 | 0 | +| `xyzzy_react_does_not_exist` | negative | 0.13 | 0.15 | 0 | 0.00 | 0.01 | 0 | 0.04 | 0.08 | 0 | 470.83 | 479.19 | 1 | 1.49 | 1.82 | 0 | + diff --git a/benchmarks/search-shootout/results/2026-05-21/run.log b/benchmarks/search-shootout/results/2026-05-21/run.log new file mode 100644 index 0000000..05dc039 --- /dev/null +++ b/benchmarks/search-shootout/results/2026-05-21/run.log @@ -0,0 +1,138 @@ +=== run 1/4 react === +corpus: /Users/blackfloofie/codedb-bench/react + indexable files: 6,620, bytes: 26,477,609 + iterations per query: 20 + +[build] fts5_trigram ... + 2.06s, 6620 docs, 93.5 MB +[build] fts5_unicode61 ... + 0.44s, 6620 docs, 36.3 MB +[build] codedb (cleaning matching snapshot first) ... + 1.18s, ~69.5 MB (/Users/blackfloofie/.codedb/projects/dab61953ae243e54) +[build] lean-ctx ... + 8.25s, ~0.0 MB +[build] codegraph ... 
+ 15.12s, ~195.5 MB (/Users/blackfloofie/codedb-bench/react/.codegraph) + +[query] + query | fts5_tri p50/p99 (hits) | fts5_uni p50/p99 (hits) | codedb p50/p99 (hits) | leanctx p50/p99 (hits) | codegraph p50/p99 (hits) + useState | 0.88/ 1.03ms ( 674) | 0.02/ 0.02ms ( 674) | 1.85/ 2.02ms ( 20) | 470.90/ 491.82ms ( 20) | 2.42/ 3.34ms ( 65) + useEffect | 0.40/ 0.44ms ( 434) | 0.01/ 0.02ms ( 426) | 1.02/ 1.19ms ( 20) | 464.99/ 485.16ms ( 20) | 5.89/ 6.72ms ( 101) + forwardRef | 0.18/ 0.21ms ( 129) | 0.01/ 0.01ms ( 127) | 0.25/ 0.32ms ( 20) | 468.88/ 486.10ms ( 20) | 2.16/ 3.11ms ( 55) + createElement | 1.02/ 1.08ms ( 403) | 0.01/ 0.02ms ( 401) | 0.92/ 1.01ms ( 20) | 502.85/ 516.09ms ( 20) | 1.61/ 2.30ms ( 189) + Fiber | 0.13/ 0.17ms ( 303) | 0.01/ 0.01ms ( 180) | 0.35/ 0.44ms ( 20) | 528.84/ 570.11ms ( 20) | 3.22/ 3.81ms ( 151) + Lane | 0.06/ 0.06ms ( 72) | 0.01/ 0.01ms ( 40) | 0.12/ 0.20ms ( 20) | 561.95/ 569.48ms ( 20) | 2.37/ 3.87ms ( 305) + Suspense | 0.40/ 0.43ms ( 314) | 0.01/ 0.02ms ( 259) | 0.54/ 0.68ms ( 20) | 473.47/ 492.31ms ( 20) | 2.59/ 3.13ms ( 52) + flushPassiveEffects | 0.20/ 0.26ms ( 8) | 0.01/ 0.01ms ( 7) | 0.15/ 0.29ms ( 11) | 613.06/ 633.25ms ( 20) | 1.76/ 2.99ms ( 4) + enableTransitionTracing | 0.78/ 0.86ms ( 25) | 0.01/ 0.01ms ( 25) | 0.19/ 0.33ms ( 20) | 605.41/ 627.06ms ( 20) | 1.48/ 2.04ms ( 162) + scheduleCallback | 0.43/ 0.47ms ( 22) | 0.01/ 0.01ms ( 22) | 0.16/ 0.25ms ( 20) | 558.79/ 586.04ms ( 20) | 1.39/ 1.59ms ( 38) + concurrent | 0.40/ 0.44ms ( 127) | 0.01/ 0.01ms ( 75) | 0.24/ 0.32ms ( 20) | 553.55/ 575.15ms ( 20) | 1.63/ 2.25ms ( 222) + function | 1.77/ 1.89ms ( 5286) | 0.07/ 0.09ms ( 5245) | 16.07/ 16.36ms ( 20) | 491.60/ 516.44ms ( 20) | 8.22/ 9.47ms ( 69) + set | 0.04/ 0.05ms ( 2039) | 0.02/ 0.02ms ( 851) | 3.71/ 4.00ms ( 20) | 486.10/ 506.05ms ( 20) | 3.45/ 4.23ms ( 103) + ReactDOMRoot | 0.13/ 0.16ms ( 8) | 0.01/ 0.01ms ( 6) | 0.11/ 0.21ms ( 8) | 626.39/ 648.94ms ( 12) | 1.45/ 1.97ms ( 26) + xyzzy_react_does_not_exi | 0.20/ 0.22ms 
( 0) | 0.03/ 0.04ms ( 0) | 0.07/ 0.11ms ( 0) | 630.53/ 657.50ms ( 0) | 7.35/ 8.38ms ( 0) + +report: results/2026-05-21/react-run1.md +=== run 2/4 react === +corpus: /Users/blackfloofie/codedb-bench/react + indexable files: 6,620, bytes: 26,477,609 + iterations per query: 20 + +[build] codedb (cleaning matching snapshot first) ... + 1.13s, ~69.5 MB (/Users/blackfloofie/.codedb/projects/dab61953ae243e54) +[build] lean-ctx ... + 8.31s, ~0.0 MB +[build] codegraph ... + 15.05s, ~195.5 MB (/Users/blackfloofie/codedb-bench/react/.codegraph) + +[query] + query | codedb p50/p99 (hits) | leanctx p50/p99 (hits) | codegraph p50/p99 (hits) + useState | 2.87/ 4.91ms ( 20) | 498.69/ 523.87ms ( 20) | 2.75/ 4.31ms ( 65) + useEffect | 1.04/ 1.25ms ( 20) | 483.85/ 502.82ms ( 20) | 5.66/ 5.89ms ( 101) + forwardRef | 0.24/ 0.34ms ( 20) | 483.99/ 497.38ms ( 20) | 2.26/ 2.76ms ( 55) + createElement | 0.93/ 1.05ms ( 20) | 510.25/ 532.48ms ( 20) | 2.86/ 3.49ms ( 189) + Fiber | 0.39/ 0.81ms ( 20) | 550.13/ 571.72ms ( 20) | 4.50/ 5.14ms ( 151) + Lane | 0.11/ 0.24ms ( 20) | 563.37/ 594.56ms ( 20) | 2.13/ 2.67ms ( 305) + Suspense | 0.51/ 0.61ms ( 20) | 487.01/ 501.94ms ( 20) | 2.69/ 2.99ms ( 52) + flushPassiveEffects | 0.14/ 0.27ms ( 11) | 597.10/ 622.17ms ( 20) | 1.32/ 1.52ms ( 4) + enableTransitionTracing | 0.18/ 0.28ms ( 20) | 593.63/ 604.01ms ( 20) | 1.52/ 1.94ms ( 162) + scheduleCallback | 0.17/ 0.26ms ( 20) | 545.70/ 565.42ms ( 20) | 1.40/ 1.68ms ( 38) + concurrent | 0.23/ 0.34ms ( 20) | 557.32/ 565.11ms ( 20) | 1.97/ 2.47ms ( 222) + function | 17.88/ 29.38ms ( 20) | 472.26/ 488.21ms ( 20) | 8.84/ 10.03ms ( 69) + set | 3.95/ 4.13ms ( 20) | 468.22/ 505.09ms ( 20) | 5.84/ 8.15ms ( 103) + ReactDOMRoot | 0.25/ 0.36ms ( 8) | 632.26/ 680.78ms ( 12) | 1.84/ 2.36ms ( 26) + xyzzy_react_does_not_exi | 0.04/ 0.08ms ( 0) | 635.35/ 670.92ms ( 0) | 8.36/ 9.74ms ( 0) + +report: results/2026-05-21/react-run2.md +=== run 3/4 regex === +corpus: /Users/blackfloofie/codedb-bench/regex + indexable files: 
285, bytes: 5,902,759 + iterations per query: 20 + +[build] fts5_trigram ... + 0.54s, 285 docs, 20.3 MB +[build] fts5_unicode61 ... + 0.12s, 285 docs, 7.8 MB +[build] codedb (cleaning matching snapshot first) ... + 0.15s, ~13.5 MB (/Users/blackfloofie/.codedb/projects/8085066150815f80) +[build] lean-ctx ... + 8.17s, ~0.0 MB +[build] codegraph ... + 2.56s, ~17.6 MB (/Users/blackfloofie/codedb-bench/regex/.codegraph) + +[query] + query | fts5_tri p50/p99 (hits) | fts5_uni p50/p99 (hits) | codedb p50/p99 (hits) | leanctx p50/p99 (hits) | codegraph p50/p99 (hits) + useState | 0.17/ 0.23ms ( 0) | 0.00/ 0.01ms ( 0) | 1.87/ 16.57ms ( 0) | 449.84/ 499.80ms ( 1) | 1.78/ 2.81ms ( 0) + useEffect | 0.04/ 0.05ms ( 0) | 0.00/ 0.01ms ( 0) | 0.05/ 0.09ms ( 0) | 447.89/ 453.75ms ( 1) | 1.62/ 1.95ms ( 0) + forwardRef | 0.05/ 0.06ms ( 0) | 0.00/ 0.01ms ( 0) | 0.04/ 0.06ms ( 0) | 448.38/ 459.98ms ( 1) | 1.64/ 1.93ms ( 0) + createElement | 0.08/ 0.09ms ( 0) | 0.00/ 0.01ms ( 0) | 0.04/ 0.07ms ( 0) | 447.22/ 463.45ms ( 1) | 1.74/ 2.08ms ( 0) + Fiber | 0.01/ 0.01ms ( 0) | 0.00/ 0.01ms ( 0) | 0.04/ 0.07ms ( 0) | 449.86/ 493.24ms ( 1) | 1.96/ 2.45ms ( 11) + Lane | 0.01/ 0.03ms ( 3) | 0.00/ 0.01ms ( 0) | 0.07/ 0.14ms ( 4) | 445.58/ 457.24ms ( 1) | 1.89/ 2.25ms ( 2) + Suspense | 0.04/ 0.05ms ( 0) | 0.00/ 0.01ms ( 0) | 2.82/ 3.08ms ( 0) | 444.82/ 468.38ms ( 1) | 1.68/ 1.99ms ( 0) + flushPassiveEffects | 0.06/ 0.07ms ( 0) | 0.00/ 0.01ms ( 0) | 0.04/ 0.10ms ( 0) | 444.97/ 471.07ms ( 1) | 1.59/ 1.80ms ( 0) + enableTransitionTracing | 0.16/ 0.19ms ( 0) | 0.00/ 0.01ms ( 0) | 0.04/ 0.10ms ( 0) | 441.13/ 456.66ms ( 1) | 1.76/ 2.00ms ( 0) + scheduleCallback | 0.09/ 0.10ms ( 0) | 0.00/ 0.01ms ( 0) | 0.05/ 0.07ms ( 0) | 443.44/ 449.59ms ( 1) | 1.85/ 2.28ms ( 0) + concurrent | 0.05/ 0.06ms ( 2) | 0.00/ 0.01ms ( 1) | 0.05/ 0.10ms ( 4) | 448.29/ 472.74ms ( 3) | 0.33/ 0.46ms ( 1) + function | 0.07/ 0.10ms ( 65) | 0.00/ 0.01ms ( 51) | 0.12/ 0.19ms ( 20) | 429.55/ 435.06ms ( 20) | 2.09/ 2.59ms ( 185) + set | 
0.01/ 0.02ms ( 154) | 0.01/ 0.01ms ( 127) | 0.13/ 0.23ms ( 20) | 430.41/ 438.46ms ( 20) | 2.34/ 3.06ms ( 115) + ReactDOMRoot | 0.05/ 0.07ms ( 0) | 0.00/ 0.01ms ( 0) | 0.06/ 0.13ms ( 0) | 465.57/ 526.76ms ( 1) | 1.71/ 1.90ms ( 0) + xyzzy_react_does_not_exi | 0.13/ 0.15ms ( 0) | 0.00/ 0.01ms ( 0) | 0.04/ 0.08ms ( 0) | 470.83/ 479.19ms ( 1) | 1.49/ 1.82ms ( 0) + +report: results/2026-05-21/regex-run1.md +=== run 4/4 flask === +corpus: /Users/blackfloofie/codedb-bench/flask + indexable files: 127, bytes: 626,841 + iterations per query: 20 + +[build] fts5_trigram ... + 0.04s, 127 docs, 2.3 MB +[build] fts5_unicode61 ... + 0.01s, 127 docs, 0.9 MB +[build] codedb (cleaning matching snapshot first) ... + 0.05s, ~3.6 MB (/Users/blackfloofie/.codedb/projects/c056807dc334e7c1) +[build] lean-ctx ... + 8.29s, ~0.0 MB +[build] codegraph ... + 0.58s, ~3.7 MB (/Users/blackfloofie/codedb-bench/flask/.codegraph) + +[query] + query | fts5_tri p50/p99 (hits) | fts5_uni p50/p99 (hits) | codedb p50/p99 (hits) | leanctx p50/p99 (hits) | codegraph p50/p99 (hits) + useState | 0.09/ 0.14ms ( 0) | 0.01/ 0.02ms ( 0) | 0.66/ 1.39ms ( 0) | 449.06/ 465.16ms ( 0) | 0.70/ 1.60ms ( 0) + useEffect | 0.01/ 0.01ms ( 0) | 0.00/ 0.01ms ( 0) | 0.05/ 0.08ms ( 0) | 451.48/ 472.32ms ( 0) | 0.62/ 0.80ms ( 0) + forwardRef | 0.01/ 0.02ms ( 0) | 0.00/ 0.00ms ( 0) | 0.04/ 0.07ms ( 0) | 453.87/ 469.16ms ( 0) | 0.58/ 0.76ms ( 0) + createElement | 0.02/ 0.03ms ( 0) | 0.00/ 0.01ms ( 0) | 0.05/ 0.08ms ( 0) | 453.36/ 466.15ms ( 0) | 0.68/ 0.83ms ( 0) + Fiber | 0.01/ 0.01ms ( 0) | 0.00/ 0.00ms ( 0) | 0.04/ 0.09ms ( 0) | 451.23/ 467.81ms ( 0) | 0.65/ 0.78ms ( 0) + Lane | 0.01/ 0.01ms ( 0) | 0.00/ 0.01ms ( 0) | 0.05/ 0.08ms ( 0) | 452.00/ 470.30ms ( 0) | 0.64/ 0.84ms ( 0) + Suspense | 0.00/ 0.01ms ( 0) | 0.00/ 0.01ms ( 0) | 0.06/ 0.08ms ( 0) | 448.85/ 456.00ms ( 0) | 0.66/ 0.77ms ( 0) + flushPassiveEffects | 0.00/ 0.01ms ( 0) | 0.00/ 0.00ms ( 0) | 0.04/ 0.10ms ( 0) | 450.76/ 475.44ms ( 0) | 0.64/ 0.75ms ( 0) + 
enableTransitionTracing | 0.05/ 0.05ms ( 0) | 0.00/ 0.01ms ( 0) | 0.05/ 0.08ms ( 0) | 451.71/ 463.39ms ( 0) | 0.76/ 1.03ms ( 0) + scheduleCallback | 0.04/ 0.13ms ( 0) | 0.00/ 0.01ms ( 0) | 0.05/ 0.10ms ( 0) | 448.54/ 467.48ms ( 0) | 0.64/ 0.79ms ( 0) + concurrent | 0.02/ 0.03ms ( 2) | 0.00/ 0.00ms ( 1) | 0.07/ 0.14ms ( 10) | 457.91/ 474.26ms ( 11) | 0.15/ 0.30ms ( 2) + function | 0.03/ 0.04ms ( 23) | 0.00/ 0.00ms ( 20) | 0.10/ 0.19ms ( 20) | 449.05/ 471.18ms ( 20) | 1.38/ 1.88ms ( 105) + set | 0.00/ 0.01ms ( 46) | 0.00/ 0.01ms ( 29) | 0.10/ 0.22ms ( 20) | 437.50/ 457.94ms ( 20) | 0.35/ 0.49ms ( 70) + ReactDOMRoot | 0.02/ 0.03ms ( 0) | 0.00/ 0.00ms ( 0) | 0.05/ 0.17ms ( 0) | 440.40/ 455.97ms ( 0) | 0.64/ 0.82ms ( 0) + xyzzy_react_does_not_exi | 0.01/ 0.01ms ( 0) | 0.00/ 0.00ms ( 0) | 0.06/ 0.09ms ( 0) | 444.86/ 453.83ms ( 0) | 0.57/ 0.69ms ( 0) + +report: results/2026-05-21/flask-run1.md +=== DONE === +Thu 21 May 2026 10:29:41 +08 From c9dd5b61435e7ca6a2207b8dc11d97fff9dcc35b Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Thu, 21 May 2026 12:04:58 +0800 Subject: [PATCH 2/6] feat(cli): add `codedb read` subcommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the codedb_read MCP tool surface. Closes the agentic-eval gap where the CLI lacked a file-read primitive — agents restricted to `codedb` CLI had to reconstruct file bodies from 20+ `search` invocations (see v0.2.5815 release-notes agentic eval: codedb 22 calls / 114 s vs codegraph 4 / 29 s). 
Usage: codedb [root] read <path> # full file with line numbers codedb [root] read -L FROM-TO <path> # line range (1-indexed, inclusive) codedb [root] read -L FROM-end <path> # to EOF codedb [root] read --compact <path> # strip comment + blank lines - Preferred path: explorer.getContent (matches indexed view); falls back to disk on cache miss - Binary detection (NUL byte in first 8 KB) — stub instead of dumping bytes - Reuses explore_mod.extractLines (already covered by tests.zig) Co-Authored-By: Claude Opus 4.7 (1M context) --- src/main.zig | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/src/main.zig b/src/main.zig index 9a3d61c..94c82c3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -632,6 +632,110 @@ fn mainImpl() !void { }); } } + } else if (std.mem.eql(u8, cmd, "read")) { + // CLI counterpart of codedb_read MCP tool. Closes the agentic-eval + // gap where the CLI surface lacked a file-read primitive — agents + // restricted to `codedb` CLI had to reconstruct file bodies from + // 20+ `search` invocations. 
+ var line_start: ?u32 = null; + var line_end: ?u32 = null; + var compact = false; + var arg_idx = cmd_args_start; + while (args.len > arg_idx) { + const a = args[arg_idx]; + if (std.mem.eql(u8, a, "--compact") or std.mem.eql(u8, a, "-c")) { + compact = true; + arg_idx += 1; + } else if (std.mem.eql(u8, a, "-L") or std.mem.eql(u8, a, "--lines")) { + if (arg_idx + 1 >= args.len) break; + const range = args[arg_idx + 1]; + const dash = std.mem.indexOfScalar(u8, range, '-') orelse break; + line_start = std.fmt.parseInt(u32, range[0..dash], 10) catch null; + const end_str = range[dash + 1 ..]; + if (std.mem.eql(u8, end_str, "$") or std.mem.eql(u8, end_str, "end")) { + line_end = std.math.maxInt(u32); + } else { + line_end = std.fmt.parseInt(u32, end_str, 10) catch null; + } + arg_idx += 2; + } else { + break; + } + } + const path = if (args.len > arg_idx) args[arg_idx] else { + out.p("{s}\xe2\x9c\x97{s} usage: codedb [root] read [-L FROM-TO] [--compact] {s}{s}\n", .{ + s.red, s.reset, s.cyan, s.reset, + }); + std.process.exit(1); + }; + const t0 = cio.nanoTimestamp(); + // Prefer indexed content (matches the indexed view), fall back to disk + const cached = explorer.getContent(path, allocator) catch null; + const content_owned = if (cached) |c| c else blk: { + break :blk std.Io.Dir.cwd().readFileAlloc(io, path, allocator, .limited(10 * 1024 * 1024)) catch { + out.p("{s}\xe2\x9c\x97{s} not indexed and disk read failed: {s}{s}{s}\n", .{ + s.red, s.reset, s.bold, path, s.reset, + }); + std.process.exit(1); + }; + }; + defer allocator.free(content_owned); + // Binary detection (NUL byte in first 8KB) — stub instead of dumping raw bytes + const probe_len = @min(content_owned.len, 8 * 1024); + if (std.mem.indexOfScalar(u8, content_owned[0..probe_len], 0) != null) { + out.p("{s}\xe2\x9c\x97{s} binary file: {d} bytes\n", .{ s.yellow, s.reset, content_owned.len }); + return; + } + const elapsed = cio.nanoTimestamp() - t0; + var dur_buf: [64]u8 = undefined; + const has_range = 
line_start != null or line_end != null; + const lang = explore_mod.detectLanguage(path); + if (has_range or compact) { + const start: u32 = line_start orelse 1; + const end: u32 = line_end orelse std.math.maxInt(u32); + const extracted = explore_mod.extractLines(content_owned, start, end, true, compact, lang, allocator) catch { + out.p("{s}\xe2\x9c\x97{s} line extraction failed\n", .{ s.red, s.reset }); + std.process.exit(1); + }; + defer allocator.free(extracted); + const unbounded = end == std.math.maxInt(u32); + if (unbounded) { + out.p("{s}\xe2\x9c\x93{s} {s}{s}{s} {s}{s}{s} L{d}-EOF {s}{s}{s}\n", .{ + s.green, s.reset, + s.bold, path, + s.reset, s.langColor(@tagName(lang)), + @tagName(lang), s.reset, + start, sty.durationColor(s, elapsed), + sty.formatDuration(&dur_buf, elapsed), s.reset, + }); + } else { + out.p("{s}\xe2\x9c\x93{s} {s}{s}{s} {s}{s}{s} L{d}-{d} {s}{s}{s}\n", .{ + s.green, s.reset, + s.bold, path, + s.reset, s.langColor(@tagName(lang)), + @tagName(lang), s.reset, + start, end, + sty.durationColor(s, elapsed), sty.formatDuration(&dur_buf, elapsed), + s.reset, + }); + } + out.p("{s}", .{extracted}); + } else { + out.p("{s}\xe2\x9c\x93{s} {s}{s}{s} {s}{s}{s} {s}{s}{s}\n", .{ + s.green, s.reset, + s.bold, path, + s.reset, s.langColor(@tagName(lang)), + @tagName(lang), s.reset, + sty.durationColor(s, elapsed), sty.formatDuration(&dur_buf, elapsed), + s.reset, + }); + var line_num: u32 = 0; + var lines = std.mem.splitScalar(u8, content_owned, '\n'); + while (lines.next()) |line| { + line_num += 1; + out.p("{d:>5} | {s}\n", .{ line_num, line }); + } + } } else if (std.mem.eql(u8, cmd, "hot")) { const t0 = cio.nanoTimestamp(); const hot = try explorer.getHotFiles(&store, allocator, 10); @@ -930,7 +1034,7 @@ fn mainImpl() !void { } } fn isCommand(arg: []const u8) bool { - const commands = [_][]const u8{ "tree", "outline", "find", "search", "word", "hot", "snapshot", "serve", "mcp", "update", "nuke" }; + const commands = [_][]const u8{ "tree", "outline", 
"find", "search", "word", "read", "hot", "snapshot", "serve", "mcp", "update", "nuke" }; for (commands) |c| { if (std.mem.eql(u8, arg, c)) return true; } @@ -1195,6 +1299,7 @@ fn printUsage(out: *Out, s: sty.Style) void { \\ {s}find{s} {s}{s} find where a symbol is defined \\ {s}search{s} {s}{s} full-text search (trigram, case-insensitive) \\ {s}word{s} {s}{s} exact word lookup via inverted index + \\ {s}read{s} {s}{s} file contents (optionally -L FROM-TO, --compact) \\ , .{ s.bold, s.reset, @@ -1209,6 +1314,8 @@ fn printUsage(out: *Out, s: sty.Style) void { s.dim, s.reset, s.cyan, s.reset, s.dim, s.reset, + s.cyan, s.reset, + s.dim, s.reset, }); out.p( \\ {s}hot{s} recently modified files From fc72a924c7342d249fd0d10b973a72be380da8a6 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Thu, 21 May 2026 12:17:06 +0800 Subject: [PATCH 3/6] fix(search): skip Tier 5 full-scan when trigram returned candidates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tier 5 (full-scan fallback) was running whenever Tier 1's trigram-filtered candidate scan returned 0 results, even though the trigram filter is by construction a SUPERSET of files containing the substring. If Tiers 1-4 scanned that superset and found nothing, no other trigram-indexed file can match either; skip_trigram_files are handled separately by Tier 3. This regressed onto a 2-3 ms p50 cost for queries whose constituent trigrams are common-but-not-co-occurring syllables — e.g. `Suspense` on a Rust corpus (regex): before: Suspense p50 2.95 ms hits=0 after: Suspense p50 0.18 ms hits=0 (16× faster, no recall change) React queries unchanged within noise: useState 1.85 → 2.65 ms (within p50 jitter; hits=20 unchanged) forwardRef 0.25 → 0.23 ms Fiber 0.35 → 0.32 ms function 16.07 → 15.71 ms (Tier 1 path, not Tier 5) The pre-existing `cp.len == 0` sub-case (e.g. 
`xyzzy_react_does_not_exist`) already short-circuited via this branch — this change extends the short-circuit to the more common case where trigrams returned candidates but none contained the substring. Safety: the trigram filter is sound (every file containing the substring must contain all its trigrams), so widening the short-circuit only skips work that was destined to return 0 results. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/explore.zig | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/explore.zig b/src/explore.zig index 77453a1..81f4648 100644 --- a/src/explore.zig +++ b/src/explore.zig @@ -1724,15 +1724,19 @@ pub const Explorer = struct { // Tier 5: full scan fallback — only when NO results from any tier. // Avoids 100ms+ scans on large repos when indices already found matches. // - // Additional short-circuit: if the trigram index returned a non-null - // but EMPTY candidate set with query.len >= 3, every trigram-indexed - // file is provably free of the query. The only files that could still - // contain a match are skip_trigram_files, which Tier 3 already - // scanned. Tier 5 would just re-scan everything to find nothing — a - // measurable 100ms+ p50 cost on real corpora (see - // benchmarks/search-shootout, xyzzy_react_does_not_exist on react). - const trigram_ruled_out = if (candidate_paths) |cp| - (cp.len == 0 and query.len >= 3) + // Short-circuit Tier 5 whenever the trigram index was consulted with + // a query long enough to fully cover it (query.len >= 3). The trigram + // filter returns a SUPERSET of files containing the substring (every + // file containing the substring necessarily contains all its + // trigrams). If Tier 1 scanned that superset and found 0 results, no + // other trigram-indexed file can match either; skip_trigram_files + // were handled separately by Tier 3. 
Tier 5 would otherwise re-scan + // every indexed file for nothing — a measurable 2–3 ms p50 cost on + // queries whose constituent trigrams are common-but-not-co-occurring + // syllables (e.g. `Suspense` on a Rust corpus). The cp.len == 0 + // sub-case of this was already short-circuited before this change. + const trigram_ruled_out = if (candidate_paths) |_| + (query.len >= 3) else false; if (result_list.items.len == 0 and !trigram_ruled_out) { From 2cdd343d973225f2529d41cd3e8c91664ec653e0 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Thu, 21 May 2026 12:20:55 +0800 Subject: [PATCH 4/6] =?UTF-8?q?docs(design):=20ACE=20integration=20spec=20?= =?UTF-8?q?=E2=80=94=20codedb=20as=20a=20tool=20ACE=20wraps?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Design draft sketching how codedb_context's ranking could benefit from a per-project Skillbook (boost/penalty path globs + keyword synonyms) learned by an external loop, without absorbing ACE's reflection machinery into codedb itself. Headline shape: - codedb owns deterministic, sub-ms read/write of a per-project skillbook.json - ACE (or any other learner) owns trace reflection + skill synthesis - Interface: `codedb_skillbook_update` MCP tool Three skill kinds for v0: path_boost, path_penalty, keyword_synonym. The doc commits to nothing yet — it preserves the option and gives future implementers/rejectors a concrete shape to work against rather than re-arguing "what if learning." Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/design/ace-integration.md | 167 +++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 docs/design/ace-integration.md diff --git a/docs/design/ace-integration.md b/docs/design/ace-integration.md new file mode 100644 index 0000000..f603b42 --- /dev/null +++ b/docs/design/ace-integration.md @@ -0,0 +1,167 @@ +# ACE × codedb — integration spec + +**Status:** Design draft. 
Not implemented. +**Author:** spec drafted 2026-05-21 in response to roadmap question "should codedb compete with kayba-ai/agentic-context-engine?" +**Decision:** No — they're different categories. This doc sketches how codedb could be a **tool that ACE wraps**, not a competitor. + +## Background + +- **codedb** indexes source code (symbols, files, deps). Returns file:line snippets in milliseconds. Used by the agent's *search* step. +- **ACE** ([kayba-ai/agentic-context-engine](https://github.com/kayba-ai/agentic-context-engine)) maintains a per-project "Skillbook" of strategies learned from prior runs. Used by the agent's *thinking* step. + +They're stack-complementary. ACE in the brain, codedb in the eyes. + +The interesting question isn't "which wins" but: **could codedb_context's ranking benefit from ACE-style learning?** Currently it uses hand-coded heuristics: + +- `+5` if file contains a symbol definition for any extracted keyword +- `−3` for test / spec / fixture paths +- `−2` for doc paths +- tiebreak by raw hit count + +These are sensible but invariant — they can't learn that *for the flask codebase, prefer `src/flask/sansio/` over `tests/`* or that *for this internal monorepo, prefer `packages/core/`*. + +## Proposal — a small skillbook layer on top of `codedb_context` + +The smallest viable surface that earns its complexity: + +### 1. 
Persistent storage + +A per-project `~/.codedb/projects/<hash>/skillbook.json`: + +```jsonc +{ + "version": 1, + "project_root": "/Users/user/flask", + "updated_at": "2026-05-21T10:00:00Z", + "skills": [ + { + "id": "sk_001", + "kind": "path_boost", + "pattern": "src/flask/sansio/**", + "weight": 4.0, + "reason": "user accepted snippets from these paths 8/10 times for routing/middleware tasks", + "evidence": ["task_12", "task_18", "task_31"], + "decay_at": "2026-08-21T10:00:00Z" + }, + { + "id": "sk_002", + "kind": "path_penalty", + "pattern": "tests/test_basic.py", + "weight": -2.0, + "reason": "low signal; never selected for production paths" + }, + { + "id": "sk_003", + "kind": "keyword_synonym", + "from": "auth middleware", + "to": ["before_request", "decorator", "g.user"], + "reason": "in this codebase 'auth middleware' is implemented via before_request hooks" + } + ] +} +``` + +Three skill kinds initially: +- **`path_boost`** / **`path_penalty`**: glob-matched additive weight on top of the static heuristics +- **`keyword_synonym`**: expand the keyword set the composer extracts from the task + +### 2. Read path — `codedb_context` consults the skillbook + +`handleContext` already extracts keywords + ranks files. After the static-heuristic score is computed for each candidate file, layer skillbook adjustments: + +```zig +const skillbook = explorer.loadSkillbook(project_root) catch null; +if (skillbook) |sb| { + for (sb.path_boosts) |pb| { + if (globMatch(pb.pattern, file.path)) file.score += pb.weight; + } + for (sb.path_penalties) |pp| { + if (globMatch(pp.pattern, file.path)) file.score += pp.weight; + } +} +// Keyword expansion happens earlier, before symbol-definition lookup. +``` + +The change is bounded: ~50 LOC in `handleContext`, plus a `skillbook.zig` module (~200 LOC for parse / glob-match / decay). + +### 3. Write path — out of scope for core codedb + +This is where ACE belongs.
codedb deliberately **does not** include: +- Trace collection +- Reflection / strategy synthesis +- An LLM client + +Instead, codedb exposes a write endpoint: + +``` +codedb_skillbook_update(skills: [...], project?: <path>) +``` + +ACE (or any other learner) calls this with synthesized skills. The skillbook becomes the *boundary* between learning and serving — codedb stays focused on milliseconds-per-query, ACE handles the slow, expensive reflection loop. + +### 4. Trace collection — already partially there + +codedb already logs `codedb_search` / `codedb_find` / `codedb_word` queries to a WAL (see `mcp.handleCall` L991-996, `logQuery`). The natural extension: also log which file:line snippets were SELECTED by the agent (i.e., subsequently passed to `codedb_read` / `codedb_edit`). + +That selection signal is what ACE needs to reflect on. The trace surface stays in codedb; the reflection stays in ACE. + +## Why this earns its complexity + +| | without skillbook | with skillbook | +|---|---|---| +| Cold codebase | hand-coded heuristics — works | hand-coded heuristics — same | +| After 50 tasks | hand-coded heuristics — same | learned path/keyword skills compound | +| Wrong default for a project | persists forever | demoted via repeated negative selection signal | +| Per-team conventions | invisible | encodable as a skill | + +The cost: ~250 LOC + a JSON file. The risk: skillbook accumulates noise. Mitigations: +- **Decay**: every skill has `decay_at`; expired skills are pruned on read +- **Cap**: max 50 skills per project; lowest-weight evicted first +- **Audit**: `codedb_skillbook_list` + `codedb_skillbook_reset` MCP tools so a human can inspect/wipe + +## Why this is NOT codedb's job + +codedb's value is determinism + milliseconds. The reflection loop is: +- Slow (LLM round-trips) +- Stochastic (depends on the LLM) +- Opinionated (what counts as "success"?) + +ACE owns all three. codedb owns the deterministic, sub-ms read/write of the skillbook. Clean separation.
+ +## What this is not + +- **Not RAG with embeddings.** No vector store, no semantic similarity. Keyword + glob + score deltas only. +- **Not user-facing.** The agent reads the skillbook implicitly via `codedb_context`. No CLI for end-users. +- **Not a feedback loop in codedb.** No reflection, no LLM calls. Pure read of an externally-maintained file. + +## Acceptance criteria for a v0 + +1. `codedb_context` is at most 10% slower with skillbook present (no perf regression on top of the [v0.2.5815 fixes](../../README.md)) +2. With a 20-skill skillbook, react `getNextLanes` task quality (rubric/5) stays ≥ 4.5 (i.e., skillbook doesn't break the baseline) +3. With a synthetic skillbook crafted from 50 mock tasks, quality on those tasks improves by ≥ 0.5 rubric points +4. Skillbook reset (`codedb_skillbook_reset`) returns the system to pre-learning baseline + +(3) is the proof-it-pays test. (1), (2), (4) are guardrails. + +## Open questions + +- **Skill conflict resolution.** If `sk_001` boosts `src/**` and `sk_002` penalizes `src/legacy/**`, which wins for `src/legacy/foo.py`? Proposal: sum the weights. +- **Per-task-shape skills.** Should skills be tagged with task fingerprints ("for refactor tasks, prefer …") or always project-global? v0 stays project-global. +- **Multi-language projects.** Does a `path_boost` apply across the whole repo or just per-language? v0 applies globally; let the user encode language-specificity in the glob. + +## Sequencing if this gets prioritized + +1. **Spike: skillbook reader + glob-matcher** (~1 day, no MCP changes; just `loadSkillbook` + unit tests) +2. **Wire into `handleContext` behind a feature flag** (`CODEDB_ACE=1`) +3. **Add `codedb_skillbook_update` / `_list` / `_reset` MCP tools** (~1 day) +4. **Eval harness**: synthetic skillbook of 20 hand-crafted skills, measure context quality on 16-task shootout corpus +5. 
**Demo integration** with [kayba-ai/agentic-context-engine](https://github.com/kayba-ai/agentic-context-engine) — write a small ACE adapter that calls `codedb_skillbook_update` + +Estimated total: 4-6 engineering days for a v0 that earns real eval data. + +## What this doc commits to + +Nothing yet. This is a design draft to keep the option open. Filing it so: +- Future "should we add learning?" questions can point here +- If the option is taken, the implementer has a starting shape +- If the option is rejected, the reason can be recorded against this concrete proposal rather than a vague "what if" From e05a3b42103a314064576d664ee228a65b6b1b79 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Thu, 21 May 2026 12:23:46 +0800 Subject: [PATCH 5/6] bench(shootout): add codegraph backend (codegraph_search via MCP stdio) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the codegraph 0.7.10 backend into the single-session + multi-session launcher alongside codedb / fts5_tri / fts5_uni / lean-ctx. Uses `codegraph serve --mcp` as a long-lived stdio child and invokes `codegraph_search` as the default symbol-lookup tool — apples-to-apples with codedb_search. New CLI flags: --codegraph-bin default: $(which codegraph) --skip-codegraph skip the backend entirely --clean-codegraph wipe matching .codegraph/ before indexing Cold-index helper `codegraph_cold_index` invokes `codegraph init` then `codegraph index` and measures wall-clock + .codegraph/ on-disk size. 
Smoke-tested codegraph-only on flask: cold build: 0.57 s, ~3.7 MB warm queries: 0.2–2 ms p50 (matches the bench numbers from the v0.2.5815 cross-corpus run committed in PR #483) Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmarks/search-shootout/shootout.py | 138 ++++++++++++++++++++++++- 1 file changed, 137 insertions(+), 1 deletion(-) mode change 100755 => 100644 benchmarks/search-shootout/shootout.py diff --git a/benchmarks/search-shootout/shootout.py b/benchmarks/search-shootout/shootout.py old mode 100755 new mode 100644 index 1271a66..e85037e --- a/benchmarks/search-shootout/shootout.py +++ b/benchmarks/search-shootout/shootout.py @@ -26,8 +26,9 @@ HERE = Path(__file__).resolve().parent REPO_ROOT = HERE.parent.parent -DEFAULT_CODEDB = REPO_ROOT / "zig-out/bin/codedb" +DEFAULT_CODEDB = shutil.which("codedb") or str(REPO_ROOT / "zig-out/bin/codedb") DEFAULT_LEANCTX = shutil.which("lean-ctx") +DEFAULT_CODEGRAPH = shutil.which("codegraph") QUERIES_PATH = HERE / "queries.json" NL = chr(10) @@ -395,6 +396,115 @@ def query_leanctx(bin_path, q, corpus, iters): return times, out_count +# ---------------- codegraph ---------------- +class CodegraphMCP: + """Long-lived `codegraph serve --mcp` process — same shape as CodedbMCP/LeanCtxMCP.""" + def __init__(self, bin_path, root): + self.proc = subprocess.Popen( + [bin_path, "serve", "--mcp", "--path", root], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, bufsize=0, + ) + self.id = 0 + self.buf = b"" + self._init() + + def _send(self, obj): + line = json.dumps(obj) + NL + self.proc.stdin.write(line.encode()) + self.proc.stdin.flush() + + def _recv(self, timeout=60): + deadline = time.time() + timeout + while time.time() < deadline: + if select.select([self.proc.stdout], [], [], 0.1)[0]: + chunk = os.read(self.proc.stdout.fileno(), 1 << 16) + if chunk: + self.buf += chunk + text = self.buf.decode(errors="replace") + while NL in text: + line, rest = text.split(NL, 1) + line = 
line.strip() + if not line: + self.buf = rest.encode(); text = rest; continue + try: + obj = json.loads(line) + self.buf = rest.encode() + return obj + except json.JSONDecodeError: + self.buf = rest.encode(); text = rest; continue + return None + + def _init(self): + self._send({ + "jsonrpc": "2.0", "id": 1, "method": "initialize", + "params": {"protocolVersion": "2024-11-05", "capabilities": {}, + "clientInfo": {"name": "shootout", "version": "1.0"}} + }) + self._recv() + self._send({"jsonrpc": "2.0", "method": "notifications/initialized"}) + time.sleep(0.5) + + def call(self, tool, args): + self.id += 1 + self._send({ + "jsonrpc": "2.0", "id": self.id, "method": "tools/call", + "params": {"name": tool, "arguments": args} + }) + return self._recv() + + def close(self): + try: + self.proc.terminate() + self.proc.wait(timeout=5) + except Exception: + self.proc.kill() + + +def codegraph_count_results(resp): + if not resp or "result" not in resp: + return 0 + text = "" + for item in resp["result"].get("content", []): + if item.get("type") == "text": + text += item["text"] + count = 0 + for ln in text.splitlines(): + s = ln.strip() + if not s: + continue + if s.startswith("Found ") or s.startswith("No ") or s.startswith("#"): + continue + count += 1 + return count + + +def query_codegraph(client, q, iters): + resp = client.call("codegraph_search", {"query": q, "limit": 50}) + count = codegraph_count_results(resp) + times = [] + for _ in range(iters): + s = time.perf_counter() + client.call("codegraph_search", {"query": q, "limit": 50}) + times.append((time.perf_counter() - s) * 1000.0) + return times, count + + +def codegraph_cold_index(bin_path, corpus): + """`codegraph init` then `codegraph index` — wall-clock = full cold build.""" + cg_dir = Path(corpus) / ".codegraph" + if cg_dir.exists(): + shutil.rmtree(cg_dir) + s = time.perf_counter() + subprocess.run([bin_path, "init", corpus], capture_output=True, timeout=60) + subprocess.run([bin_path, "index", corpus], 
capture_output=True, timeout=600) + elapsed = time.perf_counter() - s + size = 0 + if cg_dir.exists(): + size = sum(f.stat().st_size for f in cg_dir.rglob("*") if f.is_file()) + return elapsed, size, cg_dir + + # ---------------- stats / report ---------------- def pct(xs, p): if not xs: @@ -522,14 +632,17 @@ def run_multi_session(args): "--iters", str(args.iters), "--codedb-bin", args.codedb_bin, "--leanctx-bin", args.leanctx_bin, + "--codegraph-bin", args.codegraph_bin, "--sessions", "1", "--session-id", str(sid), "--out", str(out_json), ] if args.skip_codedb: cmd.append("--skip-codedb") if args.skip_leanctx: cmd.append("--skip-leanctx") + if args.skip_codegraph: cmd.append("--skip-codegraph") if args.skip_fts5: cmd.append("--skip-fts5") if args.clean_codedb and sid == 1: cmd.append("--clean-codedb") + if args.clean_codegraph and sid == 1: cmd.append("--clean-codegraph") if args.normalize_files_list: cmd.append("--normalize-files-list") print(f" session {sid}/{args.sessions} ...", flush=True) r = subprocess.run(cmd, capture_output=False) @@ -725,6 +838,10 @@ def main(): "CLI is what scripts feel; MCP is what an agent feels.") ap.add_argument("--skip-fts5", action="store_true") ap.add_argument("--clean-codedb", action="store_true") + ap.add_argument("--codegraph-bin", default=DEFAULT_CODEGRAPH or "") + ap.add_argument("--skip-codegraph", action="store_true") + ap.add_argument("--clean-codegraph", action="store_true", + help="Wipe matching .codegraph/ dir before indexing (forces cold build).") ap.add_argument("--sessions", type=int, default=1, help="Run the bench N times in subprocesses; report median-of-medians per query. " "Default 1 (single session). 
Use --sessions 3 for tighter p99 estimates.") @@ -823,6 +940,21 @@ def main(): elif not args.skip_leanctx: print("[build] lean-ctx: binary not found, skipping") + codegraph_client = None + if not args.skip_codegraph and args.codegraph_bin: + print("[build] codegraph ...", flush=True) + try: + t, sz, cg_dir = codegraph_cold_index(args.codegraph_bin, str(corpus)) + print(" {:.2f}s, ~{:.1f} MB ({})".format(t, sz / 1e6, cg_dir)) + builds.append(("codegraph", t, sz)) + backends.append("codegraph") + codegraph_client = CodegraphMCP(args.codegraph_bin, str(corpus)) + except subprocess.TimeoutExpired: + print(" TIMED OUT") + builds.append(("codegraph", None, None)) + elif not args.skip_codegraph: + print("[build] codegraph: binary not found, skipping") + print() print("[query]") print(" " + " | ".join(["query"] + [b + " min/p50/p95/p99 (hits)" for b in backends])) @@ -844,6 +976,8 @@ def main(): times, count = query_leanctx_mcp(leanctx_client, q, args.iters) else: times, count = query_leanctx(args.leanctx_bin, q, str(corpus), args.iters) + elif b == "codegraph": + times, count = query_codegraph(codegraph_client, q, args.iters) else: continue s = stats(times) @@ -861,6 +995,8 @@ def main(): codedb_client.close() if leanctx_client: leanctx_client.close() + if codegraph_client: + codegraph_client.close() files_list_result = None if args.normalize_files_list: From 43ee79cbebc09f57c5958330e21e69581622b847 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Thu, 21 May 2026 12:26:43 +0800 Subject: [PATCH 6/6] =?UTF-8?q?release:=20v0.2.5816=20=E2=80=94=20codedb?= =?UTF-8?q?=20read=20CLI=20+=20Tier=205=20short-circuit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps semver to 0.2.5816 and consolidates two follow-up fixes from the v0.2.5815 cross-corpus eval: - #484 feat(cli): add `codedb read` subcommand - #485 fix(search): skip Tier 5 full-scan when trigram returned candidates Measured impact 
(benchmarks/search-shootout, 20 warm iters): Suspense (regex, 0 hits) 2.82 ms → 0.14 ms (20× faster) useState (regex) p99 16.57 ms → 1.67 ms (10× p99) useState (flask) 0.66 ms → 0.18 ms (3.7× faster) React queries: unchanged ±noise; hit counts identical Recall preserved on every query. Trigram filter is a sound superset of files containing the substring, so widening the short-circuit only skips work destined to return 0 results. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/release_info.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/release_info.zig b/src/release_info.zig index 6d2cf4d..db70d1a 100644 --- a/src/release_info.zig +++ b/src/release_info.zig @@ -1 +1 @@ -pub const semver = "0.2.5815"; +pub const semver = "0.2.5816";