From c9dd5b61435e7ca6a2207b8dc11d97fff9dcc35b Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Thu, 21 May 2026 12:04:58 +0800 Subject: [PATCH 1/2] feat(cli): add `codedb read` subcommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the codedb_read MCP tool surface. Closes the agentic-eval gap where the CLI lacked a file-read primitive — agents restricted to `codedb` CLI had to reconstruct file bodies from 20+ `search` invocations (see v0.2.5815 release-notes agentic eval: codedb 22 calls / 114 s vs codegraph 4 / 29 s). Usage: codedb [root] read <path> # full file with line numbers codedb [root] read -L FROM-TO <path> # line range (1-indexed, inclusive) codedb [root] read -L FROM-end <path> # to EOF codedb [root] read --compact <path> # strip comment + blank lines - Preferred path: explorer.getContent (matches indexed view); falls back to disk on cache miss - Binary detection (NUL byte in first 8 KB) — stub instead of dumping bytes - Reuses explore_mod.extractLines (already covered by tests.zig) Co-Authored-By: Claude Opus 4.7 (1M context) --- src/main.zig | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/src/main.zig b/src/main.zig index 9a3d61c..94c82c3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -632,6 +632,110 @@ fn mainImpl() !void { }); } } + } else if (std.mem.eql(u8, cmd, "read")) { + // CLI counterpart of codedb_read MCP tool. Closes the agentic-eval + // gap where the CLI surface lacked a file-read primitive — agents + // restricted to `codedb` CLI had to reconstruct file bodies from + // 20+ `search` invocations. 
+ var line_start: ?u32 = null; + var line_end: ?u32 = null; + var compact = false; + var arg_idx = cmd_args_start; + while (args.len > arg_idx) { + const a = args[arg_idx]; + if (std.mem.eql(u8, a, "--compact") or std.mem.eql(u8, a, "-c")) { + compact = true; + arg_idx += 1; + } else if (std.mem.eql(u8, a, "-L") or std.mem.eql(u8, a, "--lines")) { + if (arg_idx + 1 >= args.len) break; + const range = args[arg_idx + 1]; + const dash = std.mem.indexOfScalar(u8, range, '-') orelse break; + line_start = std.fmt.parseInt(u32, range[0..dash], 10) catch null; + const end_str = range[dash + 1 ..]; + if (std.mem.eql(u8, end_str, "$") or std.mem.eql(u8, end_str, "end")) { + line_end = std.math.maxInt(u32); + } else { + line_end = std.fmt.parseInt(u32, end_str, 10) catch null; + } + arg_idx += 2; + } else { + break; + } + } + const path = if (args.len > arg_idx) args[arg_idx] else { + out.p("{s}\xe2\x9c\x97{s} usage: codedb [root] read [-L FROM-TO] [--compact] {s}{s}\n", .{ + s.red, s.reset, s.cyan, s.reset, + }); + std.process.exit(1); + }; + const t0 = cio.nanoTimestamp(); + // Prefer indexed content (matches the indexed view), fall back to disk + const cached = explorer.getContent(path, allocator) catch null; + const content_owned = if (cached) |c| c else blk: { + break :blk std.Io.Dir.cwd().readFileAlloc(io, path, allocator, .limited(10 * 1024 * 1024)) catch { + out.p("{s}\xe2\x9c\x97{s} not indexed and disk read failed: {s}{s}{s}\n", .{ + s.red, s.reset, s.bold, path, s.reset, + }); + std.process.exit(1); + }; + }; + defer allocator.free(content_owned); + // Binary detection (NUL byte in first 8KB) — stub instead of dumping raw bytes + const probe_len = @min(content_owned.len, 8 * 1024); + if (std.mem.indexOfScalar(u8, content_owned[0..probe_len], 0) != null) { + out.p("{s}\xe2\x9c\x97{s} binary file: {d} bytes\n", .{ s.yellow, s.reset, content_owned.len }); + return; + } + const elapsed = cio.nanoTimestamp() - t0; + var dur_buf: [64]u8 = undefined; + const has_range = 
line_start != null or line_end != null; + const lang = explore_mod.detectLanguage(path); + if (has_range or compact) { + const start: u32 = line_start orelse 1; + const end: u32 = line_end orelse std.math.maxInt(u32); + const extracted = explore_mod.extractLines(content_owned, start, end, true, compact, lang, allocator) catch { + out.p("{s}\xe2\x9c\x97{s} line extraction failed\n", .{ s.red, s.reset }); + std.process.exit(1); + }; + defer allocator.free(extracted); + const unbounded = end == std.math.maxInt(u32); + if (unbounded) { + out.p("{s}\xe2\x9c\x93{s} {s}{s}{s} {s}{s}{s} L{d}-EOF {s}{s}{s}\n", .{ + s.green, s.reset, + s.bold, path, + s.reset, s.langColor(@tagName(lang)), + @tagName(lang), s.reset, + start, sty.durationColor(s, elapsed), + sty.formatDuration(&dur_buf, elapsed), s.reset, + }); + } else { + out.p("{s}\xe2\x9c\x93{s} {s}{s}{s} {s}{s}{s} L{d}-{d} {s}{s}{s}\n", .{ + s.green, s.reset, + s.bold, path, + s.reset, s.langColor(@tagName(lang)), + @tagName(lang), s.reset, + start, end, + sty.durationColor(s, elapsed), sty.formatDuration(&dur_buf, elapsed), + s.reset, + }); + } + out.p("{s}", .{extracted}); + } else { + out.p("{s}\xe2\x9c\x93{s} {s}{s}{s} {s}{s}{s} {s}{s}{s}\n", .{ + s.green, s.reset, + s.bold, path, + s.reset, s.langColor(@tagName(lang)), + @tagName(lang), s.reset, + sty.durationColor(s, elapsed), sty.formatDuration(&dur_buf, elapsed), + s.reset, + }); + var line_num: u32 = 0; + var lines = std.mem.splitScalar(u8, content_owned, '\n'); + while (lines.next()) |line| { + line_num += 1; + out.p("{d:>5} | {s}\n", .{ line_num, line }); + } + } } else if (std.mem.eql(u8, cmd, "hot")) { const t0 = cio.nanoTimestamp(); const hot = try explorer.getHotFiles(&store, allocator, 10); @@ -930,7 +1034,7 @@ fn mainImpl() !void { } } fn isCommand(arg: []const u8) bool { - const commands = [_][]const u8{ "tree", "outline", "find", "search", "word", "hot", "snapshot", "serve", "mcp", "update", "nuke" }; + const commands = [_][]const u8{ "tree", "outline", 
"find", "search", "word", "read", "hot", "snapshot", "serve", "mcp", "update", "nuke" }; for (commands) |c| { if (std.mem.eql(u8, arg, c)) return true; } @@ -1195,6 +1299,7 @@ fn printUsage(out: *Out, s: sty.Style) void { \\ {s}find{s} {s}{s} find where a symbol is defined \\ {s}search{s} {s}{s} full-text search (trigram, case-insensitive) \\ {s}word{s} {s}{s} exact word lookup via inverted index + \\ {s}read{s} {s}{s} file contents (optionally -L FROM-TO, --compact) \\ , .{ s.bold, s.reset, @@ -1209,6 +1314,8 @@ fn printUsage(out: *Out, s: sty.Style) void { s.dim, s.reset, s.cyan, s.reset, s.dim, s.reset, + s.cyan, s.reset, + s.dim, s.reset, }); out.p( \\ {s}hot{s} recently modified files From 1fa2bb7c90244a92c84ebb37f1c4653ea72f0f8a Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Thu, 21 May 2026 13:10:57 +0800 Subject: [PATCH 2/2] fix(cli): add path-safety guards + project-root anchoring to `codedb read` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Codex P1+P2 review on PR #484: - **P1** Block traversal + sensitive paths. The first version of `codedb read` went directly from user input to `explorer.getContent` / disk fallback with no path validation. Now uses `mcp_server.isPathSafe` (rejects absolute paths, `..` traversal, NUL bytes, backslashes) + `watcher.isSensitivePath` (blocks `.env`, `id_rsa`, `.ssh/*`, etc.) — same guards `codedb_read` MCP uses. - **P2** Anchor fallback reads to the configured project root, not cwd. Pre-fix: `codedb /path/to/project read foo.zig` would read `./foo.zig` from wherever the user invoked it, not `/path/to/project/foo.zig`. Now opens `root` as a Dir and reads relative to it. - Drive-by fix: `out.flush()` before the error-path `std.process.exit(1)` calls in the path guards, the project-root open, and the disk-read fallback (the usage-error and line-extraction-failure exits are not touched by this patch and still exit without flushing). The buffered `Out` writer doesn't flush on exit, so security messages were silently dropped — which is also the silent-exit-1 UX issue all 3 reader.md generation agents flagged. 
Verified manually: read /etc/passwd → "path must be relative to project root..." read ../../etc/passwd → same read .env → "access to sensitive file blocked..." read hello.zig → works (relative path under root) Co-Authored-By: Claude Opus 4.7 (1M context) --- src/main.zig | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/main.zig b/src/main.zig index 94c82c3..041ce5c 100644 --- a/src/main.zig +++ b/src/main.zig @@ -668,14 +668,45 @@ fn mainImpl() !void { }); std.process.exit(1); }; + // Same safety guards as codedb_read MCP — path must be project-relative + // (no leading `/`, no `..` traversal, no null bytes / backslashes) and + // must not target sensitive files like .env / id_rsa / .ssh/*. Without + // these guards the CLI happily reads /etc/passwd, secrets, or any file + // the codedb process can see. + if (!mcp_server.isPathSafe(path)) { + out.p("{s}\xe2\x9c\x97{s} path must be relative to the project root (no leading `/`, no `..` traversal): {s}{s}{s}\n", .{ + s.red, s.reset, s.bold, path, s.reset, + }); + out.flush(); + std.process.exit(1); + } + if (watcher.isSensitivePath(path)) { + out.p("{s}\xe2\x9c\x97{s} access to sensitive file blocked: {s}{s}{s}\n", .{ + s.red, s.reset, s.bold, path, s.reset, + }); + out.flush(); + std.process.exit(1); + } const t0 = cio.nanoTimestamp(); // Prefer indexed content (matches the indexed view), fall back to disk + // reads anchored at the resolved project root — NOT cwd. Pre-fix, an + // explicit `codedb /path/to/proj read foo.zig` would read `./foo.zig` + // from wherever the user happened to invoke it. 
const cached = explorer.getContent(path, allocator) catch null; const content_owned = if (cached) |c| c else blk: { - break :blk std.Io.Dir.cwd().readFileAlloc(io, path, allocator, .limited(10 * 1024 * 1024)) catch { + var root_dir = std.Io.Dir.cwd().openDir(io, root, .{}) catch { + out.p("{s}\xe2\x9c\x97{s} cannot open project root: {s}{s}{s}\n", .{ + s.red, s.reset, s.bold, root, s.reset, + }); + out.flush(); + std.process.exit(1); + }; + defer root_dir.close(io); + break :blk root_dir.readFileAlloc(io, path, allocator, .limited(10 * 1024 * 1024)) catch { out.p("{s}\xe2\x9c\x97{s} not indexed and disk read failed: {s}{s}{s}\n", .{ s.red, s.reset, s.bold, path, s.reset, }); + out.flush(); std.process.exit(1); }; };