diff --git a/.gitignore b/.gitignore index 35a8945..a6f7ab2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ .vscode/ *.iml CLAUDE.md +.claude/ .release-notes-*.md diff --git a/CHANGELOG.md b/CHANGELOG.md index a3722e6..3dae9ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +## [0.11.4] - 2026-05-29 + +### Fixed + +- **`scanner/pi`: a single invalid-UTF-8 line no longer drops the rest of the file (or the whole session)** — `parse_session` used `reader.lines().map_while(Result::ok)`, which stops at the first `Err` and silently truncates every prompt summary that follows. Worse, when the bad line lands before the session header (a real failure mode for crash-truncated or rotation-racing writes), the header is never captured and the entire session is dropped from `agf list`. This is the same bug class as the `extract_first_prompt` `.ok()?` regression fixed in v0.11.3 for the Cursor scanner. Replaced with `let Ok(line) = line_result else { continue; };` so each bad line is skipped individually. Regression test: `parse_session_skips_invalid_utf8_lines`. +- **`scanner/codex`: memory held for `~/.codex/history.jsonl` summaries no longer scales with the user's lifetime codex usage** — `read_history_summaries` streamed the entire file, parsed every line into a `HistoryEntry`, and accumulated `(f64, String)` tuples for every session_id ever seen — including thousands of sessions that no longer have a rollout JSONL on disk. For power users who run codex daily, the file reaches tens of MB; v0.11.3's `CACHE_VERSION` bump to 6 forces a cold rescan on every upgrader, which would otherwise pay this cost on first launch. The function now takes the same `live_session_ids` set already collected by `collect_live_session_ids` and, once a line's `session_id` is known, skips the line if that id is not in the set. The file is still streamed and parsed line by line; what shrinks is the set of entries retained in memory and sorted afterwards. When `live_session_ids` is `None` (transient I/O error on the sessions tree), the legacy "keep everything" behavior is preserved, mirroring `scan_sqlite`'s same-condition fallback. 
Regression tests: `read_history_summaries_pre_filters_against_live_session_ids` and `read_history_summaries_keeps_all_when_live_set_is_none`. + +### Docs + +- **README: Kiro row now surfaces "no per-session resume — always opens the latest session for the cwd"** — `kiro-cli` ignores `session_id`, so selecting a specific older Kiro entry in the TUI silently launches a different session. The caveat was previously only in `Agent::Kiro::resume_cmd`'s inline comment; it now appears in the top agents table where users actually read. +- **README: Hermes row now surfaces "cwd-independent — resumes in your current shell directory"** — documented in the expanded `Full session storage paths` section but missing from the discoverable top table. + +### Internal + +- **`.gitignore`: `.claude/` added** — every contributor running Claude Code locally was seeing a project-local `.claude/` directory show up as untracked in `git status`, with the latent risk of an accidental `git add .` committing a personal agent state directory. + ## [0.11.3] - 2026-05-27 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 41feb79..e494a2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,7 +4,7 @@ version = 4 [[package]] name = "agf" -version = "0.11.3" +version = "0.11.4" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index bec218d..badbbed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "agf" -version = "0.11.3" +version = "0.11.4" edition = "2021" description = "Find and resume local AI coding-agent sessions across Claude Code, Codex, Gemini, Cursor CLI, OpenCode, Kiro, pi, and Hermes" license = "MIT" diff --git a/README.md b/README.md index 8bcf1d6..2fbf63b 100644 --- a/README.md +++ b/README.md @@ -48,9 +48,9 @@ Then you either dig through history files or start over. 
| [Gemini CLI](https://github.com/google-gemini/gemini-cli) | `gemini --resume ` | `~/.gemini/tmp//chats/session-*.json` | | [Cursor CLI](https://cursor.com/docs/cli/overview) | `cursor-agent --resume ` | `~/.cursor/projects/*/agent-transcripts//.jsonl` (Composer 2+)
`~/.cursor/projects/*/agent-transcripts/.txt` (legacy) | | [OpenCode](https://github.com/opencode-ai/opencode) | `opencode -s ` | `~/.local/share/opencode/opencode.db` | -| [Kiro](https://kiro.dev) | `kiro-cli chat --resume` | `~/Library/Application Support/kiro-cli/data.sqlite3` | +| [Kiro](https://kiro.dev) | `kiro-cli chat --resume` *(no per-session resume — always opens the latest session for the cwd)* | `~/Library/Application Support/kiro-cli/data.sqlite3` | | [pi](https://github.com/badlogic/pi-mono) | `pi --session ` | `~/.pi/agent/sessions//*.jsonl` | -| [Hermes](https://github.com/NousResearch/hermes-agent) | `hermes --resume ` | `~/.hermes/state.db` | +| [Hermes](https://github.com/NousResearch/hermes-agent) | `hermes --resume ` *(cwd-independent — resumes in your current shell directory)* | `~/.hermes/state.db` |
Full session storage paths diff --git a/src/scanner/codex.rs b/src/scanner/codex.rs index e16fa25..2ddd76e 100644 --- a/src/scanner/codex.rs +++ b/src/scanner/codex.rs @@ -12,9 +12,6 @@ use crate::scanner::{first_line_truncated, read_first_line}; pub fn scan() -> Result, AgfError> { let codex_dir = crate::config::codex_dir()?; - // Collect summaries from history.jsonl (keyed by session_id, newest-first) - let summaries = read_history_summaries(&codex_dir); - // Build the set of session IDs whose rollout JSONL still exists on disk. // Used to filter out — and prune from SQLite — `threads` rows that were // left behind when the JSONL was deleted manually (e.g. the user wiped @@ -27,6 +24,16 @@ pub fn scan() -> Result, AgfError> { // wiping live rows on a flaky filesystem read. let live_session_ids = collect_live_session_ids(&codex_dir); + // Collect summaries from history.jsonl, pre-filtered against the live set + // when known. `history.jsonl` is append-only and grows unbounded — power + // users hit tens of MB after months of daily use — so keeping every + // historical entry in memory wastes both RAM and post-loop sort time when + // only the currently-listed sessions need summaries. (Surfaced by the + // v0.11.3 post-ship audit; the `CACHE_VERSION=6` bump forces a cold + // rescan on every upgrader, which would otherwise pay this cost on first + // launch.) + let summaries = read_history_summaries(&codex_dir, live_session_ids.as_ref()); + // Primary: read from SQLite (state_*.sqlite) let mut sessions = scan_sqlite(&codex_dir, &summaries, live_session_ids.as_ref()); @@ -375,7 +382,20 @@ struct HistoryEntry { text: Option, } -fn read_history_summaries(codex_dir: &std::path::Path) -> HashMap> { +/// Read `~/.codex/history.jsonl` and group user-prompt summaries by +/// `session_id`, newest-first. 
+/// +/// `live_session_ids` is the set of session IDs whose rollout JSONL still +/// exists on disk; when `Some`, lines whose `session_id` is not in the set +/// are skipped early so historical entries for long-deleted sessions never +/// reach the HashMap. `None` means the caller couldn't enumerate the live set +/// reliably (permission denied / transient I/O); the legacy behavior — keep +/// every entry — is preserved to match `scan_sqlite`'s same-condition +/// fallback at `live_session_ids.is_none()`. +fn read_history_summaries( + codex_dir: &std::path::Path, + live_session_ids: Option<&HashSet>, +) -> HashMap> { let path = codex_dir.join("history.jsonl"); let mut summaries: HashMap> = HashMap::new(); @@ -401,6 +421,11 @@ fn read_history_summaries(codex_dir: &std::path::Path) -> HashMap id, _ => continue, }; + if let Some(live) = live_session_ids { + if !live.contains(&session_id) { + continue; + } + } let ts = entry.ts.unwrap_or(0.0); let text = match entry.text { Some(t) if !t.is_empty() => t, @@ -559,4 +584,58 @@ mod tests { assert!(live.contains("session-abc")); assert_eq!(live.len(), 1); } + + fn seed_history_jsonl(dir: &std::path::Path, entries: &[(&str, f64, &str)]) { + let mut f = fs::File::create(dir.join("history.jsonl")).unwrap(); + for (sid, ts, text) in entries { + writeln!(f, r#"{{"session_id":"{sid}","ts":{ts},"text":"{text}"}}"#).unwrap(); + } + } + + /// Pre-filter: when a live session id set is provided, history entries for + /// any other session id are skipped early so they never reach the + /// returned `HashMap`. Bounds memory growth on `history.jsonl`, which is + /// append-only and reaches tens of MB for power users. 
+ #[test] + fn read_history_summaries_pre_filters_against_live_session_ids() { + let dir = make_codex_dir("hist-prefilter"); + seed_history_jsonl( + &dir, + &[ + ("live-a", 100.0, "kept-A1"), + ("dead-b", 110.0, "dropped"), + ("live-a", 120.0, "kept-A2"), + ("dead-c", 130.0, "dropped"), + ], + ); + let mut live = HashSet::new(); + live.insert("live-a".to_string()); + + let summaries = read_history_summaries(&dir, Some(&live)); + + assert_eq!(summaries.len(), 1); + // Newest-first order: 120.0 before 100.0. + assert_eq!( + summaries.get("live-a").map(|v| v.as_slice()), + Some(&["kept-A2".to_string(), "kept-A1".to_string()][..]) + ); + assert!(!summaries.contains_key("dead-b")); + assert!(!summaries.contains_key("dead-c")); + } + + /// `None` (caller could not enumerate the live set) preserves the legacy + /// "keep every entry" behavior — mirrors `scan_sqlite`'s same-condition + /// fallback so a transient I/O error on the sessions tree can't wipe + /// summaries from the listing. + #[test] + fn read_history_summaries_keeps_all_when_live_set_is_none() { + let dir = make_codex_dir("hist-no-filter"); + seed_history_jsonl(&dir, &[("a", 1.0, "kept-a"), ("b", 2.0, "kept-b")]); + + let summaries = read_history_summaries(&dir, None); + + assert_eq!(summaries.len(), 2); + assert!(summaries.contains_key("a")); + assert!(summaries.contains_key("b")); + } } diff --git a/src/scanner/pi.rs b/src/scanner/pi.rs index 903a0b8..0cdd93f 100644 --- a/src/scanner/pi.rs +++ b/src/scanner/pi.rs @@ -64,7 +64,16 @@ fn parse_session(path: &std::path::Path) -> Option { let mut summaries = Vec::new(); let mut bytes_read = 0usize; - for line in reader.lines().map_while(Result::ok) { + for line_result in reader.lines() { + // A single bad line (invalid UTF-8, transient IO error) must skip just + // that line — never abort the rest of the file. 
`map_while(Result::ok)` + // stops at the first `Err`, which silently truncates summaries (or + // drops the entire session if the bad line is line 1, since the header + // is not yet captured). Same bug class as the cursor `.ok()?` fix in + // v0.11.3 (`extract_first_prompt_skips_invalid_utf8_lines`). + let Ok(line) = line_result else { + continue; + }; // +1 approximates the newline stripped by `lines()`; we only need a // rough byte budget, not an exact count. bytes_read += line.len() + 1; @@ -230,6 +239,43 @@ mod tests { ); } + /// Regression: a single line with invalid UTF-8 bytes used to abort the + /// entire per-line loop via `reader.lines().map_while(Result::ok)`. If the + /// bad line preceded the session header, the whole session was silently + /// dropped. The fix skips just the bad line and keeps iterating, so the + /// header on line 2 is captured and the user prompt on line 3 still + /// surfaces. Same bug class as the cursor `.ok()?` fix in v0.11.3. + #[test] + fn parse_session_skips_invalid_utf8_lines() { + let pid = std::process::id(); + let ts = chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(); + let path = std::env::temp_dir().join(format!("agf-pi-badutf8-{pid}-{ts}.jsonl")); + let _ = fs::remove_file(&path); + + let mut bytes: Vec = Vec::new(); + // Line 1: a truncated multibyte sequence — `BufReader::lines()` returns + // `Err(InvalidData)` for this row. + bytes.extend_from_slice(&[0xC3, 0x28]); + bytes.push(b'\n'); + // Line 2: valid session header. + bytes.extend_from_slice( + br#"{"type":"session","id":"recovered","timestamp":"2026-05-29T00:00:00Z","cwd":"/tmp/project"}"#, + ); + bytes.push(b'\n'); + // Line 3: valid user message. 
+ bytes.extend_from_slice( + br#"{"type":"message","message":{"role":"user","content":[{"type":"text","text":"after garbage"}]}}"#, + ); + fs::write(&path, &bytes).unwrap(); + + let session = parse_session(&path); + let _ = fs::remove_file(&path); + + let session = session.expect("session should surface despite invalid UTF-8 on line 1"); + assert_eq!(session.session_id, "recovered"); + assert_eq!(session.summaries, vec!["after garbage"]); + } + /// `scan()` walks `$HOME/.pi/agent/sessions`; redirecting the home dir via /// the `HOME` env var only works on Unix, so the dir-walk + multi-session /// (dedup-removal) coverage is gated to Unix.