From ff87bf352e194ab6e7a795cc62dda3c3f31806af Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Wed, 10 Jun 2026 10:03:50 -0500 Subject: [PATCH 01/24] chore: cleanup config examples --- config/examples/cross-system-flow-analyzer.yaml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/config/examples/cross-system-flow-analyzer.yaml b/config/examples/cross-system-flow-analyzer.yaml index 41e1751..66c8759 100644 --- a/config/examples/cross-system-flow-analyzer.yaml +++ b/config/examples/cross-system-flow-analyzer.yaml @@ -69,7 +69,7 @@ Orchestration: Models: heavy: - ModelId: claude-opus-4-7 + ModelId: claude-opus-4-8 scout: ModelId: claude-haiku-4-5-20251001 @@ -94,12 +94,15 @@ Orchestration: CutoverAt: 110000 # compact early; artifact handoffs eliminate the need for large live contexts FailureHandling: - RetryCount: 2 - OnAgentFailure: - - checkpoint - - compact - - retry - - escalate + MissingEvidence: + Action: Reinstruct + Threshold: 3 + ConflictingEvidence: + Action: Reinstruct + Threshold: 2 + NoProgress: + Action: Abort + Threshold: 3 Events: Path: .fuseraft/logs/events.jsonl From 1b8c686c9957f3d5a7575bc9e1e41c3fbfa65132 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Wed, 10 Jun 2026 10:49:54 -0500 Subject: [PATCH 02/24] feat(context): reduce lost-in-the-middle effect - ApplyWithManifest appends a [Context Manifest] listing active and superseded tool results so the model knows what it can still see without re-reading full history at the recency end of the prompt - ContextAssembler repeats a brief task reminder at the end of any assembled context exceeding 2 000 chars, sandwiching the objective at both the primacy and recency positions - Tombstones now include the evicted tool label and a 300-char content preview so the model can judge whether to re-read without issuing a blind full-file read --- src/Orchestration/AgentOrchestrator.cs | 13 +- src/Orchestration/ContextAssembler.cs | 12 + 
src/Orchestration/ToolResultWindowTrimmer.cs | 136 +++++++- .../ToolResultWindowTrimmerTests.cs | 317 ++++++++++++++++++ 4 files changed, 461 insertions(+), 17 deletions(-) create mode 100644 tests/FuseraftCli.Tests/ToolResultWindowTrimmerTests.cs diff --git a/src/Orchestration/AgentOrchestrator.cs b/src/Orchestration/AgentOrchestrator.cs index c512d6b..8e3958c 100644 --- a/src/Orchestration/AgentOrchestrator.cs +++ b/src/Orchestration/AgentOrchestrator.cs @@ -823,7 +823,18 @@ await EmitContextAssemblyAsync(eventEmitter, assembled.Metrics, turn, var contextList = context as IList ?? context.ToList(); if (config.ContextBudget is { MaxToolResultTokens: > 0 } toolBudget) - contextList = ToolResultWindowTrimmer.Apply(contextList, toolBudget); + { + var (trimmed, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(contextList, toolBudget); + if (manifest is not null) + { + var withManifest = new List(trimmed) + { + new ChatMessage(ChatRole.User, manifest) + }; + return withManifest; + } + return trimmed; + } return contextList; } diff --git a/src/Orchestration/ContextAssembler.cs b/src/Orchestration/ContextAssembler.cs index e6aaa75..be74ced 100644 --- a/src/Orchestration/ContextAssembler.cs +++ b/src/Orchestration/ContextAssembler.cs @@ -202,6 +202,18 @@ public async Task> AssembleForAgentAsync( var pendingCorrections = ExtractPendingCorrections(agentName, sharedHistory); result.AddRange(pendingCorrections); + // 5. Task reminder — sandwich the objective at both ends of a non-trivial context. + // The task is already at position 0 (primacy effect); repeating a brief version at the + // very end exploits the recency effect so the agent's goal stays visible after a long + // assembled context block. Only injected when there is enough content between the two + // endpoints to make the reminder worthwhile. + int charsAfterTask = result.Skip(1).Sum(m => m.Text?.Length ?? 0); + if (task.Length > 50 && charsAfterTask > 2_000) + { + var preview = task.Length > 200 ? 
task[..200] + "…" : task; + result.Add(new ChatMessage(ChatRole.User, $"[Task Reminder]\n\n{preview}")); + } + return result; } diff --git a/src/Orchestration/ToolResultWindowTrimmer.cs b/src/Orchestration/ToolResultWindowTrimmer.cs index a374d95..fb83232 100644 --- a/src/Orchestration/ToolResultWindowTrimmer.cs +++ b/src/Orchestration/ToolResultWindowTrimmer.cs @@ -1,3 +1,4 @@ +using System.Text; using Microsoft.Extensions.AI; using fuseraft.Core.Models; @@ -11,7 +12,7 @@ namespace fuseraft.Orchestration; /// items in exceeds , /// the oldest results beyond the last /// are replaced with one-line tombstones of the form: -/// [tool result for read_file(graph.py) — evicted after tool window exceeded] +/// [tool result — evicted after tool window exceeded] /// /// /// @@ -24,24 +25,40 @@ namespace fuseraft.Orchestration; public static class ToolResultWindowTrimmer { // Characters per token estimate — consistent with the rest of the codebase. - private const int CharsPerToken = 4; + private const int CharsPerToken = 4; + // Number of original-content chars to include in a tombstone as a content preview. + // Bounded so tombstones stay cheap even for large files (~75 tokens). + private const int ExcerptChars = 300; + + internal const string TombstonePrefix = "[tool result — evicted"; /// /// Returns a new list with old tool results tombstoned when the budget is exceeded, /// or returns unchanged when trimming is not needed. + /// + /// + /// Each tombstone names the evicted tool and includes a short content preview so + /// the model can judge whether to re-read with a targeted range, without fetching + /// the full result again. + /// /// public static IList Apply(IList context, ContextBudgetConfig budget) { if (budget.MaxToolResultTokens <= 0) return context; - // Collect all ChatMessage indices that contain at least one FunctionResultContent, - // along with their estimated token cost. Walk in order so we can tombstone the oldest. 
+ // Pass 1: collect budget info and build callId → label map for enriched tombstones. var resultMessages = new List<(int MsgIdx, int EstTokens)>(); int totalEstTokens = 0; + var callLabels = new Dictionary(StringComparer.OrdinalIgnoreCase); for (int i = 0; i < context.Count; i++) { - var msg = context[i]; + var msg = context[i]; + + foreach (var call in msg.Contents.OfType()) + if (call.CallId is not null) + callLabels[call.CallId] = FormatCallLabel(call); + int resultChars = msg.Contents .OfType() .Sum(fr => fr.Result?.ToString()?.Length ?? 0); @@ -65,11 +82,10 @@ public static IList Apply(IList context, ContextBudget var evictIndices = new HashSet( resultMessages.Take(evictUpTo).Select(r => r.MsgIdx)); - // Build the trimmed list, replacing evicted messages with a tombstone. + // Pass 2: build trimmed list with enriched tombstones. var trimmed = new List(context.Count); foreach (var msg in context) { - int idx = trimmed.Count; // index in source context if (evictIndices.Contains(trimmed.Count)) { // Replace tool result content with tombstones; keep function-call @@ -79,21 +95,27 @@ public static IList Apply(IList context, ContextBudget { if (item is FunctionResultContent fr) { - // Build a compact tombstone that names the tool and call ID. - var callId = fr.CallId ?? "unknown"; - tombstoned.Add(new FunctionResultContent(callId, - $"[tool result — evicted after tool window exceeded]")); + var callId = fr.CallId ?? "unknown"; + var label = callLabels.GetValueOrDefault(callId, callId); + var content = fr.Result?.ToString() ?? ""; + var excerpt = content.Length > 0 + ? (content.Length > ExcerptChars + ? content[..ExcerptChars].TrimEnd() + "…" + : content.Trim()) + : string.Empty; + + var tombstone = string.IsNullOrEmpty(excerpt) + ? $"{TombstonePrefix}: {label}. Re-read with targeted ranges if needed.]" + : $"{TombstonePrefix}: {label}. Preview: \"{excerpt}\". 
Re-read with targeted ranges if needed.]"; + + tombstoned.Add(new FunctionResultContent(callId, tombstone)); } else { tombstoned.Add(item); } } - var replacement = new ChatMessage(msg.Role, tombstoned) - { - AuthorName = msg.AuthorName - }; - trimmed.Add(replacement); + trimmed.Add(new ChatMessage(msg.Role, tombstoned) { AuthorName = msg.AuthorName }); } else { @@ -103,4 +125,86 @@ public static IList Apply(IList context, ContextBudget return trimmed; } + + /// + /// Applies the tool-result window budget and returns a context manifest alongside + /// the trimmed message list. The manifest is non-null only when evictions occurred; + /// it lists active tool results and superseded (evicted) ones so the model knows + /// which reads are still available and which must be re-issued with targeted ranges. + /// + public static (IList Messages, string? Manifest) ApplyWithManifest( + IList context, + ContextBudgetConfig budget) + { + var trimmed = Apply(context, budget); + + // Apply returned the same reference — nothing was evicted, no manifest needed. + if (ReferenceEquals(trimmed, context)) return (trimmed, null); + + // Build callId → label from the ORIGINAL context before eviction. + var callLabels = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var msg in context) + { + foreach (var call in msg.Contents.OfType()) + { + if (call.CallId is not null) + callLabels[call.CallId] = FormatCallLabel(call); + } + } + + var active = new List(); + var superseded = new List(); + + foreach (var msg in trimmed) + { + foreach (var fr in msg.Contents.OfType()) + { + var callId = fr.CallId ?? "unknown"; + var label = callLabels.GetValueOrDefault(callId, callId); + var result = fr.Result?.ToString() ?? 
""; + + if (result.StartsWith(TombstonePrefix, StringComparison.Ordinal)) + superseded.Add(label); + else + active.Add(label); + } + } + + if (active.Count == 0 && superseded.Count == 0) return (trimmed, null); + + var sb = new StringBuilder(); + sb.AppendLine("[Context Manifest]"); + + if (active.Count > 0) + { + sb.AppendLine(); + sb.AppendLine($"Active tool results ({active.Count}):"); + foreach (var a in active) sb.AppendLine($"- {a}"); + } + + if (superseded.Count > 0) + { + sb.AppendLine(); + sb.AppendLine($"Superseded ({superseded.Count}) — evicted from context. Re-read with targeted ranges if needed:"); + foreach (var s in superseded) sb.AppendLine($"- {s}"); + } + + return (trimmed, sb.ToString().TrimEnd()); + } + + private static string FormatCallLabel(FunctionCallContent call) + { + var name = call.Name ?? "tool"; + if (call.Arguments is null || call.Arguments.Count == 0) return name; + + foreach (var key in new[] { "path", "command", "query", "content", "name" }) + { + if (call.Arguments.TryGetValue(key, out var val) && val is string s) + return $"{name}({(s.Length > 50 ? s[..50] + "…" : s)})"; + } + + var first = call.Arguments.Values.FirstOrDefault()?.ToString() ?? ""; + return string.IsNullOrEmpty(first) ? name + : $"{name}({(first.Length > 50 ? first[..50] + "…" : first)})"; + } } diff --git a/tests/FuseraftCli.Tests/ToolResultWindowTrimmerTests.cs b/tests/FuseraftCli.Tests/ToolResultWindowTrimmerTests.cs new file mode 100644 index 0000000..a5b16d8 --- /dev/null +++ b/tests/FuseraftCli.Tests/ToolResultWindowTrimmerTests.cs @@ -0,0 +1,317 @@ +using Microsoft.Extensions.AI; +using fuseraft.Core.Models; +using fuseraft.Orchestration; + +namespace FuseraftCli.Tests; + +/// +/// Unit tests for — both the original +/// Apply contract and the new ApplyWithManifest extension. 
+/// +public sealed class ToolResultWindowTrimmerTests +{ + // ── Helpers ─────────────────────────────────────────────────────────────── + + private static ChatMessage ToolCall(string callId, string name, + Dictionary? args = null) + => new(ChatRole.Assistant, + [new FunctionCallContent(callId, name, args)]); + + private static ChatMessage ToolResult(string callId, string content) + => new(ChatRole.Tool, + [new FunctionResultContent(callId, content)]); + + private static ContextBudgetConfig Budget(int maxTokens, int window = 1) + => new() { MaxToolResultTokens = maxTokens, InTurnToolWindow = window }; + + // ── Apply — existing contract (regression guard) ────────────────────────── + + [Fact] + public void Apply_returns_same_reference_when_budget_not_exceeded() + { + var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", new string('x', 100)), + }; + var budget = Budget(maxTokens: 1_000); + + var result = ToolResultWindowTrimmer.Apply(context, budget); + + Assert.Same(context, result); + } + + [Fact] + public void Apply_tombstones_oldest_results_when_budget_exceeded() + { + // Two results, each ~250 tokens (1 000 chars / 4). Budget = 300 tokens, + // window = 1 so the first result is evicted. + var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", new string('a', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 1_000)), + }; + var budget = Budget(maxTokens: 300, window: 1); + + var result = ToolResultWindowTrimmer.Apply(context, budget); + + var first = result[1].Contents.OfType().Single(); + var second = result[3].Contents.OfType().Single(); + + Assert.StartsWith(ToolResultWindowTrimmer.TombstonePrefix, first.Result?.ToString()); + Assert.DoesNotContain(ToolResultWindowTrimmer.TombstonePrefix, second.Result?.ToString() ?? 
""); + } + + // ── Apply — item 3: enriched tombstone includes tool label ──────────────── + + [Fact] + public void Apply_tombstone_includes_tool_label_from_preceding_call() + { + var context = new List + { + ToolCall("c1", "read_file", new() { ["path"] = "src/Foo.cs" }), + ToolResult("c1", new string('a', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 200)), + }; + + var result = ToolResultWindowTrimmer.Apply(context, Budget(100, window: 1)); + + var tombstone = result[1].Contents.OfType().Single().Result?.ToString(); + Assert.Contains("read_file(src/Foo.cs)", tombstone); + } + + [Fact] + public void Apply_tombstone_falls_back_to_call_id_when_no_preceding_call() + { + var context = new List + { + ToolResult("orphan", new string('a', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 200)), + }; + + var result = ToolResultWindowTrimmer.Apply(context, Budget(100, window: 1)); + + var tombstone = result[0].Contents.OfType().Single().Result?.ToString(); + Assert.Contains("orphan", tombstone); + } + + // ── Apply — item 4: tombstone includes content preview ──────────────────── + + [Fact] + public void Apply_tombstone_includes_content_preview() + { + const string distinctStart = "UNIQUE_CONTENT_START"; + var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", distinctStart + new string('x', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 200)), + }; + + var result = ToolResultWindowTrimmer.Apply(context, Budget(100, window: 1)); + + var tombstone = result[1].Contents.OfType().Single().Result?.ToString(); + Assert.Contains(distinctStart, tombstone); + Assert.Contains("Preview:", tombstone); + } + + [Fact] + public void Apply_tombstone_truncates_preview_at_excerpt_limit() + { + // Content is much longer than ExcerptChars — tombstone must end with the ellipsis marker. 
+ var longContent = new string('z', 2_000); + var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", longContent), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 200)), + }; + + var result = ToolResultWindowTrimmer.Apply(context, Budget(100, window: 1)); + + var tombstone = result[1].Contents.OfType().Single().Result?.ToString(); + Assert.NotNull(tombstone); + Assert.Contains("…", tombstone); + // The full 2 000-char content must NOT appear verbatim in the tombstone. + Assert.DoesNotContain(longContent, tombstone); + } + + [Fact] + public void Apply_tombstone_includes_re_read_hint() + { + // Every tombstone should guide the model toward targeted reads. + var context = new List + { + ToolCall("c1", "read_file", new() { ["path"] = "src/Foo.cs" }), + ToolResult("c1", new string('a', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 200)), + }; + + var result = ToolResultWindowTrimmer.Apply(context, Budget(100, window: 1)); + + var tombstone = result[1].Contents.OfType().Single().Result?.ToString(); + Assert.Contains("targeted ranges", tombstone); + } + + // ── ApplyWithManifest — null manifest when nothing evicted ──────────────── + + [Fact] + public void ApplyWithManifest_returns_null_manifest_when_budget_not_exceeded() + { + var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", new string('x', 40)), + }; + + var (messages, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(context, Budget(1_000)); + + Assert.Same(context, messages); + Assert.Null(manifest); + } + + [Fact] + public void ApplyWithManifest_returns_null_manifest_when_budget_disabled() + { + var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", new string('x', 10_000)), + }; + + var (messages, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(context, Budget(0)); + + Assert.Null(manifest); + } + + // ── ApplyWithManifest — manifest content when evictions occur ───────────── + + [Fact] + 
public void ApplyWithManifest_returns_non_null_manifest_when_evictions_occur() + { + var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", new string('a', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 1_000)), + }; + + var (_, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(context, Budget(300, window: 1)); + + Assert.NotNull(manifest); + } + + [Fact] + public void ApplyWithManifest_manifest_lists_superseded_call() + { + var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", new string('a', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 1_000)), + }; + + var (_, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(context, Budget(300, window: 1)); + + Assert.Contains("Superseded", manifest); + Assert.Contains("read_file", manifest); + } + + [Fact] + public void ApplyWithManifest_manifest_lists_active_call() + { + var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", new string('a', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 1_000)), + }; + + var (_, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(context, Budget(300, window: 1)); + + Assert.Contains("Active tool results", manifest); + Assert.Contains("shell_run", manifest); + } + + // ── Label formatting ────────────────────────────────────────────────────── + + [Fact] + public void ApplyWithManifest_formats_label_with_path_argument() + { + var context = new List + { + ToolCall("c1", "read_file", new() { ["path"] = "src/Foo.cs" }), + ToolResult("c1", new string('a', 1_000)), + ToolCall("c2", "read_file"), + ToolResult("c2", new string('b', 200)), + }; + + var (_, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(context, Budget(100, window: 1)); + + Assert.Contains("read_file(src/Foo.cs)", manifest); + } + + [Fact] + public void ApplyWithManifest_formats_label_with_command_argument() + { + var context = new List + { + ToolCall("c1", 
"shell_run", new() { ["command"] = "dotnet build" }), + ToolResult("c1", new string('a', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 200)), + }; + + var (_, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(context, Budget(100, window: 1)); + + Assert.Contains("shell_run(dotnet build)", manifest); + } + + [Fact] + public void ApplyWithManifest_truncates_long_argument_in_label() + { + var longPath = new string('z', 80); + var context = new List + { + ToolCall("c1", "read_file", new() { ["path"] = longPath }), + ToolResult("c1", new string('a', 1_000)), + ToolCall("c2", "read_file"), + ToolResult("c2", new string('b', 200)), + }; + + var (_, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(context, Budget(100, window: 1)); + + Assert.NotNull(manifest); + // Label must be truncated — the full 80-char path should not appear verbatim + Assert.DoesNotContain(longPath, manifest); + Assert.Contains("read_file(", manifest); + Assert.Contains("…", manifest); + } + + [Fact] + public void ApplyWithManifest_falls_back_to_call_id_when_no_matching_call_in_context() + { + // ToolResult with no preceding ToolCall in this slice — fallback to callId. 
+ var context = new List + { + ToolResult("orphan-call", new string('a', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 200)), + }; + + var (_, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(context, Budget(100, window: 1)); + + Assert.NotNull(manifest); + Assert.Contains("orphan-call", manifest); + } +} From 1ab416e00d9eb7760d10ef434e5d5980c642aa17 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Wed, 10 Jun 2026 11:09:07 -0500 Subject: [PATCH 03/24] refactor(context): unify Apply/ApplyWithManifest via shared ApplyCore - ApplyWithManifest previously called Apply then rebuilt callLabels with a second scan of the original context; ApplyCore does one pass and returns the map + evicted flag, eliminating the redundant work - extract task-reminder thresholds into named constants in ContextAssembler to make the intent readable without inline comments - add tests for the all-evicted manifest path and the disabled-budget same-reference fast path --- src/Orchestration/ContextAssembler.cs | 14 +- src/Orchestration/ToolResultWindowTrimmer.cs | 141 +++++++++--------- .../ToolResultWindowTrimmerTests.cs | 37 +++++ 3 files changed, 114 insertions(+), 78 deletions(-) diff --git a/src/Orchestration/ContextAssembler.cs b/src/Orchestration/ContextAssembler.cs index be74ced..0eb3236 100644 --- a/src/Orchestration/ContextAssembler.cs +++ b/src/Orchestration/ContextAssembler.cs @@ -43,6 +43,10 @@ public sealed class ContextAssembler // contains more text, but still bounded so 4 verbose turns don't silently cost 80k chars. 
private const int DefaultMaxCharsOwnHistory = 8_000; + private const int TaskReminderMinContextChars = 2_000; + private const int TaskReminderMinTaskLength = 50; + private const int TaskReminderPreviewChars = 200; + private static readonly JsonSerializerOptions JsonOpts = new() { PropertyNameCaseInsensitive = true, @@ -202,15 +206,11 @@ public async Task> AssembleForAgentAsync( var pendingCorrections = ExtractPendingCorrections(agentName, sharedHistory); result.AddRange(pendingCorrections); - // 5. Task reminder — sandwich the objective at both ends of a non-trivial context. - // The task is already at position 0 (primacy effect); repeating a brief version at the - // very end exploits the recency effect so the agent's goal stays visible after a long - // assembled context block. Only injected when there is enough content between the two - // endpoints to make the reminder worthwhile. + // 5. Repeat task at recency end — exploits primacy+recency sandwich for long contexts. int charsAfterTask = result.Skip(1).Sum(m => m.Text?.Length ?? 0); - if (task.Length > 50 && charsAfterTask > 2_000) + if (task.Length > TaskReminderMinTaskLength && charsAfterTask > TaskReminderMinContextChars) { - var preview = task.Length > 200 ? task[..200] + "…" : task; + var preview = task.Length > TaskReminderPreviewChars ? 
task[..TaskReminderPreviewChars] + "…" : task; result.Add(new ChatMessage(ChatRole.User, $"[Task Reminder]\n\n{preview}")); } diff --git a/src/Orchestration/ToolResultWindowTrimmer.cs b/src/Orchestration/ToolResultWindowTrimmer.cs index fb83232..a990c03 100644 --- a/src/Orchestration/ToolResultWindowTrimmer.cs +++ b/src/Orchestration/ToolResultWindowTrimmer.cs @@ -32,6 +32,8 @@ public static class ToolResultWindowTrimmer internal const string TombstonePrefix = "[tool result — evicted"; + private static readonly string[] s_labelKeys = ["path", "command", "query", "content", "name"]; + /// /// Returns a new list with old tool results tombstoned when the budget is exceeded, /// or returns unchanged when trimming is not needed. @@ -44,7 +46,70 @@ public static class ToolResultWindowTrimmer /// public static IList Apply(IList context, ContextBudgetConfig budget) { - if (budget.MaxToolResultTokens <= 0) return context; + var (trimmed, _, _) = ApplyCore(context, budget); + return trimmed; + } + + /// + /// Applies the tool-result window budget and returns a context manifest alongside + /// the trimmed message list. The manifest is non-null only when evictions occurred; + /// it lists active tool results and superseded (evicted) ones so the model knows + /// which reads are still available and which must be re-issued with targeted ranges. + /// + public static (IList Messages, string? Manifest) ApplyWithManifest( + IList context, + ContextBudgetConfig budget) + { + var (trimmed, callLabels, evicted) = ApplyCore(context, budget); + if (!evicted) return (trimmed, null); + + var active = new List(); + var superseded = new List(); + + foreach (var msg in trimmed) + { + foreach (var fr in msg.Contents.OfType()) + { + var callId = fr.CallId ?? "unknown"; + var label = callLabels.GetValueOrDefault(callId, callId); + var result = fr.Result?.ToString() ?? 
""; + + if (result.StartsWith(TombstonePrefix, StringComparison.Ordinal)) + superseded.Add(label); + else + active.Add(label); + } + } + + if (active.Count == 0 && superseded.Count == 0) return (trimmed, null); + + var sb = new StringBuilder(); + sb.AppendLine("[Context Manifest]"); + + if (active.Count > 0) + { + sb.AppendLine(); + sb.AppendLine($"Active tool results ({active.Count}):"); + foreach (var a in active) sb.AppendLine($"- {a}"); + } + + if (superseded.Count > 0) + { + sb.AppendLine(); + sb.AppendLine($"Superseded ({superseded.Count}) — evicted from context. Re-read with targeted ranges if needed:"); + foreach (var s in superseded) sb.AppendLine($"- {s}"); + } + + return (trimmed, sb.ToString().TrimEnd()); + } + + // Returns (trimmed list, callLabels map, evicted flag). + // When evicted is false, trimmed is the same reference as context and callLabels holds + // the map built during the scan (useful to ApplyWithManifest without a second pass). + private static (IList Trimmed, Dictionary CallLabels, bool Evicted) + ApplyCore(IList context, ContextBudgetConfig budget) + { + if (budget.MaxToolResultTokens <= 0) return (context, [], false); // Pass 1: collect budget info and build callId → label map for enriched tombstones. var resultMessages = new List<(int MsgIdx, int EstTokens)>(); @@ -71,13 +136,13 @@ public static IList Apply(IList context, ContextBudget } // Fast path — nothing to trim. - if (totalEstTokens <= budget.MaxToolResultTokens) return context; + if (totalEstTokens <= budget.MaxToolResultTokens) return (context, callLabels, false); // Determine how many of the oldest results to evict. // Always keep at least the last InTurnToolWindow results verbatim. 
int retainCount = Math.Max(0, budget.InTurnToolWindow); int evictUpTo = Math.Max(0, resultMessages.Count - retainCount); - if (evictUpTo == 0) return context; + if (evictUpTo == 0) return (context, callLabels, false); var evictIndices = new HashSet( resultMessages.Take(evictUpTo).Select(r => r.MsgIdx)); @@ -123,73 +188,7 @@ public static IList Apply(IList context, ContextBudget } } - return trimmed; - } - - /// - /// Applies the tool-result window budget and returns a context manifest alongside - /// the trimmed message list. The manifest is non-null only when evictions occurred; - /// it lists active tool results and superseded (evicted) ones so the model knows - /// which reads are still available and which must be re-issued with targeted ranges. - /// - public static (IList Messages, string? Manifest) ApplyWithManifest( - IList context, - ContextBudgetConfig budget) - { - var trimmed = Apply(context, budget); - - // Apply returned the same reference — nothing was evicted, no manifest needed. - if (ReferenceEquals(trimmed, context)) return (trimmed, null); - - // Build callId → label from the ORIGINAL context before eviction. - var callLabels = new Dictionary(StringComparer.OrdinalIgnoreCase); - foreach (var msg in context) - { - foreach (var call in msg.Contents.OfType()) - { - if (call.CallId is not null) - callLabels[call.CallId] = FormatCallLabel(call); - } - } - - var active = new List(); - var superseded = new List(); - - foreach (var msg in trimmed) - { - foreach (var fr in msg.Contents.OfType()) - { - var callId = fr.CallId ?? "unknown"; - var label = callLabels.GetValueOrDefault(callId, callId); - var result = fr.Result?.ToString() ?? 
""; - - if (result.StartsWith(TombstonePrefix, StringComparison.Ordinal)) - superseded.Add(label); - else - active.Add(label); - } - } - - if (active.Count == 0 && superseded.Count == 0) return (trimmed, null); - - var sb = new StringBuilder(); - sb.AppendLine("[Context Manifest]"); - - if (active.Count > 0) - { - sb.AppendLine(); - sb.AppendLine($"Active tool results ({active.Count}):"); - foreach (var a in active) sb.AppendLine($"- {a}"); - } - - if (superseded.Count > 0) - { - sb.AppendLine(); - sb.AppendLine($"Superseded ({superseded.Count}) — evicted from context. Re-read with targeted ranges if needed:"); - foreach (var s in superseded) sb.AppendLine($"- {s}"); - } - - return (trimmed, sb.ToString().TrimEnd()); + return (trimmed, callLabels, true); } private static string FormatCallLabel(FunctionCallContent call) @@ -197,7 +196,7 @@ private static string FormatCallLabel(FunctionCallContent call) var name = call.Name ?? "tool"; if (call.Arguments is null || call.Arguments.Count == 0) return name; - foreach (var key in new[] { "path", "command", "query", "content", "name" }) + foreach (var key in s_labelKeys) { if (call.Arguments.TryGetValue(key, out var val) && val is string s) return $"{name}({(s.Length > 50 ? s[..50] + "…" : s)})"; diff --git a/tests/FuseraftCli.Tests/ToolResultWindowTrimmerTests.cs b/tests/FuseraftCli.Tests/ToolResultWindowTrimmerTests.cs index a5b16d8..904b471 100644 --- a/tests/FuseraftCli.Tests/ToolResultWindowTrimmerTests.cs +++ b/tests/FuseraftCli.Tests/ToolResultWindowTrimmerTests.cs @@ -314,4 +314,41 @@ public void ApplyWithManifest_falls_back_to_call_id_when_no_matching_call_in_con Assert.NotNull(manifest); Assert.Contains("orphan-call", manifest); } + + // ── ApplyWithManifest — all results evicted (window = 0) ────────────────── + + [Fact] + public void ApplyWithManifest_manifest_with_all_results_evicted_shows_only_superseded() + { + // window = 0 retains nothing — every result is evicted once budget is exceeded. 
+ var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", new string('a', 1_000)), + ToolCall("c2", "shell_run"), + ToolResult("c2", new string('b', 1_000)), + }; + + var (_, manifest) = ToolResultWindowTrimmer.ApplyWithManifest(context, Budget(100, window: 0)); + + Assert.NotNull(manifest); + Assert.Contains("Superseded", manifest); + Assert.DoesNotContain("Active tool results", manifest); + } + + // ── Apply — returns same reference when budget disabled ─────────────────── + + [Fact] + public void Apply_returns_same_reference_when_budget_disabled() + { + var context = new List + { + ToolCall("c1", "read_file"), + ToolResult("c1", new string('x', 10_000)), + }; + + var result = ToolResultWindowTrimmer.Apply(context, Budget(0)); + + Assert.Same(context, result); + } } From a74d0ccaf7c75fc383c68fc76db7481b2112f686 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Wed, 10 Jun 2026 23:18:54 -0500 Subject: [PATCH 04/24] docs: add playwright-mcp example --- config/examples/playwright-mcp.yaml | 61 +++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 config/examples/playwright-mcp.yaml diff --git a/config/examples/playwright-mcp.yaml b/config/examples/playwright-mcp.yaml new file mode 100644 index 0000000..e409812 --- /dev/null +++ b/config/examples/playwright-mcp.yaml @@ -0,0 +1,61 @@ +## Playwright MCP example: a single browser-automation agent backed by the Playwright MCP server. +## Prerequisites: +## 1. Install the correct Chromium build for the MCP server's playwright-core version: +## node $(npx --yes @playwright/mcp@latest node -e "process.exit(0)" 2>/dev/null; \ +## find ~/.npm/_npx -name "cli.js" -path "*/playwright-core/*" | head -1) install chromium +## Or more simply, find the cli.js path and run: node install chromium +## 2. Ensure XAI_API_KEY is set in your environment. 
+## Run: fuseraft run --config config/examples/playwright-mcp.yaml "Navigate to https://example.com and take a screenshot" +## Validate: fuseraft validate config/examples/playwright-mcp.yaml + +Orchestration: + Name: PlaywrightExample + Description: >- + Single-agent setup that drives a browser via the Playwright MCP server. + The agent can navigate pages, click elements, fill forms, and capture screenshots. + + McpServers: + - Name: playwright + Transport: stdio + Command: npx + Args: + - "@playwright/mcp@latest" + - "--browser" + - "chromium" # must match the browser installed via playwright-core's cli.js + + Agents: + - Name: BrowserAgent + Description: Automates browser interactions using Playwright tools. + Instructions: | + You are a browser automation agent with access to Playwright tools. + + Use the playwright MCP tools to complete the requested task: + - Navigate to URLs with browser_navigate + - Click elements with browser_click + - Fill forms with browser_fill + - Take screenshots with browser_screenshot + - Read page content with browser_snapshot + + Be concise. Report what you did and what you observed. 
+ Model: + ModelId: grok-4.3 + Endpoint: https://api.x.ai/v1 + ApiKeyEnvVar: XAI_API_KEY + MaxTokens: 4096 + Plugins: + - playwright + + Selection: + Type: roundrobin + + Termination: + Type: composite + MaxIterations: 10 + Strategies: + - Type: regex + Pattern: "(?i)\\bdone\\b" + AgentNames: + - BrowserAgent + + Events: + Path: .fuseraft/events.jsonl From 668d4b31a69e438b2542a549bea53eff2c7eb516 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Wed, 10 Jun 2026 23:19:55 -0500 Subject: [PATCH 05/24] docs(agents): document context shaping and tool-result trimming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Task Reminder and Context Manifest are non-obvious behavioral features that affect how agents perceive their own context; worth calling out so contributors know to preserve the primacy+recency sandwich invariant - Tombstone format changed (now includes label + preview) — example in the doc prevents stale assumptions about the old single-line format - Add ContextAssembler and ToolResultWindowTrimmer to Where-to-look table --- AGENTS.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index d20d029..e626c79 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -154,6 +154,23 @@ Validators must not call LLMs or external services. Violations collapse the dete --- +## Context shaping + +Two mechanisms reduce lost-in-the-middle effects for long agent contexts: + +**Task Reminder** (`ContextAssembler`): When the assembled context exceeds 2 000 characters and the task string is longer than 50 characters, `ContextAssembler.AssembleForAgentAsync` appends a `[Task Reminder]` `ChatRole.User` message (up to 200 chars of the task) at the recency end of the context list. This exploits the primacy+recency sandwich — the task appears both at the top (system prompt) and at the bottom (reminder). 
+ +**Context Manifest** (`ToolResultWindowTrimmer` + `AgentOrchestrator`): When `MaxToolResultTokens` is exceeded, `ToolResultWindowTrimmer.ApplyWithManifest` tombstones old results and returns a manifest string listing active vs. superseded tool results. `AgentOrchestrator` appends this manifest as a final `ChatRole.User` message so the agent knows which reads are still in context and which must be re-issued with targeted ranges. + +Tombstones now include the evicted tool's name, a key argument label, and up to 300 characters of the original content as a preview: +``` +[tool result — evicted: read_file(src/Foo.cs). Preview: "using System;…". Re-read with targeted ranges if needed.] +``` + +`ToolResultWindowTrimmer.Apply` is still the zero-manifest entry point used by callers that don't need the manifest. Both delegate to the private `ApplyCore`. + +--- + ## Shared history invariant The system maintains two views of history: @@ -236,5 +253,7 @@ When adding a new `FailureAction` or `FailureType` value, update: | How does AgentFile loading work? | `src/Cli/OrchestratorBuilder.cs` → `ResolveAgentFiles` | | How does compaction work? | `src/Orchestration/ConversationCompactor.cs` | | How does change tracking work? | `src/Orchestration/ChangeTracker.cs` | +| How is agent context assembled? | `src/Orchestration/ContextAssembler.cs` | +| How are tool results trimmed / tombstoned? 
| `src/Orchestration/ToolResultWindowTrimmer.cs` | | Full architecture decisions | `docs/design.md` | | Hardening configs against hallucination | `docs/harness-engineering.md` | From 04cda0140ef337506c086455da63c76cc38e787a Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Wed, 10 Jun 2026 23:21:26 -0500 Subject: [PATCH 06/24] docs(context): update tombstone format and add task reminder to pipeline - Tombstone example was the old one-liner; now shows the enriched format with tool label and content preview that ships in this branch - Context Manifest (appended when evictions occur) was not documented - Task Reminder step was missing from both pipeline overview and diagram --- docs/context-management.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/context-management.md b/docs/context-management.md index 3548d72..02f0a43 100644 --- a/docs/context-management.md +++ b/docs/context-management.md @@ -18,6 +18,7 @@ Each agent turn — ContextAssemblyPipeline (always on) └─ Context window filter → per-agent history slice (ContextWindow config) └─ Session context injection → session summary prepended (if present) └─ Artifact offloading → tool results > 40k chars stored to disk; stub replaces inline (always on) + └─ Task Reminder → task repeated at recency end when context > 2 000 chars (primacy+recency sandwich) History too long └─ Compaction → replace old turns with a summary + tool-call trace @@ -582,12 +583,14 @@ ContextBudget: InTurnToolWindow: 20 # always retain at least the last 20 results verbatim ``` -When the cumulative estimated token cost of all tool-result messages in the context slice exceeds `MaxToolResultTokens`, the oldest results beyond the last `InTurnToolWindow` are replaced with one-line tombstones of the form: +When the cumulative estimated token cost of all tool-result messages in the context slice exceeds `MaxToolResultTokens`, the oldest results beyond the last `InTurnToolWindow` are replaced with enriched 
tombstones that include the tool name, a key argument label, and up to 300 characters of the original content as a preview: ``` -[tool result — evicted after tool window exceeded] +[tool result — evicted: read_file(src/LargeService.cs). Preview: "using System;…". Re-read with targeted ranges if needed.] ``` +When evictions occur, a `[Context Manifest]` message is also appended at the end of the context slice listing active tool results still in context alongside the superseded (evicted) ones, so the agent knows which reads are still available and which must be re-issued with targeted ranges. + **Key difference from `MaxInTurnToolPairs`:** `MaxInTurnToolPairs` is an agent-level count-based cap applied unconditionally before every inner LLM call. `MaxToolResultTokens` is a session-level token-budget cap applied at the `ContextBudget` layer — it only fires when the total tool-result token footprint actually exceeds the threshold, preserving full context for turns with few or small results. **Audit trail:** the full tool results remain in the shared conversation history and on-disk artifacts. Only the slice passed to the model is trimmed — compaction and session replay are unaffected. 
@@ -743,12 +746,13 @@ Here is the full sequence from session start through a long-running session: │ └─ SanitizeToolPairs — strip orphaned assistant tool-call frames (strict providers) ├─ Session context injection → context_summary.md prepended when present ├─ Knowledge artifact appended as [Pipeline Knowledge] user message + ├─ Task Reminder appended when context > 2 000 chars — primacy+recency sandwich reduces lost-in-the-middle drift └─ Assembled context → sent to LLM ├─ Session read cache — read_file returns hint instead of full content if file unchanged since last read/write this session ├─ Tool-result artifact offloading — results > 40k chars stored to disk; stub replaces inline content ├─ MaxInTurnToolPairs — sliding window: keep only last N tool pairs per inner call ├─ MaxInTurnContextTokens — budget-reactive: trim oldest pairs when over budget - ├─ MaxToolResultTokens / InTurnToolWindow — tombstone oldest tool results beyond token budget + ├─ MaxToolResultTokens / InTurnToolWindow — tombstone oldest results with label+preview; append [Context Manifest] when evictions occur └─ On context/413 error → adaptive trim retry (up to 3 stages) Post-turn From 20850c4d4f0b949777646983dcbb9c05a3142688 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Fri, 12 Jun 2026 08:27:09 -0500 Subject: [PATCH 07/24] fix(context): correct session placement, drain bug, and knowledge dedup - Session context was injected at position 1, burying it mid-history where models attend least; moved to recency boundary so it is read last - TrimToWindow could drain the final message pair: the assistant removal only checked start < list.Count, not start + 1 < list.Count, allowing both messages to be removed in a single loop iteration - Pipeline Knowledge was re-appended every turn regardless of whether identical content already existed in history, compounding context growth --- src/Orchestration/ContextAssemblyPipeline.cs | 20 +++++++++++++------- src/Orchestration/ConversationCompactor.cs | 
2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/Orchestration/ContextAssemblyPipeline.cs b/src/Orchestration/ContextAssemblyPipeline.cs index 1df216d..db6ac0d 100644 --- a/src/Orchestration/ContextAssemblyPipeline.cs +++ b/src/Orchestration/ContextAssemblyPipeline.cs @@ -177,9 +177,16 @@ public async Task AssembleAsync( if (!hasExplicitBroker) { - knowledgeChars = knowledgeArtifact.Content.Length; - finalMessages.Add(new ChatMessage(ChatRole.User, - $"[Pipeline Knowledge]\n\n{knowledgeArtifact.Content}")); + var knowledgeText = $"[Pipeline Knowledge]\n\n{knowledgeArtifact.Content}"; + bool alreadyPresent = baseMessages.Any(m => + m.Role == ChatRole.User && + string.Equals(m.Text, knowledgeText, StringComparison.Ordinal)); + + if (!alreadyPresent) + { + knowledgeChars = knowledgeArtifact.Content.Length; + finalMessages.Add(new ChatMessage(ChatRole.User, knowledgeText)); + } } } @@ -330,8 +337,8 @@ private static string FormatKnowledgeBlock(IReadOnlyList items) return sb.ToString().TrimEnd(); } - // Injects the session context file content at position 1 (after the first history - // message) so the agent reads the current session state early in its context. + // Appends the session context file content after all history messages so it sits + // at the recency boundary, where models pay the most attention. private static IReadOnlyList BuildDefaultMessages( IReadOnlyList filtered, string? 
sessionCtx) @@ -339,9 +346,8 @@ private static IReadOnlyList BuildDefaultMessages( if (sessionCtx is null) return filtered; var result = new List(filtered.Count + 1); - if (filtered.Count > 0) result.Add(filtered[0]); + result.AddRange(filtered); result.Add(new ChatMessage(ChatRole.User, $"[Session Context]\n\n{sessionCtx.Trim()}")); - result.AddRange(filtered.Skip(1)); return result; } } diff --git a/src/Orchestration/ConversationCompactor.cs b/src/Orchestration/ConversationCompactor.cs index f6e71bf..297cbe1 100644 --- a/src/Orchestration/ConversationCompactor.cs +++ b/src/Orchestration/ConversationCompactor.cs @@ -125,7 +125,7 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess total -= (list[start].Content?.Length ?? 0) / 4; list.RemoveAt(start); } - if (start < list.Count && list[start].Role == "assistant") + if (start + 1 < list.Count && list[start].Role == "assistant") { total -= (list[start].Content?.Length ?? 0) / 4; list.RemoveAt(start); From 8c245b1ac055991762ed52145837f2cb7823dc0a Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Fri, 12 Jun 2026 08:40:00 -0500 Subject: [PATCH 08/24] fix(cli): resolve Windows ambiguous reference for Context commands - Roslyn on Windows sees two resolution paths for ContextAddCommand when both fuseraft.Cli.Commands and fuseraft.Cli.Commands.Context are imported, triggering an ambiguous reference error; type aliases make each name resolve to exactly one type on all platforms --- src/Program.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Program.cs b/src/Program.cs index f733131..689e8ab 100644 --- a/src/Program.cs +++ b/src/Program.cs @@ -10,7 +10,9 @@ using fuseraft.Cli; using fuseraft.Cli.Commands; using fuseraft.Cli.Display; -using fuseraft.Cli.Commands.Context; +using ContextAddCommand = fuseraft.Cli.Commands.Context.ContextAddCommand; +using ContextListCommand = fuseraft.Cli.Commands.Context.ContextListCommand; +using ContextRemoveCommand = 
fuseraft.Cli.Commands.Context.ContextRemoveCommand; using fuseraft.Cli.Commands.Log; using fuseraft.Cli.Commands.Repl; using fuseraft.Cli.Commands.Schedule; From 4fff73bea4484d60c44fd8c69cba411935ce2139 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Fri, 12 Jun 2026 20:55:25 -0500 Subject: [PATCH 09/24] docs: update AGENTS.md --- AGENTS.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index e626c79..9030ff2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -7,10 +7,10 @@ Guide for AI coding assistants working in this repository. Read this before maki ## Build and test ```bash +./build.sh # full build + test + bin output (Linux/macOS) +.\build.ps1 # full build + test + bin output (Windows) dotnet build # build only -dotnet test # build + run all tests (323 tests, ~1s) -./build.sh # full build + bin output (Linux/macOS) -.\build.ps1 # full build + bin output (Windows) +dotnet test # build + run all tests (681 tests, ~1s) ``` All tests must pass before committing. There are no integration tests that require a live LLM — everything is unit-testable with fakes. @@ -253,7 +253,7 @@ When adding a new `FailureAction` or `FailureType` value, update: | How does AgentFile loading work? | `src/Cli/OrchestratorBuilder.cs` → `ResolveAgentFiles` | | How does compaction work? | `src/Orchestration/ConversationCompactor.cs` | | How does change tracking work? | `src/Orchestration/ChangeTracker.cs` | -| How is agent context assembled? | `src/Orchestration/ContextAssembler.cs` | +| How is agent context assembled? | `src/Orchestration/ContextAssemblyPipeline.cs` (main entry point, stages 1–6); `src/Orchestration/ContextAssembler.cs` (per-agent assembled contexts) | | How are tool results trimmed / tombstoned? 
| `src/Orchestration/ToolResultWindowTrimmer.cs` | | Full architecture decisions | `docs/design.md` | | Hardening configs against hallucination | `docs/harness-engineering.md` | From b97d915727014dedfcdc67cc6e129afd5a350c59 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Fri, 12 Jun 2026 21:31:15 -0500 Subject: [PATCH 10/24] feat(viz): add per-turn token bar chart and events.jsonl enrichment - Bar chart at top of ctx_viz.html shows input and output tokens per turn with toggle buttons to show/hide each dataset independently - Events.jsonl is now loaded alongside ctx_snapshots.jsonl; validation_fail and tool_blocked events render as annotated vertical lines on both charts - Context assembly details (chars, tool count, assembly time) from events.jsonl surface in bar and line chart tooltips when present - Only four event types are loaded (turn_end, validation_fail, tool_blocked, context_assembly) to keep embedded HTML size reasonable --- src/Cli/Commands/RunCommand.cs | 5 +- src/Cli/Display/ContextWindowRenderer.cs | 317 ++++++++++++++++++++--- 2 files changed, 283 insertions(+), 39 deletions(-) diff --git a/src/Cli/Commands/RunCommand.cs b/src/Cli/Commands/RunCommand.cs index c97f1fb..079a65d 100644 --- a/src/Cli/Commands/RunCommand.cs +++ b/src/Cli/Commands/RunCommand.cs @@ -537,8 +537,9 @@ protected override async Task ExecuteAsync(CommandContext context, RunSetti } // Context window visualization — render after the run so all snapshot data is flushed. 
- var ctxVizPath = fuseraft.Core.FuseraftPaths.ExpandSessionId(fuseraft.Core.FuseraftPaths.LocalCtxViz, checkpoint.SessionId); - if (await fuseraft.Cli.Display.ContextWindowRenderer.RenderAsync(ctxSnapshotsPath, ctxVizPath, checkpoint.SessionId)) + var ctxVizPath = fuseraft.Core.FuseraftPaths.ExpandSessionId(fuseraft.Core.FuseraftPaths.LocalCtxViz, checkpoint.SessionId); + var ctxEventsPath = Path.Combine(Path.GetDirectoryName(ctxSnapshotsPath)!, "events.jsonl"); + if (await fuseraft.Cli.Display.ContextWindowRenderer.RenderAsync(ctxSnapshotsPath, ctxVizPath, checkpoint.SessionId, ctxEventsPath)) AnsiConsole.MarkupLine($"[dim]Context viz → {Markup.Escape(ctxVizPath)}[/]"); // Summary diff --git a/src/Cli/Display/ContextWindowRenderer.cs b/src/Cli/Display/ContextWindowRenderer.cs index 0b34379..3acfdb4 100644 --- a/src/Cli/Display/ContextWindowRenderer.cs +++ b/src/Cli/Display/ContextWindowRenderer.cs @@ -4,9 +4,10 @@ namespace fuseraft.Cli.Display; /// -/// Reads a context-window snapshot JSONL file produced by -/// and writes a self-contained -/// Chart.js HTML file showing cumulative input token growth per agent over time. +/// Reads context-window snapshot and event JSONL files and writes a self-contained +/// Chart.js HTML file with a per-turn token bar chart (top) and a cumulative input +/// token line chart (bottom). Event annotations (validation_fail, tool_blocked) are +/// overlaid on both charts when an events file is present. /// public static class ContextWindowRenderer { @@ -16,23 +17,29 @@ public static class ContextWindowRenderer PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower, }; + private static readonly HashSet UsefulEventTypes = new(StringComparer.OrdinalIgnoreCase) + { + "turn_end", "validation_fail", "tool_blocked", "context_assembly", + }; + /// - /// Reads , filters to , - /// and writes a Chart.js HTML visualization to . - /// Returns true if the file was written, false when there are no snapshots. - /// Never throws. 
+ /// Reads (and optionally ), + /// filters to , and writes a Chart.js HTML visualization + /// to . Returns true if the file was written. /// public static async Task RenderAsync( - string snapshotsPath, - string outputPath, - string sessionId) + string snapshotsPath, + string outputPath, + string sessionId, + string? eventsPath = null) { try { var snapshots = await LoadSnapshotsAsync(snapshotsPath, sessionId); if (snapshots.Count == 0) return false; - var html = BuildHtml(snapshots, sessionId); + var events = await LoadEventsAsync(eventsPath, sessionId); + var html = BuildHtml(snapshots, events, sessionId); var dir = Path.GetDirectoryName(outputPath); if (!string.IsNullOrEmpty(dir)) Directory.CreateDirectory(dir); @@ -62,16 +69,44 @@ private static async Task> LoadSnapshotsAsync(string path, string return result; } - private static string BuildHtml(List snapshots, string sessionId) + private static async Task> LoadEventsAsync(string? path, string sessionId) + { + if (string.IsNullOrEmpty(path) || !File.Exists(path)) return []; + + var result = new List(); + foreach (var line in await File.ReadAllLinesAsync(path)) + { + if (string.IsNullOrWhiteSpace(line)) continue; + try + { + var e = JsonSerializer.Deserialize(line, JsonOpts); + if (e is not null + && string.Equals(e.Session, sessionId, StringComparison.OrdinalIgnoreCase) + && e.EventType is { } et + && UsefulEventTypes.Contains(et)) + result.Add(e); + } + catch { /* skip malformed lines */ } + } + return result; + } + + private static string BuildHtml(List snapshots, List events, string sessionId) { - // Snapshot data embedded as JSON (safe: values are numbers, bools, and ISO strings) var snapshotsJson = JsonSerializer.Serialize(snapshots, new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower, WriteIndented = false, }); - // Extract threshold values from the first snapshot that carries them + var eventsJson = events.Count > 0 + ? 
JsonSerializer.Serialize(events, new JsonSerializerOptions + { + PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower, + WriteIndented = false, + }) + : "[]"; + var warnAt = snapshots.FirstOrDefault(s => s.WarnAt is > 0)?.WarnAt ?? 0; var cutoverAt = snapshots.FirstOrDefault(s => s.CutoverAt is > 0)?.CutoverAt ?? 0; @@ -93,53 +128,252 @@ private static string BuildHtml(List snapshots, string sessionId) padding: 24px; min-height: 100vh; } - header { margin-bottom: 16px; } + header { margin-bottom: 20px; } header h1 { font-size: 15px; font-weight: 600; color: #e6edf3; } header p { font-size: 12px; color: #8b949e; margin-top: 4px; } + .section { margin-bottom: 20px; } + .section-header { + display: flex; + align-items: center; + gap: 12px; + margin-bottom: 10px; + } + .section-title { + font-size: 12px; + font-weight: 600; + color: #8b949e; + text-transform: uppercase; + letter-spacing: 0.05em; + } + .toggles { display: flex; gap: 6px; } + .toggle { + font-family: inherit; + font-size: 11px; + padding: 3px 10px; + border-radius: 4px; + border: 1px solid #30363d; + background: #161b22; + color: #8b949e; + cursor: pointer; + transition: background 0.15s, color 0.15s, border-color 0.15s; + } + .toggle.active { background: #21262d; color: #e6edf3; border-color: #484f58; } + .toggle:hover { background: #21262d; color: #e6edf3; } .chart-wrap { background: #161b22; border: 1px solid #21262d; border-radius: 6px; padding: 20px; - height: 520px; position: relative; } + .chart-wrap.bar-chart { height: 340px; } + .chart-wrap.line-chart { height: 520px; } footer { margin-top: 12px; font-size: 11px; color: #484f58; }

Context Window Visualization

-

Session {{sessionId}} — cumulative input tokens per agent over turns

+

Session {{sessionId}} — per-turn tokens and cumulative input token growth per agent

-
- + +
+
+ Per-Turn Tokens +
+ + +
+
+
+ +
+ +
+
+ Cumulative Input Tokens +
+
+ +
+
+
Generated by fuseraft-cli — compaction events shown as vertical markers. Requires internet for Chart.js CDN.
+