fuseraft · fuseraft · Jun 13, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -7,10 +7,10 @@ Guide for AI coding assistants working in this repository. Read this before maki
 ## Build and test
 
 ```bash
+./build.sh            # full build + test + bin output (Linux/macOS)
+.\build.ps1           # full build + test + bin output (Windows)
 dotnet build          # build only
-dotnet test           # build + run all tests (323 tests, ~1s)
-./build.sh            # full build + bin output (Linux/macOS)
-.\build.ps1           # full build + bin output (Windows)
+dotnet test           # build + run all tests (681 tests, ~1s)
 ```
 
 All tests must pass before committing. There are no integration tests that require a live LLM — everything is unit-testable with fakes.
@@ -154,6 +154,23 @@ Validators must not call LLMs or external services. Violations collapse the dete
 
 ---
 
+## Context shaping
+
+Two mechanisms reduce lost-in-the-middle effects for long agent contexts:
+
+**Task Reminder** (`ContextAssembler`): When the assembled context exceeds 2 000 characters and the task string is longer than 50 characters, `ContextAssembler.AssembleAsync` appends a `[Task Reminder]` `ChatRole.User` message (up to 200 chars of the task) at the recency end of the context list. This exploits the primacy+recency sandwich — the task appears both at the top (system prompt) and at the bottom (reminder).
+
+**Context Manifest** (`ToolResultWindowTrimmer` + `AgentOrchestrator`): When `MaxToolResultTokens` is exceeded, `ToolResultWindowTrimmer.ApplyWithManifest` tombstones old results and returns a manifest string listing active vs. superseded tool results. `AgentOrchestrator` appends this manifest as a final `ChatRole.User` message so the agent knows which reads are still in context and which must be re-issued with targeted ranges.
+
+Tombstones include the evicted tool's name, a key argument label, and up to 300 characters of the original content as a preview:
+```
+[tool result — evicted: read_file(src/Foo.cs). Preview: "using System;…". Re-read with targeted ranges if needed.]
+```
+
+`ToolResultWindowTrimmer.Apply` is still the zero-manifest entry point used by callers that don't need the manifest. Both delegate to the private `ApplyCore`.
+
+---
+
 ## Shared history invariant
 
 The system maintains two views of history:
@@ -236,5 +253,7 @@ When adding a new `FailureAction` or `FailureType` value, update:
 | How does AgentFile loading work? | `src/Cli/OrchestratorBuilder.cs` → `ResolveAgentFiles` |
 | How does compaction work? | `src/Orchestration/ConversationCompactor.cs` |
 | How does change tracking work? | `src/Orchestration/ChangeTracker.cs` |
+| How is agent context assembled? | `src/Orchestration/ContextAssemblyPipeline.cs` (main entry point, stages 1–6); `src/Orchestration/ContextAssembler.cs` (per-agent assembled contexts) |
+| How are tool results trimmed / tombstoned? | `src/Orchestration/ToolResultWindowTrimmer.cs` |
 | Full architecture decisions | `docs/design.md` |
 | Hardening configs against hallucination | `docs/harness-engineering.md` |
diff --git a/config/examples/cross-system-flow-analyzer.yaml b/config/examples/cross-system-flow-analyzer.yaml
@@ -69,7 +69,7 @@ Orchestration:
 
   Models:
     heavy:
-      ModelId: claude-opus-4-7
+      ModelId: claude-opus-4-8
     scout:
       ModelId: claude-haiku-4-5-20251001
 
@@ -94,12 +94,15 @@ Orchestration:
     CutoverAt: 110000   # compact early; artifact handoffs eliminate the need for large live contexts
 
   FailureHandling:
-    RetryCount: 2
-    OnAgentFailure:
-      - checkpoint
-      - compact
-      - retry
-      - escalate
+    MissingEvidence:
+      Action: Reinstruct
+      Threshold: 3
+    ConflictingEvidence:
+      Action: Reinstruct
+      Threshold: 2
+    NoProgress:
+      Action: Abort
+      Threshold: 3
 
   Events:
     Path: .fuseraft/logs/events.jsonl

diff --git a/config/examples/playwright-mcp.yaml b/config/examples/playwright-mcp.yaml
@@ -0,0 +1,61 @@
+## Playwright MCP example: a single browser-automation agent backed by the Playwright MCP server.
+## Prerequisites:
+##   1. Install the correct Chromium build for the MCP server's playwright-core version:
+##        node $(npx --yes @playwright/mcp@latest node -e "process.exit(0)" 2>/dev/null; \
+##               find ~/.npm/_npx -name "cli.js" -path "*/playwright-core/*" | head -1) install chromium
+##      Or more simply, find the cli.js path and run: node <path> install chromium
+##   2. Ensure XAI_API_KEY is set in your environment.
+## Run:      fuseraft run --config config/examples/playwright-mcp.yaml "Navigate to https://example.com and take a screenshot"
+## Validate: fuseraft validate config/examples/playwright-mcp.yaml
+
+Orchestration:
+  Name: PlaywrightExample
+  Description: >-
+    Single-agent setup that drives a browser via the Playwright MCP server.
+    The agent can navigate pages, click elements, fill forms, and capture screenshots.
+
+  McpServers:
+    - Name: playwright
+      Transport: stdio
+      Command: npx
+      Args:
+        - "@playwright/mcp@latest"
+        - "--browser"
+        - "chromium"   # must match the browser installed via playwright-core's cli.js
+
+  Agents:
+    - Name: BrowserAgent
+      Description: Automates browser interactions using Playwright tools.
+      Instructions: |
+        You are a browser automation agent with access to Playwright tools.
+
+        Use the playwright MCP tools to complete the requested task:
+        - Navigate to URLs with browser_navigate
+        - Click elements with browser_click
+        - Fill forms with browser_fill_form (or browser_type for a single field)
+        - Take screenshots with browser_take_screenshot
+        - Read page content with browser_snapshot
+
+        Be concise. Report what you did and what you observed.
+      Model:
+        ModelId: grok-4.3
+        Endpoint: https://api.x.ai/v1
+        ApiKeyEnvVar: XAI_API_KEY
+        MaxTokens: 4096
+      Plugins:
+        - playwright
+
+  Selection:
+    Type: roundrobin
+
+  Termination:
+    Type: composite
+    MaxIterations: 10
+    Strategies:
+      - Type: regex
+        Pattern: "(?i)\\bdone\\b"
+        AgentNames:
+          - BrowserAgent
+
+  Events:
+    Path: .fuseraft/events.jsonl
diff --git a/docs/context-management.md b/docs/context-management.md
@@ -18,6 +18,7 @@ Each agent turn — ContextAssemblyPipeline (always on)
   └─ Context window filter       → per-agent history slice (ContextWindow config)
   └─ Session context injection   → session summary prepended (if present)
   └─ Artifact offloading         → tool results > 40k chars stored to disk; stub replaces inline (always on)
+  └─ Task Reminder               → task repeated at recency end when context > 2 000 chars (primacy+recency sandwich)
 
 History too long
   └─ Compaction                  → replace old turns with a summary + tool-call trace
@@ -582,12 +583,14 @@ ContextBudget:
   InTurnToolWindow:    20      # always retain at least the last 20 results verbatim
 ```
 
-When the cumulative estimated token cost of all tool-result messages in the context slice exceeds `MaxToolResultTokens`, the oldest results beyond the last `InTurnToolWindow` are replaced with one-line tombstones of the form:
+When the cumulative estimated token cost of all tool-result messages in the context slice exceeds `MaxToolResultTokens`, the oldest results beyond the last `InTurnToolWindow` are replaced with enriched tombstones that include the tool name, a key argument label, and up to 300 characters of the original content as a preview:
 
 ```
-[tool result — evicted after tool window exceeded]
+[tool result — evicted: read_file(src/LargeService.cs). Preview: "using System;…". Re-read with targeted ranges if needed.]
 ```
 
+When evictions occur, a `[Context Manifest]` message is also appended at the end of the context slice listing active tool results still in context alongside the superseded (evicted) ones, so the agent knows which reads are still available and which must be re-issued with targeted ranges.
+
 **Key difference from `MaxInTurnToolPairs`:** `MaxInTurnToolPairs` is an agent-level count-based cap applied unconditionally before every inner LLM call. `MaxToolResultTokens` is a session-level token-budget cap applied at the `ContextBudget` layer — it only fires when the total tool-result token footprint actually exceeds the threshold, preserving full context for turns with few or small results.
 
 **Audit trail:** the full tool results remain in the shared conversation history and on-disk artifacts. Only the slice passed to the model is trimmed — compaction and session replay are unaffected.
@@ -743,12 +746,13 @@ Here is the full sequence from session start through a long-running session:
    │  └─ SanitizeToolPairs — strip orphaned assistant tool-call frames (strict providers)
    ├─ Session context injection → context_summary.md prepended when present
    ├─ Knowledge artifact appended as [Pipeline Knowledge] user message
+   ├─ Task Reminder appended when context > 2 000 chars — primacy+recency sandwich reduces lost-in-the-middle drift
    └─ Assembled context → sent to LLM
       ├─ Session read cache — read_file returns hint instead of full content if file unchanged since last read/write this session
       ├─ Tool-result artifact offloading — results > 40k chars stored to disk; stub replaces inline content
       ├─ MaxInTurnToolPairs — sliding window: keep only last N tool pairs per inner call
       ├─ MaxInTurnContextTokens — budget-reactive: trim oldest pairs when over budget
-      ├─ MaxToolResultTokens / InTurnToolWindow — tombstone oldest tool results beyond token budget
+      ├─ MaxToolResultTokens / InTurnToolWindow — tombstone oldest results with label+preview; append [Context Manifest] when evictions occur
       └─ On context/413 error → adaptive trim retry (up to 3 stages)
 
    Post-turn

diff --git a/docs/sessions.md b/docs/sessions.md
@@ -139,6 +139,97 @@ All REPL events are tagged with the session ID (`session` field in the JSONL), s
 
 ---
 
+**Orchestration event types** emitted to `events.jsonl` by `fuseraft run`:
+
+*Session / turn lifecycle*
+
+| Event type | When emitted |
+|------------|-------------|
+| `session_start` | Session begins |
+| `session_end` | Session completes successfully |
+| `session_error` | Unrecoverable session error |
+| `session_recovered` | Session resumed from a prior checkpoint |
+| `session_aborted` | Session stopped before completion |
+| `session_summary` | Post-run summary written |
+| `turn_start` | Agent turn begins |
+| `turn_end` | Agent turn completes |
+| `turn_timeout` | Agent turn exceeded its time limit |
+
+*Checkpointing / resume*
+
+| Event type | When emitted |
+|------------|-------------|
+| `checkpoint_created` | Seed checkpoint written for a new session |
+| `checkpoint_loaded` | Existing checkpoint loaded for a resume |
+| `resume_started` | Resumed session is about to begin streaming |
+| `resume_completed` | Resumed session ran to successful completion |
+| `event_replay_start` | Prior message history is being replayed as context |
+| `event_replay_complete` | Message history replay finished |
+| `event_corruption_detected` | A session file failed to deserialise — payload: `session`, `source`, `error` |
+
+*Agent execution*
+
+| Event type | When emitted |
+|------------|-------------|
+| `agent_start` | Individual agent begins its turn |
+| `agent_end` | Individual agent turn completes |
+| `agent_error` | Agent threw an unhandled error |
+| `agent_timeout` | Agent exceeded its time limit |
+| `agent_routed` | Routing selected the next agent |
+| `agent_blocked` | Agent declared an unrecoverable blocker |
+
+*Model invocation*
+
+| Event type | When emitted | Key payload fields |
+|------------|-------------|-------------------|
+| `model_call` | LLM HTTP request is about to be sent; correlates with `inner_call_context` via `call_seq` | `model`, `attempt`, `message_count`, `call_seq` |
+| `model_response` | LLM response received | `model`, `finish_reason`, `input_tokens`, `output_tokens`, `call_seq` |
+| `model_error` | LLM call failed (non-timeout), including context-limit exhaustion | `model`, `attempt`, `call_seq`, `error` |
+| `model_timeout` | LLM call or streaming response timed out | `model`, `attempt`, `message` |
+
+*Tool use*
+
+| Event type | When emitted |
+|------------|-------------|
+| `tool_call` | Tool invoked by an agent |
+| `tool_result` | Tool result returned |
+| `tool_blocked` | Tool call denied by governance |
+| `tool_error` | Tool threw an exception |
+| `tool_timeout` | Tool execution timed out |
+
+*Validation / governance*
+
+| Event type | When emitted |
+|------------|-------------|
+| `validation_fail` | Validator rejected an agent response |
+| `hitl_escalation` | Human-in-the-loop intervention required |
+| `hitl_approved` | HITL operator approved continuation |
+| `hitl_rejected` | HITL operator rejected continuation |
+| `circuit_breaker_open` | Circuit breaker tripped on consecutive LLM failures |
+| `retry_scheduled` | Retry attempt queued after a recoverable failure |
+| `retry_exhausted` | All retry attempts consumed |
+| `max_turns_exceeded` | Session hit the `MaxIterations` cap |
+| `termination_satisfied` | Termination condition met naturally |
+| `termination_forced` | Session forcibly stopped (budget, cap, etc.) |
+
+*Cancellation*
+
+| Event type | When emitted |
+|------------|-------------|
+| `cancellation_requested` | `OperationCanceledException` caught mid-turn (Ctrl+C during streaming) |
+| `cancellation_observed` | Cancellation token checked between turns and loop is stopping cleanly |
+
+*Compaction*
+
+| Event type | When emitted |
+|------------|-------------|
+| `compaction` | Compaction applied to reduce history size |
+| `compaction_resume_candidate` | Session paused to await resume after compaction |
+
+All orchestration events include `ts` (ISO 8601 timestamp), `session` (8-char hex ID), `agent`, and `turn` fields alongside the `event_type` and `payload`. Use `fuseraft log` to view them in a formatted table.
+
+---
+
 ## Orchestration sessions (`fuseraft run`)
 
 ## How sessions work

diff --git a/src/Cli/Commands/Eval/EvalCommand.cs b/src/Cli/Commands/Eval/EvalCommand.cs
@@ -247,7 +247,7 @@ internal static EvalCaseResult Score(EvalCase evalCase, SessionResult result, st
             failures.Add($"session did not succeed: {result.ErrorMessage ?? "unknown"}");
 
         var finalContent = result.Messages
-            .LastOrDefault(m => m.Role == "assistant")?.Content ?? string.Empty;
+            .LastOrDefault(m => m.Role == MessageRole.Assistant)?.Content ?? string.Empty;
 
         foreach (var kw in evalCase.ExpectKeywords)
             if (!finalContent.Contains(kw, StringComparison.OrdinalIgnoreCase))

diff --git a/src/Cli/Commands/Log/EventLogViewer.cs b/src/Cli/Commands/Log/EventLogViewer.cs
@@ -1,6 +1,7 @@
 using System.Text.Json;
 using System.Text.Json.Serialization;
 using Spectre.Console;
+using fuseraft.Orchestration;
 
 namespace fuseraft.Cli.Commands.Log;
 
@@ -123,18 +124,18 @@ internal static async Task<int> RenderAsync(
 
     private static string ColorizeEvent(string eventType) => eventType switch
     {
-        "session_start"             => "[cyan]session_start[/]",
-        "session_end"               => "[cyan]session_end[/]",
-        "session_error"             => "[red]session_error[/]",
-        "circuit_breaker_open"      => "[red]circuit_breaker_open[/]",
-        "tool_blocked"              => "[yellow]tool_blocked[/]",
-        "validation_fail"           => "[yellow]validation_fail[/]",
-        "hitl_escalation"           => "[yellow]hitl_escalation[/]",
-        "skill_curation_complete"   => "[green]skill_curation_complete[/]",
-        "skill_curation_start"      => "[dim]skill_curation_start[/]",
-        "turn_start" or "turn_end"  => $"[dim]{Markup.Escape(eventType)}[/]",
-        "command"                   => "[dim]command[/]",
-        _                           => Markup.Escape(eventType),
+        EventTypes.SessionStart                          => $"[cyan]{EventTypes.SessionStart}[/]",
+        EventTypes.SessionEnd                            => $"[cyan]{EventTypes.SessionEnd}[/]",
+        EventTypes.SessionError                          => $"[red]{EventTypes.SessionError}[/]",
+        EventTypes.CircuitBreakerOpen                    => $"[red]{EventTypes.CircuitBreakerOpen}[/]",
+        EventTypes.ToolBlocked                           => $"[yellow]{EventTypes.ToolBlocked}[/]",
+        EventTypes.ValidationFail                        => $"[yellow]{EventTypes.ValidationFail}[/]",
+        EventTypes.HitlEscalation                        => $"[yellow]{EventTypes.HitlEscalation}[/]",
+        EventTypes.SkillCurationComplete                 => $"[green]{EventTypes.SkillCurationComplete}[/]",
+        EventTypes.SkillCurationStart                    => $"[dim]{EventTypes.SkillCurationStart}[/]",
+        EventTypes.TurnStart or EventTypes.TurnEnd       => $"[dim]{Markup.Escape(eventType)}[/]",
+        EventTypes.Command                               => $"[dim]{EventTypes.Command}[/]",
+        _                                                => Markup.Escape(eventType),
     };
 
     private static string SummarizePayload(string? eventType, JsonElement? payload)
@@ -145,39 +146,39 @@ private static string SummarizePayload(string? eventType, JsonElement? payload)
         {
             return eventType switch
             {
-                "command" =>
+                EventTypes.Command =>
                     Get(p, "command") is { } cmd
                         ? $"[dim]{Markup.Escape(Truncate(cmd, 60))}[/]"
                         : string.Empty,
 
-                "skill_curation_complete" =>
+                EventTypes.SkillCurationComplete =>
                     (Get(p, "outcome"), Get(p, "slug")) is ({ } outcome, { } slug)
                         ? $"[dim]{Markup.Escape(outcome)}  {Markup.Escape(slug)}[/]"
                         : Get(p, "outcome") is { } o
                             ? $"[dim]{Markup.Escape(o)}[/]"
                             : string.Empty,
 
-                "session_error" =>
+                EventTypes.SessionError =>
                     Get(p, "error") is { } err
                         ? $"[dim red]{Markup.Escape(Truncate(err, 80))}[/]"
                         : string.Empty,
 
-                "tool_blocked" =>
+                EventTypes.ToolBlocked =>
                     Get(p, "tool") is { } tool
                         ? $"[dim]{Markup.Escape(tool)}[/]"
                         : string.Empty,
 
-                "validation_fail" =>
+                EventTypes.ValidationFail =>
                     Get(p, "validator") is { } v
                         ? $"[dim]{Markup.Escape(v)}[/]"
                         : string.Empty,
 
-                "session_start" =>
+                EventTypes.SessionStart =>
                     Get(p, "model") is { } model
                         ? $"[dim]{Markup.Escape(Truncate(model, 30))}[/]"
                         : string.Empty,
 
-                "turn_end" =>
+                EventTypes.TurnEnd =>
                     Get(p, "agent") is { } agent
                         ? $"[dim]{Markup.Escape(agent)}[/]"
                         : string.Empty,