diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ea1fd090..65cd35a31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `request_compaction` internal tool: agent-initiated context compaction on demand (ARC #4020). Config: `[memory.compression.arc]`. Rate-limited to one compaction per turn via `CompactionState`. +- Context-Adaptive Memory (CAM) Phase 1 MVP (`zeph-common`, `zeph-llm`, `zeph-context`, + `zeph-agent-context`, `zeph-config`): three-level fidelity scoring for context messages + (Full/Compressed/Placeholder) with heuristic `FidelityScorer` (temporal decay, role importance, + keyword overlap, plan-tool relevance), proactive AgeMem regrade trigger in `maybe_compact()`, + Placeholder exclusion from hard compaction summarizer input, `[context.fidelity]` config section + with `enabled = false` default (no behavioral change when disabled). Closes #4547. + - `zeph-skills`: `promoter` module — pure-logic helpers for `AutoSkill A6` heuristic promotion: `compute_batch_hash` (BLAKE3, order-independent), `build_promotion_prompt`, `parse_promotion_response`, `PromotionRecommendation` enum (`BodyEnrichment`, `NewSkill`, `None`). No async/DB/LLM dependencies. diff --git a/Cargo.lock b/Cargo.lock index d70962ab8..e1aed02bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10828,6 +10828,7 @@ name = "zeph-context" version = "0.21.2" dependencies = [ "blake3", + "criterion", "futures", "parking_lot", "regex", diff --git a/config/default.toml b/config/default.toml index 70dcda471..8f118cf14 100644 --- a/config/default.toml +++ b/config/default.toml @@ -348,6 +348,22 @@ autosave_assistant = true # Maximum recent messages to include in the shutdown summary LLM prompt # shutdown_summary_max_messages = 20 +# ── Context-Adaptive Memory (CAM) fidelity scoring (#4547) ────────────────── +# Assigns Full / Compressed / Placeholder levels to historical messages to +# reduce token usage without hard compaction. Off by default for v0.21. +# [memory.fidelity] +# enabled = false +# w_semantic = 0.3 +# w_temporal = 0.3 +# w_importance = 0.2 +# w_plan = 0.2 +# full_threshold = 0.7 +# compressed_threshold = 0.3 +# compressed_max_tokens = 50 +# regrade_threshold = 0.6 +# min_query_length = 8 +# max_scored_messages = 500 + [memory.sessions] # Maximum number of sessions returned by list operations (0 = unlimited) max_history = 100 diff --git a/crates/zeph-agent-context/src/service.rs b/crates/zeph-agent-context/src/service.rs index 0a972c534..46547abae 100644 --- a/crates/zeph-agent-context/src/service.rs +++ b/crates/zeph-agent-context/src/service.rs @@ -4,6 +4,7 @@ //! [`ContextService`] — stateless façade for agent context-assembly operations. use zeph_context::budget::ContextBudget; +use zeph_context::fidelity::FidelityScorer; use zeph_llm::LlmProvider; use zeph_llm::provider::{Message, MessagePart, Role}; @@ -632,6 +633,8 @@ impl ContextService { scrub: view.scrub, active_levels, router, + planned_next_tools: &[], + fidelity_config: None, }; let mut prepared = zeph_context::assembler::ContextAssembler::gather(&input).await?; @@ -644,13 +647,46 @@ impl ContextService { prepared.recall = None; } - let delta = self.apply_prepared_context(window, view, prepared).await; + let (delta, inserted_count) = self.apply_prepared_context(window, view, prepared).await; if view.tiered_retrieval_config.enabled { self.inject_semantic_recall(query, usize::MAX, window, view) .await?; } + // T-06: Fidelity scoring (INV-01: AFTER apply_prepared_context returns). + // Guard: skip when MemoryFirst is active (INV-11 / AC-09) or config absent/disabled. + // Spec AC-09: when memory_first=true the scorer MUST NOT run — the caller (here) is + // responsible for this bypass; FidelityScorer itself is stateless and has no memory of it. + let memory_first_active = + view.context_strategy == zeph_config::ContextStrategy::MemoryFirst; + if let Some(fidelity_cfg) = view.fidelity_config + && fidelity_cfg.enabled + && !memory_first_active + { + let _span = tracing::info_span!( + "context.fidelity.score", + message_count = window.messages.len(), + query_len = query.len(), + ) + .entered(); + if let Some(ref tx) = view.status_tx { + let _ = tx.send("Scoring context fidelity\u{2026}".into()); + } + FidelityScorer.score_and_apply( + window.messages, + query, + view.planned_next_tools, + fidelity_cfg, + &*view.token_counter, + inserted_count, + ); + recompute_prompt_tokens(window); + if let Some(ref tx) = view.status_tx { + let _ = tx.send(String::new()); + } + } + Ok(delta) } @@ -660,14 +696,15 @@ impl ContextService { /// cross-session → summaries → persona → trajectory → tree → reasoning), handles /// `MemoryFirst` history drain, sanitizes memory content, trims to budget, and injects /// the session digest. Returns a [`ContextDelta`] whose `code_context` field the caller - /// must apply via `inject_code_context`. + /// must apply via `inject_code_context`, plus the count of messages freshly inserted at + /// indices `1..1+inserted_count` (used by the fidelity scorer as the exempt range — INV-10). #[allow(clippy::too_many_lines)] // sequential message injection: order matters, cannot split async fn apply_prepared_context( &self, window: &mut MessageWindowView<'_>, view: &mut ContextAssemblyView<'_>, prepared: zeph_context::assembler::PreparedContext, - ) -> ContextDelta { + ) -> (ContextDelta, usize) { use std::borrow::Cow; use zeph_llm::provider::{Message, MessageMetadata, Role}; use zeph_sanitizer::{ContentSource, ContentSourceKind, MemorySourceHint}; @@ -692,12 +729,17 @@ impl ContextService { } } + // Tracks how many memory messages were freshly inserted at positions 1..1+inserted_count + // so the fidelity scorer can exempt them (INV-10). Incremented at every insertion path. + let mut inserted_count: usize = 0; + // Insert memory messages at position 1 (all sanitized before insertion — CRIT-02). if let Some(msg) = prepared.graph_facts.filter(|_| window.messages.len() > 1) { let sanitized = self .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; tracing::debug!("injected knowledge graph facts into context"); } if let Some(msg) = prepared.doc_rag.filter(|_| window.messages.len() > 1) { @@ -705,6 +747,7 @@ impl ContextService { .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; tracing::debug!("injected document RAG context"); } if let Some(msg) = prepared.corrections.filter(|_| window.messages.len() > 1) { @@ -712,6 +755,7 @@ impl ContextService { .sanitize_memory_message(msg, MemorySourceHint::ConversationHistory, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; tracing::debug!("injected past corrections into context"); } if let Some(msg) = prepared.recall.filter(|_| window.messages.len() > 1) { @@ -719,18 +763,21 @@ impl ContextService { .sanitize_memory_message(msg, MemorySourceHint::ConversationHistory, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; } if let Some(msg) = prepared.cross_session.filter(|_| window.messages.len() > 1) { let sanitized = self .sanitize_memory_message(msg, MemorySourceHint::LlmSummary, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; } if let Some(msg) = prepared.summaries.filter(|_| window.messages.len() > 1) { let sanitized = self .sanitize_memory_message(msg, MemorySourceHint::LlmSummary, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; tracing::debug!("injected summaries into context"); } if let Some(msg) = prepared.persona_facts.filter(|_| window.messages.len() > 1) { @@ -738,6 +785,7 @@ impl ContextService { .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; tracing::debug!("injected persona facts into context"); } if let Some(msg) = prepared @@ -748,6 +796,7 @@ impl ContextService { .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; tracing::debug!("injected trajectory hints into context"); } if let Some(msg) = prepared.tree_memory.filter(|_| window.messages.len() > 1) { @@ -755,6 +804,7 @@ impl ContextService { .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; tracing::debug!("injected tree memory summary into context"); } if let Some(msg) = prepared @@ -765,6 +815,7 @@ impl ContextService { .sanitize_memory_message(msg, MemorySourceHint::ExternalContent, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; tracing::debug!("injected reasoning strategies into context"); } @@ -826,6 +877,7 @@ impl ContextService { .sanitize_memory_message(digest_msg, MemorySourceHint::LlmSummary, view) .await; window.messages.insert(1, sanitized); + inserted_count += 1; tracing::debug!("injected session digest into context"); } @@ -843,7 +895,7 @@ impl ContextService { recompute_prompt_tokens(window); - ContextDelta { code_context } + (ContextDelta { code_context }, inserted_count) } /// Sanitize a memory retrieval message before inserting it into the context window. @@ -1041,6 +1093,38 @@ impl ContextService { }); } + // T-07: AgeMem proactive regrade — fires before tier dispatch (INV-06, INV-11). + // Skip when MemoryFirst is active; ContextSummarizationView does not carry + // context_strategy, so we check the budget ratio directly via should_proactively_regrade. + if let Some(ref fidelity_cfg) = summ.fidelity_config.clone() + && fidelity_cfg.enabled + && summ.context_manager.should_proactively_regrade( + *summ.cached_prompt_tokens, + fidelity_cfg.regrade_threshold, + summ.server_compaction_active, + ) + { + let _regrade_span = tracing::info_span!( + "context.fidelity.regrade", + budget_ratio = tracing::field::Empty, + ) + .entered(); + FidelityScorer.score_and_apply( + summ.messages, + &summ.current_query, + &[], + fidelity_cfg, + &*summ.token_counter, + 0, + ); + recompute_prompt_tokens_summ(summ); + summ.context_manager.set_regraded_this_turn(true); + tracing::debug!( + cached_tokens = *summ.cached_prompt_tokens, + "AgeMem proactive regrade complete" + ); + } + match summ .context_manager .compaction_tier(*summ.cached_prompt_tokens) @@ -1429,6 +1513,17 @@ pub(crate) fn recompute_prompt_tokens(window: &mut MessageWindowView<'_>) { .sum(); } +/// Recompute `cached_prompt_tokens` for a [`ContextSummarizationView`]. +/// +/// Used after the `AgeMem` proactive regrade modifies the message window in `maybe_compact`. +fn recompute_prompt_tokens_summ(summ: &mut crate::state::ContextSummarizationView<'_>) { + *summ.cached_prompt_tokens = summ + .messages + .iter() + .map(|m| summ.token_counter.count_message_tokens(m) as u64) + .sum(); +} + /// Remove all system/user messages whose `content` starts with `prefix` and whose /// role matches `role`. /// @@ -1684,6 +1779,172 @@ mod tests { ); } + // AC-12: inserted_count must equal the number of non-None memory fields injected. + // Tests that every Some(msg) field in PreparedContext increments inserted_count by 1. + mod inserted_count_tests { + use parking_lot::RwLock; + use std::borrow::Cow; + use std::collections::HashSet; + use std::sync::Arc; + + use zeph_common::SecurityEventCategory; + use zeph_config::memory::TieredRetrievalConfig; + use zeph_config::{ + ContextFormat, ContextStrategy, DocumentConfig, GraphConfig, PersonaConfig, + ReasoningConfig, TrajectoryConfig, TreeConfig, + }; + use zeph_context::assembler::PreparedContext; + use zeph_context::manager::ContextManager; + use zeph_llm::provider::{Message, MessageMetadata, Role}; + use zeph_memory::TokenCounter; + use zeph_sanitizer::ContentIsolationConfig; + use zeph_sanitizer::ContentSanitizer; + use zeph_skills::registry::SkillRegistry; + + use super::super::*; + use crate::state::{ + ContextAssemblyView, MessageWindowView, MetricsCounters, SecurityEventSink, + }; + + struct NoopSink; + impl SecurityEventSink for NoopSink { + fn push(&mut self, _: SecurityEventCategory, _: &'static str, _: String) {} + } + + fn make_counter() -> Arc { + Arc::new(TokenCounter::default()) + } + + fn make_window<'a>( + messages: &'a mut Vec, + cached: &'a mut u64, + completed: &'a mut HashSet, + ) -> MessageWindowView<'a> { + let last = Box::leak(Box::new(None::)); + let deferred_hide = Box::leak(Box::new(Vec::::new())); + let deferred_summ = Box::leak(Box::new(Vec::::new())); + MessageWindowView { + messages, + last_persisted_message_id: last, + deferred_db_hide_ids: deferred_hide, + deferred_db_summaries: deferred_summ, + cached_prompt_tokens: cached, + token_counter: make_counter(), + completed_tool_ids: completed, + } + } + + fn mem_msg(content: &str) -> Message { + Message { + role: Role::User, + content: content.to_string(), + parts: vec![], + metadata: MessageMetadata::default(), + } + } + + fn scrub_noop(s: &str) -> Cow<'_, str> { + Cow::Borrowed(s) + } + + #[tokio::test] + async fn inserted_count_incremented_for_all_paths() { + // AC-12: each non-None field in PreparedContext increments inserted_count by 1. + // 10 memory fields are tested here (session_digest is controlled by digest_enabled). + let mut msgs = vec![ + Message::from_legacy(Role::System, "system"), + Message::from_legacy(Role::User, "user turn"), + ]; + let mut cached = 0u64; + let mut completed = HashSet::new(); + let mut window = make_window(&mut msgs, &mut cached, &mut completed); + + let sanitizer = ContentSanitizer::new(&ContentIsolationConfig::default()); + let mut ctx_mgr = ContextManager::new(); + let mut sink = NoopSink; + let mut last_confidence = None::; + let mut last_skills_prompt = String::new(); + let mut active_skill_names = Vec::new(); + let registry = Arc::new(RwLock::new(SkillRegistry::default())); + + let mut view = ContextAssemblyView { + memory: None, + conversation_id: None, + recall_limit: 10, + cross_session_score_threshold: 0.5, + context_format: ContextFormat::default(), + last_recall_confidence: &mut last_confidence, + context_strategy: ContextStrategy::default(), + crossover_turn_threshold: 0, + cached_session_digest: None, + digest_enabled: false, // no session digest injection in this test + graph_config: GraphConfig::default(), + document_config: DocumentConfig::default(), + persona_config: PersonaConfig::default(), + trajectory_config: TrajectoryConfig::default(), + reasoning_config: ReasoningConfig::default(), + memcot_config: zeph_config::MemCotConfig::default(), + memcot_state: None, + tree_config: TreeConfig::default(), + last_skills_prompt: &mut last_skills_prompt, + active_skill_names: &mut active_skill_names, + skill_registry: registry, + skill_paths: &[], + correction_config: None, + sidequest_turn_counter: 0, + proactive_explorer: None, + sanitizer: &sanitizer, + quarantine_summarizer: None, + context_manager: &mut ctx_mgr, + token_counter: make_counter(), + metrics: MetricsCounters::default(), + security_events: &mut sink, + cached_prompt_tokens: 0, + redact_credentials: false, + channel_skills: &[], + scrub: scrub_noop, + tiered_retrieval_config: TieredRetrievalConfig { + enabled: false, + ..TieredRetrievalConfig::default() + }, + tiered_retrieval_classifier: None, + tiered_retrieval_validator: None, + fidelity_config: None, + planned_next_tools: &[], + status_tx: None, + }; + + // Populate all 10 message-carrying fields. + let prepared = PreparedContext { + graph_facts: Some(mem_msg("graph_facts")), + doc_rag: Some(mem_msg("doc_rag")), + corrections: Some(mem_msg("corrections")), + recall: Some(mem_msg("recall")), + recall_confidence: Some(0.9), + cross_session: Some(mem_msg("cross_session")), + summaries: Some(mem_msg("summaries")), + code_context: None, // code_context returns via ContextDelta, not inserted_count + persona_facts: Some(mem_msg("persona_facts")), + trajectory_hints: Some(mem_msg("trajectory_hints")), + tree_memory: Some(mem_msg("tree_memory")), + reasoning_hints: Some(mem_msg("reasoning_hints")), + memory_first: false, + recent_history_budget: 100_000, + }; + + let (_delta, inserted_count) = ContextService::new() + .apply_prepared_context(&mut window, &mut view, prepared) + .await; + + // 10 message fields were Some(msg): graph_facts, doc_rag, corrections, recall, + // cross_session, summaries, persona_facts, trajectory_hints, tree_memory, reasoning_hints. + assert_eq!( + inserted_count, 10, + "all 10 message-carrying PreparedContext fields must increment inserted_count" + ); + } + } + mod inject_semantic_recall_tests { use parking_lot::RwLock; use std::borrow::Cow; @@ -1801,6 +2062,9 @@ mod tests { }, tiered_retrieval_classifier: None, tiered_retrieval_validator: None, + fidelity_config: None, + planned_next_tools: &[], + status_tx: None, }; let result = ContextService::new() @@ -1876,6 +2140,9 @@ mod tests { }, tiered_retrieval_classifier: None, tiered_retrieval_validator: None, + fidelity_config: None, + planned_next_tools: &[], + status_tx: None, }; let result = ContextService::new() @@ -1958,6 +2225,9 @@ mod tests { }, tiered_retrieval_classifier: None, tiered_retrieval_validator: None, + fidelity_config: None, + planned_next_tools: &[], + status_tx: None, }; let result = ContextService::new() diff --git a/crates/zeph-agent-context/src/state.rs b/crates/zeph-agent-context/src/state.rs index 80df0b427..067def5b1 100644 --- a/crates/zeph-agent-context/src/state.rs +++ b/crates/zeph-agent-context/src/state.rs @@ -19,8 +19,10 @@ use std::future::Future; use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; +use zeph_common::PlannedToolHint; use zeph_common::SecurityEventCategory; use zeph_common::task_supervisor::{BlockingHandle, TaskSupervisor}; +use zeph_config::FidelityConfig; use zeph_config::{ ContextStrategy, DocumentConfig, GraphConfig, PersonaConfig, ReasoningConfig, TrajectoryConfig, TreeConfig, @@ -228,6 +230,23 @@ pub struct ContextAssemblyView<'a> { /// Resolved from `tiered_retrieval.validator_provider` at agent construction. /// `None` means validation is skipped (evidence accepted as-is). pub tiered_retrieval_validator: Option>, + + // ── CAM: Context-Adaptive Memory (#4547) ───────────────────────────────── + /// Fidelity scoring configuration resolved from `[context.fidelity]`. + /// + /// `None` when fidelity scoring is not configured (treated as `enabled = false`). + /// `Some(&cfg)` with `cfg.enabled = false` is also a no-op (early-return inside scorer). + pub fidelity_config: Option<&'a FidelityConfig>, + /// Lookahead tool hints derived from the orchestration DAG. + /// + /// Empty slice when no DAG lookahead is available (PAACE deferred to P2). The scorer + /// simply zeroes the plan signal when the slice is empty. + pub planned_next_tools: &'a [PlannedToolHint], + /// TUI status channel for spinner updates during fidelity scoring. + /// + /// Mirrors the channel wired in `ContextSummarizationView::status_tx`. `None` in + /// non-TUI modes; the service skips sending when the sender is absent. + pub status_tx: Option>, } /// Values produced by [`crate::service::ContextService::prepare_context`] that must be applied by the caller. @@ -350,6 +369,17 @@ pub struct ContextSummarizationView<'a> { /// `None` when `[memory.compression.typed_pages] enabled = false`. /// Populated by `CompactionAdapters::populate` in `zeph-core`. pub typed_pages: Option>, + + // ── CAM: proactive regrade (AgeMem, #4547) ──────────────────────────────── + /// Fidelity scoring config for proactive regrade in `maybe_compact`. + /// + /// `None` → proactive regrade is skipped (scoring disabled or config absent). + pub fidelity_config: Option, + /// Most recent user query — passed to the scorer as the semantic signal source. + /// + /// Empty string when no query is available (`AgeMem` degrades gracefully to + /// temporal + importance signals only). + pub current_query: String, } impl ContextSummarizationView<'_> { diff --git a/crates/zeph-agent-context/src/summarization/compaction.rs b/crates/zeph-agent-context/src/summarization/compaction.rs index 6989e9039..6c5de0c24 100644 --- a/crates/zeph-agent-context/src/summarization/compaction.rs +++ b/crates/zeph-agent-context/src/summarization/compaction.rs @@ -13,6 +13,7 @@ use std::sync::Arc; +use zeph_common::ContextFidelity; use zeph_context::slot::cap_summary; use zeph_context::typed_page::{ BatchAssertions, CompactedPageRecord, PageOrigin, PageType, TypedPage, TypedPagesState, @@ -267,7 +268,9 @@ fn partition_messages_for_compaction( .enumerate() .filter(|(slice_i, m)| { let actual_i = slice_i + 1; + // INV-02: never include Placeholder messages in LLM summarizer input. !m.metadata.focus_pinned + && m.metadata.fidelity_tag != Some(ContextFidelity::Placeholder) && !matches!( summ.subgoal_registry.subgoal_state(actual_i), Some(SubgoalState::Active) @@ -278,7 +281,11 @@ fn partition_messages_for_compaction( } else { summ.messages[1..compact_end] .iter() - .filter(|m| !m.metadata.focus_pinned) + // INV-02: never include Placeholder messages in LLM summarizer input. + .filter(|m| { + !m.metadata.focus_pinned + && m.metadata.fidelity_tag != Some(ContextFidelity::Placeholder) + }) .cloned() .collect() }; @@ -630,6 +637,7 @@ fn emit_audit_records( #[cfg(test)] mod tests { use super::*; + use zeph_common::ContextFidelity; use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role}; fn make_msg(role: Role, content: &str) -> Message { @@ -687,4 +695,44 @@ mod tests { let adjusted = adjust_compact_end_for_tool_pairs(&messages, 6); assert_eq!(adjusted, 1); } + + // AC-06: Placeholder messages must never appear in compact input (INV-02). + #[test] + fn placeholder_messages_excluded_from_to_compact() { + fn make_placeholder(role: Role, content: &str) -> Message { + let mut m = make_msg(role, content); + m.metadata.fidelity_tag = Some(ContextFidelity::Placeholder); + m + } + + let messages = [ + make_msg(Role::System, "system"), + make_msg(Role::User, "real user msg"), + make_placeholder(Role::Assistant, "[placeholder: role=assistant, ...]"), + make_msg(Role::User, "another real msg"), + make_placeholder(Role::Assistant, "[placeholder: role=assistant, ...]"), + ]; + + // Filter manually as partition_messages_for_compaction would. + let to_compact: Vec<&Message> = messages[1..messages.len()] + .iter() + .filter(|m| { + !m.metadata.focus_pinned + && m.metadata.fidelity_tag != Some(ContextFidelity::Placeholder) + }) + .collect(); + + for msg in &to_compact { + assert_ne!( + msg.metadata.fidelity_tag, + Some(ContextFidelity::Placeholder), + "Placeholder message must not appear in compaction input" + ); + } + assert_eq!( + to_compact.len(), + 2, + "only 2 non-placeholder messages should remain" + ); + } } diff --git a/crates/zeph-common/src/fidelity.rs b/crates/zeph-common/src/fidelity.rs new file mode 100644 index 000000000..13d783c12 --- /dev/null +++ b/crates/zeph-common/src/fidelity.rs @@ -0,0 +1,85 @@ +// SPDX-FileCopyrightText: 2026 Andrei G +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! Fidelity types for Context-Adaptive Memory (CAM). +//! +//! [`ContextFidelity`] is a three-level representation that replaces the binary +//! keep/discard approach used by compaction. [`PlannedToolHint`] carries lookahead +//! hints from the orchestration DAG so the fidelity scorer can bias toward messages +//! that are relevant to upcoming tool calls. + +use serde::{Deserialize, Serialize}; + +/// Fidelity level assigned to a message in the context window. +/// +/// Determines how a historical message is rendered before sending to the LLM. +/// Assigned by `FidelityScorer` based on relevance signals; stored in +/// `MessageMetadata.fidelity_tag` for debug tracing and compaction filtering. +/// +/// # Examples +/// +/// ``` +/// use zeph_common::fidelity::ContextFidelity; +/// +/// let level = ContextFidelity::default(); +/// assert_eq!(level, ContextFidelity::Full); +/// +/// let compressed: u8 = ContextFidelity::Compressed as u8; +/// assert_eq!(compressed, 1); +/// ``` +#[non_exhaustive] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] +#[repr(u8)] +pub enum ContextFidelity { + /// Original message content, unchanged. + #[default] + Full = 0, + /// Content truncated to `compressed_max_tokens` tokens (or replaced by + /// `deferred_summary` when available). + Compressed = 1, + /// Content replaced by a compact placeholder tag; no semantic content + /// survives. + Placeholder = 2, +} + +/// Hint about an upcoming tool call derived from the orchestration DAG. +/// +/// Used by `FidelityScorer` to bias relevance scores toward messages that +/// contain context useful for the next planned operations. In the v0.21 MVP +/// the hints are populated by callers that have access to the DAG lookahead; +/// an empty slice is always safe and disables the plan signal. +/// +/// # Examples +/// +/// ``` +/// use zeph_common::fidelity::PlannedToolHint; +/// +/// let hint = PlannedToolHint::new("shell", vec!["cargo".to_string(), "build".to_string()], 1); +/// assert_eq!(hint.tool_name, "shell"); +/// assert_eq!(hint.distance_from_current, 1); +/// ``` +#[non_exhaustive] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PlannedToolHint { + /// Name of the planned tool. + pub tool_name: String, + /// Keywords extracted from the tool's planned arguments (best-effort). + pub keywords: Vec, + /// Steps until this tool is scheduled. 1 = immediately next, capped at 5. + pub distance_from_current: u8, +} + +impl PlannedToolHint { + /// Creates a new [`PlannedToolHint`]. + pub fn new( + tool_name: impl Into, + keywords: Vec, + distance_from_current: u8, + ) -> Self { + Self { + tool_name: tool_name.into(), + keywords, + distance_from_current, + } + } +} diff --git a/crates/zeph-common/src/lib.rs b/crates/zeph-common/src/lib.rs index 0037fe408..5263f3f62 100644 --- a/crates/zeph-common/src/lib.rs +++ b/crates/zeph-common/src/lib.rs @@ -12,6 +12,7 @@ pub mod audit; pub mod config; pub mod error_taxonomy; +pub mod fidelity; pub mod fs_secure; pub mod hash; #[cfg(feature = "http-middleware")] @@ -37,6 +38,7 @@ pub mod types; /// Format: `[full output stored — ID: {uuid} — {bytes} bytes, use read_overflow tool to retrieve]` pub const OVERFLOW_NOTICE_PREFIX: &str = "[full output stored \u{2014} ID: "; +pub use fidelity::{ContextFidelity, PlannedToolHint}; pub use math::{EmbeddingVector, Normalized, Unnormalized}; pub use policy::{PolicyLlmClient, PolicyMessage, PolicyRole}; pub use security_event::SecurityEventCategory; diff --git a/crates/zeph-config/src/fidelity.rs b/crates/zeph-config/src/fidelity.rs new file mode 100644 index 000000000..34dd71234 --- /dev/null +++ b/crates/zeph-config/src/fidelity.rs @@ -0,0 +1,176 @@ +// SPDX-FileCopyrightText: 2026 Andrei G +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! Configuration for Context-Adaptive Memory (CAM) fidelity scoring. +//! +//! [`FidelityConfig`] is serialised from the `[context.fidelity]` section in `config.toml`. +//! When `enabled = false` (the default) the fidelity scorer is a complete no-op. + +use serde::{Deserialize, Serialize}; + +/// Configuration for the heuristic fidelity scorer (CAM §8.1). +/// +/// All weight fields must be positive. Weights are normalised at runtime by +/// the sum of active weights (INV-05). +/// +/// # Examples +/// +/// ``` +/// use zeph_config::fidelity::FidelityConfig; +/// +/// let cfg = FidelityConfig::default(); +/// assert!(!cfg.enabled, "fidelity scoring is off by default"); +/// assert!((cfg.w_semantic - 0.3).abs() < f32::EPSILON); +/// ``` +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(default)] +pub struct FidelityConfig { + /// Master switch. When `false`, no fidelity scoring occurs. + pub enabled: bool, + /// Cosine/keyword semantic relevance weight. + pub w_semantic: f32, + /// Recency weight. + pub w_temporal: f32, + /// Role-based importance weight. + pub w_importance: f32, + /// Plan-hint relevance weight (active only when `planned_tools` is non-empty). + pub w_plan: f32, + /// Score threshold above which a message retains `Full` fidelity. + pub full_threshold: f32, + /// Score threshold above which a message is `Compressed` (not `Placeholder`). + pub compressed_threshold: f32, + /// Maximum tokens kept when rendering a `Compressed` message. + pub compressed_max_tokens: usize, + /// Budget ratio at which `AgeMem` triggers a proactive regrade. + pub regrade_threshold: f32, + /// Minimum query length for semantic signal to be active. + pub min_query_length: usize, + /// Maximum number of messages scored per turn (performance cap). + pub max_scored_messages: usize, +} + +impl FidelityConfig { + /// Validate threshold ordering: `full_threshold >= compressed_threshold >= 0.0`. + /// + /// Call this at config load time to catch inverted thresholds before they silently + /// misclassify messages (score in `compressed_threshold..full_threshold` becomes Full + /// instead of Compressed when the invariant is violated). + /// + /// # Errors + /// + /// Returns an error string describing the violated constraint. + /// + /// # Examples + /// + /// ``` + /// use zeph_config::fidelity::FidelityConfig; + /// + /// let valid = FidelityConfig::default(); + /// assert!(valid.validate().is_ok()); + /// + /// let invalid = FidelityConfig { full_threshold: 0.2, compressed_threshold: 0.5, ..FidelityConfig::default() }; + /// assert!(invalid.validate().is_err()); + /// ``` + pub fn validate(&self) -> Result<(), String> { + if self.compressed_threshold < 0.0 { + return Err("context.fidelity: compressed_threshold must be >= 0.0".into()); + } + if self.full_threshold > 1.0 { + return Err("context.fidelity: full_threshold must be <= 1.0".into()); + } + if self.full_threshold < self.compressed_threshold { + return Err(format!( + "context.fidelity: full_threshold ({}) must be >= compressed_threshold ({})", + self.full_threshold, self.compressed_threshold + )); + } + Ok(()) + } +} + +impl Default for FidelityConfig { + fn default() -> Self { + Self { + enabled: false, + w_semantic: 0.3, + w_temporal: 0.3, + w_importance: 0.2, + w_plan: 0.2, + full_threshold: 0.7, + compressed_threshold: 0.3, + compressed_max_tokens: 50, + regrade_threshold: 0.6, + min_query_length: 8, + max_scored_messages: 500, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_disabled() { + let cfg = FidelityConfig::default(); + assert!(!cfg.enabled); + } + + #[test] + fn deserialize_enabled() { + let toml_str = r" + enabled = true + w_semantic = 0.4 + regrade_threshold = 0.7 + "; + let cfg: FidelityConfig = toml::from_str(toml_str).unwrap(); + assert!(cfg.enabled); + assert!((cfg.w_semantic - 0.4).abs() < f32::EPSILON); + assert!((cfg.regrade_threshold - 0.7).abs() < f32::EPSILON); + } + + #[test] + fn deserialize_defaults_for_omitted_fields() { + let cfg: FidelityConfig = toml::from_str("enabled = false").unwrap(); + assert!((cfg.w_temporal - 0.3).abs() < f32::EPSILON); + assert_eq!(cfg.compressed_max_tokens, 50); + assert_eq!(cfg.max_scored_messages, 500); + } + + #[test] + fn validate_defaults_ok() { + assert!(FidelityConfig::default().validate().is_ok()); + } + + #[test] + fn validate_inverted_thresholds_err() { + let cfg = FidelityConfig { + full_threshold: 0.2, + compressed_threshold: 0.5, + ..FidelityConfig::default() + }; + let err = cfg.validate().unwrap_err(); + assert!( + err.contains("full_threshold"), + "error should mention full_threshold: {err}" + ); + } + + #[test] + fn validate_negative_compressed_threshold_err() { + let cfg = FidelityConfig { + compressed_threshold: -0.1, + ..FidelityConfig::default() + }; + assert!(cfg.validate().is_err()); + } + + #[test] + fn validate_full_threshold_above_one_err() { + let cfg = FidelityConfig { + full_threshold: 1.1, + ..FidelityConfig::default() + }; + assert!(cfg.validate().is_err()); + } +} diff --git a/crates/zeph-config/src/lib.rs b/crates/zeph-config/src/lib.rs index 2edb7d236..cb8dd9e86 100644 --- a/crates/zeph-config/src/lib.rs +++ b/crates/zeph-config/src/lib.rs @@ -81,6 +81,7 @@ pub mod error; pub mod execution; pub mod experiment; pub mod features; +pub mod fidelity; pub mod hooks; pub mod learning; mod loader; @@ -132,6 +133,7 @@ pub use features::{ SchedulerDaemonConfig, SchedulerSecurityConfig, SkillEvaluationConfig, SkillMiningConfig, SkillPromptMode, SkillsConfig, TraceConfig, VaultBackend, VaultConfig, }; +pub use fidelity::FidelityConfig; pub use hooks::{FileChangedConfig, HooksConfig}; pub use learning::{DetectorMode, LearningConfig}; pub use logging::{LogRotation, LoggingConfig}; diff --git a/crates/zeph-config/src/memory.rs b/crates/zeph-config/src/memory.rs index 15f68489e..ca5ecee26 100644 --- a/crates/zeph-config/src/memory.rs +++ b/crates/zeph-config/src/memory.rs @@ -1037,6 +1037,30 @@ pub struct MemoryConfig { /// to weight `0.0`, preserving exact backward compatibility. #[serde(default)] pub five_signal: FiveSignalConfig, + /// Context-Adaptive Memory fidelity scoring (CAM Phase 1, #4547). + /// + /// When `fidelity.enabled = true`, the heuristic fidelity scorer runs after each + /// `apply_prepared_context()` call and assigns `Full / Compressed / Placeholder` + /// levels to historical messages. Default: disabled. + /// + /// # Example (TOML) + /// + /// ```toml + /// [memory.fidelity] + /// enabled = false + /// w_semantic = 0.3 + /// w_temporal = 0.3 + /// w_importance = 0.2 + /// w_plan = 0.2 + /// full_threshold = 0.7 + /// compressed_threshold = 0.3 + /// compressed_max_tokens = 50 + /// regrade_threshold = 0.6 + /// min_query_length = 8 + /// max_scored_messages = 500 + /// ``` + #[serde(default, skip_serializing_if = "Option::is_none")] + pub fidelity: Option, } // ── MemFlow tiered retrieval config (issue #3712) ────────────────────────────── diff --git a/crates/zeph-config/src/root.rs b/crates/zeph-config/src/root.rs index 3e0a67363..1908f8585 100644 --- a/crates/zeph-config/src/root.rs +++ b/crates/zeph-config/src/root.rs @@ -292,6 +292,7 @@ impl Default for Config { episodic_consolidation: crate::memory::EpisodicConsolidationConfig::default(), shadow_memory: crate::memory::TrajectoryRiskAccumulatorConfig::default(), five_signal: crate::memory::FiveSignalConfig::default(), + fidelity: None, }, telegram: None, discord: None, diff --git a/crates/zeph-context/Cargo.toml b/crates/zeph-context/Cargo.toml index 8e529f4f2..38859f2dc 100644 --- a/crates/zeph-context/Cargo.toml +++ b/crates/zeph-context/Cargo.toml @@ -28,7 +28,12 @@ zeph-config.workspace = true zeph-llm.workspace = true [dev-dependencies] +criterion.workspace = true tempfile.workspace = true +[[bench]] +name = "fidelity_scorer" +harness = false + [lints] workspace = true diff --git a/crates/zeph-context/benches/fidelity_scorer.rs b/crates/zeph-context/benches/fidelity_scorer.rs new file mode 100644 index 000000000..1b9b73327 --- /dev/null +++ b/crates/zeph-context/benches/fidelity_scorer.rs @@ -0,0 +1,77 @@ +// SPDX-FileCopyrightText: 2026 Andrei G +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! Benchmark: AC-11 — `score_and_apply` on 500 synthetic messages must complete <2ms. + +use std::hint::black_box; + +use criterion::{Criterion, criterion_group, criterion_main}; +use zeph_common::memory::TokenCounting; +use zeph_context::fidelity::{FidelityConfig, FidelityScorer}; +use zeph_llm::provider::{Message, MessageMetadata, Role}; + +struct CharDivTc(usize); +impl TokenCounting for CharDivTc { + fn count_tokens(&self, text: &str) -> usize { + text.len() / self.0.max(1) + } + fn count_tool_schema_tokens(&self, _: &serde_json::Value) -> usize { + 0 + } +} + +fn make_synthetic_messages(n: usize) -> Vec { + (0..n) + .map(|i| { + let role = match i % 3 { + 0 => Role::System, + 1 => Role::User, + _ => Role::Assistant, + }; + Message { + role, + content: format!( + "synthetic message content number {i} with some extra words for scoring" + ), + parts: vec![], + metadata: MessageMetadata::default(), + } + }) + .collect() +} + +fn bench_score_500(c: &mut Criterion) { + let scorer = FidelityScorer; + let cfg = FidelityConfig { + enabled: true, + w_temporal: 0.3, + w_importance: 0.2, + w_semantic: 0.3, + w_plan: 0.2, + full_threshold: 0.7, + compressed_threshold: 0.3, + compressed_max_tokens: 50, + regrade_threshold: 0.6, + min_query_length: 5, + max_scored_messages: 500, + }; + let tc = CharDivTc(4); + let base_messages = make_synthetic_messages(500); + + c.bench_function("fidelity_score_and_apply_500", |b| { + b.iter(|| { + let mut messages = base_messages.clone(); + scorer.score_and_apply( + black_box(&mut messages), + black_box("query words for semantic signal"), + black_box(&[]), + black_box(&cfg), + black_box(&tc), + black_box(0), + ); + }); + }); +} + +criterion_group!(benches, bench_score_500); +criterion_main!(benches); diff --git a/crates/zeph-context/src/fidelity.rs b/crates/zeph-context/src/fidelity.rs new file mode 100644 index 000000000..0479f17be --- /dev/null +++ b/crates/zeph-context/src/fidelity.rs @@ -0,0 +1,807 @@ +// SPDX-FileCopyrightText: 2026 Andrei G +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! Heuristic fidelity scorer for Context-Adaptive Memory (CAM). +//! +//! [`FidelityScorer`] is a stateless scoring engine that assigns a three-level +//! representation ([`ContextFidelity::Full`] / [`ContextFidelity::Compressed`] / +//! [`ContextFidelity::Placeholder`]) to each message in the context window. Scoring is +//! driven by weighted signals: temporal recency, role importance, keyword-based semantic +//! relevance, and optional plan hints. +//! +//! [`FidelityConfig`] holds all tuning knobs; it is read from `[memory.fidelity]` in +//! `config.toml`. When `enabled = false` (the default), the scorer returns immediately +//! without modifying the message window. + +use tracing::info_span; +use zeph_common::memory::TokenCounting; +use zeph_common::{ContextFidelity, PlannedToolHint}; +use zeph_llm::provider::{Message, MessagePart, Role}; + +use crate::assembler::CORRECTIONS_PREFIX; + +// Re-export FidelityConfig from zeph-config so both crates share one definition. +pub use zeph_config::FidelityConfig; + +struct FidelityScore { + score: f32, + level: ContextFidelity, + original_tokens: u32, +} + +/// Stateless heuristic scorer that assigns and applies fidelity levels to a message window. +/// +/// Call [`FidelityScorer::score_and_apply`] after `apply_prepared_context()` returns to +/// enforce the three-level representation (Full / Compressed / Placeholder) on historical +/// messages. The scorer never touches exempt messages (INV-07 through INV-10). +/// +/// # Examples +/// +/// ``` +/// use zeph_context::fidelity::{FidelityConfig, FidelityScorer}; +/// +/// let scorer = FidelityScorer; +/// let cfg = FidelityConfig { enabled: false, ..FidelityConfig::default() }; +/// // With `enabled = false` the scorer is a no-op. +/// let mut messages = vec![]; +/// scorer.score_and_apply(&mut messages, "query", &[], &cfg, &MockTc, 0); +/// +/// struct MockTc; +/// impl zeph_common::memory::TokenCounting for MockTc { +/// fn count_tokens(&self, text: &str) -> usize { text.len() / 4 } +/// fn count_tool_schema_tokens(&self, _: &serde_json::Value) -> usize { 0 } +/// } +/// ``` +pub struct FidelityScorer; + +impl FidelityScorer { + /// Score all non-exempt messages and apply fidelity rendering in-place. + /// + /// Steps (per spec §5 data flow): + /// 1. Guard: return early when `enabled == false`. + /// 2. Build exempt set (INV-07 through INV-10). + /// 3. Score each non-exempt message with normalized weight sum (INV-05). + /// 4. Apply tool-pair atomicity — both get `min(score_a, score_b)` (INV-03). + /// 5. Render `Compressed` / `Placeholder` messages (INV-12). + /// 6. Merge consecutive same-role `Placeholder` messages (INV-04). + /// + /// # Parameters + /// + /// - `messages` — mutable message window (includes system prompt at index 0). + /// - `query` — current user query; drives semantic signal. + /// - `planned_tools` — DAG lookahead hints; empty slice disables plan signal. + /// - `config` — scoring thresholds and weights. + /// - `tc` — token counter used for `Placeholder`/`Compressed` rendering. + /// - `inserted_count` — number of memory messages freshly injected at indices + /// `1..1+inserted_count`; these are always exempt (INV-10). + pub fn score_and_apply( + &self, + messages: &mut Vec, + query: &str, + planned_tools: &[PlannedToolHint], + config: &FidelityConfig, + tc: &dyn TokenCounting, + inserted_count: usize, + ) { + if !config.enabled || messages.is_empty() { + return; + } + + let _span = info_span!( + "context.fidelity.score", + message_count = messages.len(), + query_len = query.len() + ) + .entered(); + + let scores = compute_scores(messages, query, planned_tools, config, tc, inserted_count); + apply_scores(messages, &scores, config, tc); + + let _merge_span = info_span!("context.fidelity.merge").entered(); + let merged_count = merge_consecutive_placeholders(messages); + tracing::debug!(merged_count, "fidelity merge complete"); + } +} + +fn compute_scores( + messages: &[Message], + query: &str, + planned_tools: &[PlannedToolHint], + config: &FidelityConfig, + tc: &dyn TokenCounting, + inserted_count: usize, +) -> Vec> { + let n = messages.len(); + + // Performance cap: only score oldest (n - 250) messages; newest 250 default to Full. + let score_end = if n > config.max_scored_messages { + n.saturating_sub(250) + } else { + n + }; + + let semantic_active = query.len() >= config.min_query_length; + let plan_active = !planned_tools.is_empty(); + // Build once outside the per-message loop (SF-1: avoids 500 redundant allocations). + let query_words: std::collections::HashSet<&str> = if semantic_active { + query.split_whitespace().collect() + } else { + std::collections::HashSet::default() + }; + + // Compute the active weight sum (INV-05). + let mut weight_sum = config.w_temporal + config.w_importance; + if semantic_active { + weight_sum += config.w_semantic; + } + if plan_active { + weight_sum += config.w_plan; + } + if weight_sum <= 0.0 { + weight_sum = 1.0; + } + + #[allow(clippy::cast_precision_loss)] + let max_dist = score_end.saturating_sub(1) as f32; + + let mut scores: Vec> = (0..n).map(|_| None).collect(); + + for (i, msg) in messages.iter().enumerate().take(score_end) { + if is_exempt(msg, i, inserted_count) { + continue; + } + + #[allow(clippy::cast_possible_truncation)] + let original_tokens = tc.count_tokens(&msg.content) as u32; + + // distance_from_end = 0 for newest (i = score_end-1), N-1 for oldest (i = 0). + // Spec §6.1: temporal = 1.0 - distance_from_end / max_dist → newest = 1.0, oldest ≈ 0.0. + #[allow(clippy::cast_precision_loss)] + let temporal = if max_dist > 0.0 { + let distance_from_end = (score_end - 1 - i) as f32; + 1.0 - distance_from_end / max_dist + } else { + 1.0 + }; + // Spec §6.2: ToolResult messages use weight 0.4 regardless of Role::User mapping. + let importance = if msg + .parts + .iter() + .any(|p| matches!(p, MessagePart::ToolResult { .. })) + { + 0.4 + } else { + role_weight(msg.role) + }; + let semantic = if semantic_active { + keyword_overlap(&msg.content, &query_words) + } else { + 0.0 + }; + let plan = if plan_active { + plan_relevance(&msg.content, planned_tools) + } else { + 0.0 + }; + + let raw = config.w_temporal * temporal + + config.w_importance * importance + + if semantic_active { + config.w_semantic * semantic + } else { + 0.0 + } + + if plan_active { + config.w_plan * plan + } else { + 0.0 + }; + + let score = (raw / weight_sum).clamp(0.0, 1.0); + let level = score_to_level(score, config); + scores[i] = Some(FidelityScore { + score, + level, + original_tokens, + }); + } + + apply_tool_pair_atomicity(messages, &mut scores, config); + scores +} + +fn apply_scores( + messages: &mut [Message], + scores: &[Option], + config: &FidelityConfig, + tc: &dyn TokenCounting, +) { + let _apply_span = info_span!("context.fidelity.apply").entered(); + let (mut full_count, mut compressed_count, mut placeholder_count, mut tokens_saved) = + (0u32, 0u32, 0u32, 0u32); + + for (i, msg) in messages.iter_mut().enumerate() { + let Some(ref fs) = scores[i] else { continue }; + match fs.level { + ContextFidelity::Compressed => { + #[allow(clippy::cast_possible_truncation)] + let original_tokens = fs.original_tokens; + render_compressed(msg, config, tc); + #[allow(clippy::cast_possible_truncation)] + let new_tokens = tc.count_tokens(&msg.content) as u32; + tokens_saved += original_tokens.saturating_sub(new_tokens); + compressed_count += 1; + } + ContextFidelity::Placeholder => { + render_placeholder(msg, fs.score, fs.original_tokens, tc); + placeholder_count += 1; + } + // Full and any future variants keep original content. + _ => { + msg.metadata.fidelity_tag = Some(ContextFidelity::Full); + full_count += 1; + } + } + } + + tracing::debug!( + full_count, + compressed_count, + placeholder_count, + tokens_saved, + "fidelity apply complete" + ); +} + +fn is_exempt(msg: &Message, idx: usize, inserted_count: usize) -> bool { + // INV-07: system prompt at index 0. + // INV-08: focus_pinned messages. + // INV-09: correction messages. + // INV-10: freshly injected memory context at indices 1..1+inserted_count. + (idx == 0 && msg.role == Role::System) + || msg.metadata.focus_pinned + || msg.content.starts_with(CORRECTIONS_PREFIX) + || (idx >= 1 && idx < 1 + inserted_count) +} + +fn role_weight(role: Role) -> f32 { + match role { + Role::System => 1.0, + Role::User => 0.8, + Role::Assistant => 0.6, + } +} + +/// Simple word-intersection semantic overlap, normalized to [0, 1]. +/// +/// `query_words` is pre-built outside the per-message loop (SF-1). +fn keyword_overlap(content: &str, query_words: &std::collections::HashSet<&str>) -> f32 { + let content_words: std::collections::HashSet<&str> = content.split_whitespace().collect(); + let min_len = content_words.len().min(query_words.len()); + if min_len == 0 { + return 0.0; + } + #[allow(clippy::cast_precision_loss)] + let result = content_words.intersection(query_words).count() as f32 / min_len as f32; + result.clamp(0.0, 1.0) +} + +/// Keyword overlap between message content and planned tool keywords. +/// +/// Weighted by `1.0 / distance_from_current` and averaged across all hints. +fn plan_relevance(content: &str, planned_tools: &[PlannedToolHint]) -> f32 { + if planned_tools.is_empty() { + return 0.0; + } + let content_words: std::collections::HashSet<&str> = content.split_whitespace().collect(); + let mut weighted_sum = 0.0f32; + let mut weight_total = 0.0f32; + for hint in planned_tools { + let dist = f32::from(hint.distance_from_current.max(1)); + let weight = 1.0 / dist; + weight_total += weight; + let hint_words: std::collections::HashSet<&str> = + hint.keywords.iter().map(String::as_str).collect(); + let min_len = content_words.len().min(hint_words.len()); + if min_len == 0 { + continue; + } + #[allow(clippy::cast_precision_loss)] + let overlap = content_words.intersection(&hint_words).count() as f32 / min_len as f32; + weighted_sum += weight * overlap.clamp(0.0, 1.0); + } + if weight_total <= 0.0 { + return 0.0; + } + (weighted_sum / weight_total).clamp(0.0, 1.0) +} + +/// O(N) backward scan: find `ToolUse`/`ToolResult` pairs and assign `min(score_a, score_b)`. +fn apply_tool_pair_atomicity( + messages: &[Message], + scores: &mut [Option], + config: &FidelityConfig, +) { + // Collect (tool_use_id, message_index) for ToolResult messages. + let mut tool_result_map: std::collections::HashMap<&str, usize> = + std::collections::HashMap::new(); + for (i, msg) in messages.iter().enumerate() { + for part in &msg.parts { + if let MessagePart::ToolResult { tool_use_id, .. } = part { + tool_result_map.insert(tool_use_id.as_str(), i); + } + } + } + + // Walk backward to find ToolUse messages and pair with their result. + for (i, msg) in messages.iter().enumerate().rev() { + for part in &msg.parts { + if let MessagePart::ToolUse { id, .. } = part + && let Some(&result_idx) = tool_result_map.get(id.as_str()) + { + let score_a = scores[i].as_ref().map_or(1.0, |s| s.score); + let score_b = scores[result_idx].as_ref().map_or(1.0, |s| s.score); + let min_score = score_a.min(score_b); + let min_level = score_to_level(min_score, config); + let tokens_a = scores[i].as_ref().map_or(0, |s| s.original_tokens); + let tokens_b = scores[result_idx].as_ref().map_or(0, |s| s.original_tokens); + scores[i] = Some(FidelityScore { + score: min_score, + level: min_level, + original_tokens: tokens_a, + }); + scores[result_idx] = Some(FidelityScore { + score: min_score, + level: min_level, + original_tokens: tokens_b, + }); + } + } + } +} + +fn score_to_level(score: f32, config: &FidelityConfig) -> ContextFidelity { + if score >= config.full_threshold { + ContextFidelity::Full + } else if score >= config.compressed_threshold { + ContextFidelity::Compressed + } else { + ContextFidelity::Placeholder + } +} + +fn render_compressed(msg: &mut Message, config: &FidelityConfig, tc: &dyn TokenCounting) { + if let Some(summary) = msg.metadata.deferred_summary.take() { + msg.content = summary; + } else { + truncate_to_tokens(&mut msg.content, config.compressed_max_tokens, tc); + } + msg.parts.clear(); + msg.metadata.fidelity_tag = Some(ContextFidelity::Compressed); +} + +fn truncate_to_tokens(content: &mut String, max_tokens: usize, tc: &dyn TokenCounting) { + if tc.count_tokens(content) <= max_tokens { + return; + } + let mut len = content.len(); + while len > 0 && tc.count_tokens(&content[..len]) > max_tokens { + len /= 2; + while len > 0 && !content.is_char_boundary(len) { + len -= 1; + } + } + content.truncate(len); +} + +fn render_placeholder(msg: &mut Message, score: f32, original_tokens: u32, tc: &dyn TokenCounting) { + let role_str = match msg.role { + Role::System => "system", + Role::User => "user", + Role::Assistant => "assistant", + }; + msg.content = format!( + "[placeholder: role={role_str}, original_tokens={original_tokens}, importance={score:.2}]" + ); + msg.parts.clear(); + msg.metadata.fidelity_tag = Some(ContextFidelity::Placeholder); + // Count tokens on the rendered string (INV-12 / spec §7.2). + let _ = tc.count_tokens(&msg.content); +} + +/// Merge consecutive same-role `Placeholder` messages into a single merged placeholder. +/// +/// Returns the number of individual messages consumed by merges. +fn merge_consecutive_placeholders(messages: &mut Vec) -> usize { + let mut merged_count = 0usize; + let mut i = 0; + while i < messages.len() { + if messages[i].metadata.fidelity_tag != Some(ContextFidelity::Placeholder) + || messages[i].role == Role::System + { + i += 1; + continue; + } + let role = messages[i].role; + let mut j = i + 1; + while j < messages.len() + && messages[j].metadata.fidelity_tag == Some(ContextFidelity::Placeholder) + && messages[j].role == role + { + j += 1; + } + if j - i <= 1 { + i += 1; + continue; + } + let count = j - i; + let mut total_tokens = 0u32; + let mut importance_sum = 0.0f32; + for msg in &messages[i..j] { + total_tokens += parse_placeholder_tokens(&msg.content); + importance_sum += parse_placeholder_importance(&msg.content); + } + debug_assert!(count >= 2, "placeholder merge triggered with count={count}"); + #[allow(clippy::cast_precision_loss)] + let avg_importance = if count > 0 { + importance_sum / count as f32 + } else { + 0.0 + }; + let role_str = match role { + Role::System => "system", + Role::User => "user", + Role::Assistant => "assistant", + }; + let merged_content = format!( + "[placeholder: {count} messages, role={role_str}, total_tokens={total_tokens}, avg_importance={avg_importance:.2}]" + ); + let first = messages[i].clone(); + messages.drain(i..j); + messages.insert( + i, + Message { + role: first.role, + content: merged_content, + parts: vec![], + metadata: { + let mut m = first.metadata; + m.fidelity_tag = Some(ContextFidelity::Placeholder); + m + }, + }, + ); + merged_count += count - 1; + i += 1; + } + merged_count +} + +fn parse_placeholder_tokens(content: &str) -> u32 { + for part in content.split(',') { + let part = part.trim(); + for prefix in &["original_tokens=", "total_tokens="] { + if let Some(rest) = part.strip_prefix(prefix) + && let Ok(n) = rest.trim_end_matches(']').trim().parse::() + { + return n; + } + } + } + 0 +} + +fn parse_placeholder_importance(content: &str) -> f32 { + for part in content.split(',') { + let part = part.trim(); + for prefix in &["importance=", "avg_importance="] { + if let Some(rest) = part.strip_prefix(prefix) + && let Ok(v) = rest.trim_end_matches(']').trim().parse::() + { + return v; + } + } + } + 0.0 +} + +#[cfg(test)] +mod tests { + use super::*; + use zeph_llm::provider::{Message, MessageMetadata, MessagePart, Role}; + + struct FixedTc(usize); + impl TokenCounting for FixedTc { + fn count_tokens(&self, text: &str) -> usize { + text.len() / self.0.max(1) + } + + fn count_tool_schema_tokens(&self, _schema: &serde_json::Value) -> usize { + 0 + } + } + + fn make_msg(role: Role, content: &str) -> Message { + Message { + role, + content: content.to_string(), + parts: vec![], + metadata: MessageMetadata::default(), + } + } + + fn make_cfg() -> FidelityConfig { + FidelityConfig { + enabled: true, + w_semantic: 0.3, + w_temporal: 0.3, + w_importance: 0.2, + w_plan: 0.2, + full_threshold: 0.7, + compressed_threshold: 0.3, + compressed_max_tokens: 50, + regrade_threshold: 0.6, + min_query_length: 8, + max_scored_messages: 500, + } + } + + // 1. Empty window → no change. + #[test] + fn empty_window_no_change() { + let scorer = FidelityScorer; + let cfg = make_cfg(); + let tc = FixedTc(4); + let mut messages: Vec = vec![]; + scorer.score_and_apply(&mut messages, "query text", &[], &cfg, &tc, 0); + assert!(messages.is_empty()); + } + + // 2. All-exempt window → no downgrade. + #[test] + fn all_exempt_no_downgrade() { + let scorer = FidelityScorer; + let cfg = make_cfg(); + let tc = FixedTc(4); + let mut messages = vec![ + make_msg(Role::System, "system prompt"), + // Injected memory at index 1 with inserted_count=1. + make_msg(Role::User, "memory context"), + ]; + scorer.score_and_apply(&mut messages, "short", &[], &cfg, &tc, 1); + for msg in &messages { + assert!( + msg.metadata.fidelity_tag.is_none() + || msg.metadata.fidelity_tag == Some(ContextFidelity::Full) + ); + } + } + + // 3. Tool pair atomicity: divergent scores → min applied. + #[test] + fn tool_pair_atomicity() { + let scorer = FidelityScorer; + // Very high thresholds to force Placeholder for older messages. + let cfg = FidelityConfig { + full_threshold: 0.9, + compressed_threshold: 0.5, + ..make_cfg() + }; + let tc = FixedTc(4); + let tool_use_id = "abc123".to_string(); + let mut tool_use_msg = make_msg(Role::Assistant, "calling tool"); + tool_use_msg.parts = vec![MessagePart::ToolUse { + id: tool_use_id.clone(), + name: "shell".to_string(), + input: serde_json::json!({}), + }]; + let mut tool_result_msg = make_msg(Role::User, "tool result body"); + tool_result_msg.parts = vec![MessagePart::ToolResult { + tool_use_id: tool_use_id.clone(), + content: "result".to_string(), + is_error: false, + }]; + let mut messages = vec![ + make_msg(Role::System, "system"), + tool_use_msg, + tool_result_msg, + ]; + scorer.score_and_apply( + &mut messages, + "completely unrelated query blah", + &[], + &cfg, + &tc, + 0, + ); + let tag_a = messages[1].metadata.fidelity_tag; + let tag_b = messages[2].metadata.fidelity_tag; + assert_eq!(tag_a, tag_b, "tool pair must share fidelity level"); + } + + // 4. Same-role Placeholder merge: 5 consecutive assistant → merged to 1. + #[test] + fn same_role_placeholder_merge() { + let scorer = FidelityScorer; + // Force all non-system messages to become Placeholder. + let cfg = FidelityConfig { + full_threshold: 2.0, // impossible to reach + compressed_threshold: 1.5, // impossible to reach + ..make_cfg() + }; + let tc = FixedTc(4); + let mut messages: Vec = std::iter::once(make_msg(Role::System, "system")) + .chain((0..5).map(|i| make_msg(Role::Assistant, &format!("msg {i}")))) + .collect(); + scorer.score_and_apply(&mut messages, "some query here", &[], &cfg, &tc, 0); + // System + 1 merged placeholder. + assert_eq!( + messages.len(), + 2, + "5 assistant placeholders must merge to 1" + ); + assert!(messages[1].content.contains("5 messages")); + } + + // 5. Score normalization: active signal subset still produces [0,1]. + #[test] + fn score_normalization_no_panic() { + let scorer = FidelityScorer; + let cfg = make_cfg(); + let tc = FixedTc(4); + let mut messages = vec![ + make_msg(Role::System, "system"), + make_msg(Role::User, "hello"), + make_msg(Role::Assistant, "world response"), + ]; + scorer.score_and_apply(&mut messages, "hello world signal", &[], &cfg, &tc, 0); + for msg in &messages { + let _ = msg.metadata.fidelity_tag; + } + } + + // 6. Short query fallback: query.len() < 8 → semantic signal excluded. + #[test] + fn short_query_fallback() { + let scorer = FidelityScorer; + let cfg = FidelityConfig { + min_query_length: 8, + ..make_cfg() + }; + let tc = FixedTc(4); + let mut messages = vec![ + make_msg(Role::System, "system"), + make_msg(Role::User, "test"), + ]; + // Must not panic or produce out-of-range scores. + scorer.score_and_apply(&mut messages, "short", &[], &cfg, &tc, 0); + } + + // 7. AC-09: memory_first bypass is the caller's responsibility. + // When enabled=false, score_and_apply is always a no-op — callers that activate + // memory_first simply skip the call (see service.rs guard at INV-11). + // This test documents the contract: the scorer itself is stateless and harmless + // when called with disabled config or an all-exempt window. + #[test] + fn memory_first_bypass_is_callers_responsibility() { + let scorer = FidelityScorer; + // Simulate: caller would skip this call when memory_first=true. + // The scorer itself must be a complete no-op when enabled=false. + let cfg = FidelityConfig { + enabled: false, + ..make_cfg() + }; + let tc = FixedTc(4); + let mut messages = vec![ + make_msg(Role::System, "system prompt"), + make_msg(Role::User, "memory-injected context"), + make_msg(Role::Assistant, "response"), + ]; + let before: Vec<_> = messages.iter().map(|m| m.content.clone()).collect(); + // Even with a real query, disabled scorer must not touch any message. + scorer.score_and_apply( + &mut messages, + "some user query text here", + &[], + &cfg, + &tc, + 2, + ); + for (msg, orig) in messages.iter().zip(&before) { + assert_eq!(msg.content, *orig, "content must be unchanged"); + assert!( + msg.metadata.fidelity_tag.is_none(), + "no fidelity tag must be set" + ); + } + } + + // 9. enabled=false guard: no changes applied. + #[test] + fn enabled_false_guard() { + let scorer = FidelityScorer; + let cfg = FidelityConfig { + enabled: false, + ..make_cfg() + }; + let tc = FixedTc(4); + let mut messages = vec![ + make_msg(Role::System, "system"), + make_msg(Role::User, "user message that would normally be scored"), + ]; + let original_contents: Vec = messages.iter().map(|m| m.content.clone()).collect(); + scorer.score_and_apply(&mut messages, "query text here", &[], &cfg, &tc, 0); + for (msg, orig) in messages.iter().zip(&original_contents) { + assert_eq!(msg.content, *orig); + assert!(msg.metadata.fidelity_tag.is_none()); + } + } + + // 10. Score always in [0.0, 1.0] for extreme inputs (zero weights). + #[test] + fn score_always_in_range() { + let scorer = FidelityScorer; + let cfg = FidelityConfig { + enabled: true, + w_semantic: 0.0, + w_temporal: 0.0, + w_importance: 0.0, + w_plan: 0.0, + full_threshold: 0.7, + compressed_threshold: 0.3, + compressed_max_tokens: 50, + regrade_threshold: 0.6, + min_query_length: 0, + max_scored_messages: 500, + }; + let tc = FixedTc(4); + let mut messages = vec![make_msg(Role::System, ""), make_msg(Role::User, "")]; + // Must not panic with zero weights. + scorer.score_and_apply(&mut messages, "", &[], &cfg, &tc, 0); + } + + // 11. Token count uses tc.count_tokens for Placeholder rendering. + #[test] + fn placeholder_uses_tc_count_tokens() { + let scorer = FidelityScorer; + let cfg = FidelityConfig { + full_threshold: 2.0, + compressed_threshold: 1.5, + ..make_cfg() + }; + let tc = FixedTc(1); // every character = 1 token + let mut messages = vec![ + make_msg(Role::System, "system"), + make_msg(Role::User, "user message content for placeholder rendering"), + ]; + scorer.score_and_apply(&mut messages, "some query text here", &[], &cfg, &tc, 0); + assert_eq!( + messages[1].metadata.fidelity_tag, + Some(ContextFidelity::Placeholder) + ); + assert!(messages[1].content.starts_with("[placeholder:")); + } + + // 12. Compressed rendering uses deferred_summary when available. + #[test] + fn compressed_uses_deferred_summary() { + let scorer = FidelityScorer; + let cfg = FidelityConfig { + full_threshold: 2.0, // nothing reaches Full + compressed_threshold: 0.0, // everything at or above 0 → Compressed + compressed_max_tokens: 5, + ..make_cfg() + }; + let tc = FixedTc(4); + let mut msg_with_summary = + make_msg(Role::User, "original long content that would be truncated"); + msg_with_summary.metadata.deferred_summary = Some("short summary".to_string()); + let mut messages = vec![make_msg(Role::System, "system"), msg_with_summary]; + scorer.score_and_apply(&mut messages, "query text here long", &[], &cfg, &tc, 0); + assert_eq!( + messages[1].metadata.fidelity_tag, + Some(ContextFidelity::Compressed) + ); + assert_eq!(messages[1].content, "short summary"); + } +} diff --git a/crates/zeph-context/src/input.rs b/crates/zeph-context/src/input.rs index 9e85a664e..d65269c86 100644 --- a/crates/zeph-context/src/input.rs +++ b/crates/zeph-context/src/input.rs @@ -11,11 +11,14 @@ use std::borrow::Cow; use std::sync::Arc; +use zeph_common::PlannedToolHint; use zeph_common::memory::{CompressionLevel, ContextMemoryBackend}; use zeph_config::{ DocumentConfig, GraphConfig, PersonaConfig, ReasoningConfig, TrajectoryConfig, TreeConfig, }; +use crate::fidelity::FidelityConfig; + use crate::manager::ContextManager; /// All borrowed data needed to assemble context for one agent turn. @@ -59,6 +62,12 @@ pub struct ContextAssemblyInput<'a> { /// Pre-built memory router for this turn. Built by `zeph-core` via `build_memory_router()` /// and passed in to avoid a `zeph-memory` dependency inside `zeph-context`. pub router: Box, + /// Lookahead hints from the orchestration DAG for plan-aware scoring. + /// + /// Pass `&[]` when no DAG context is available (PAACE data structure only in MVP). + pub planned_next_tools: &'a [PlannedToolHint], + /// Fidelity scorer configuration. `None` disables all fidelity scoring for this turn. + pub fidelity_config: Option<&'a FidelityConfig>, } /// Configuration extracted from `LearningEngine` needed by correction recall. diff --git a/crates/zeph-context/src/lib.rs b/crates/zeph-context/src/lib.rs index 7c2ddce57..15d61c430 100644 --- a/crates/zeph-context/src/lib.rs +++ b/crates/zeph-context/src/lib.rs @@ -25,6 +25,7 @@ pub mod assembler; pub mod budget; pub mod compression_feedback; pub mod error; +pub mod fidelity; pub mod input; pub mod manager; pub mod microcompact; diff --git a/crates/zeph-context/src/manager.rs b/crates/zeph-context/src/manager.rs index 517c9b4ed..c0552e32c 100644 --- a/crates/zeph-context/src/manager.rs +++ b/crates/zeph-context/src/manager.rs @@ -166,6 +166,10 @@ pub struct ContextManager { /// `None` = no hard compaction has occurred yet in this session. /// `Some(n)` = n turns have elapsed since the last hard compaction. pub(crate) turns_since_last_hard_compaction: Option, + /// Whether a proactive fidelity regrade has already fired this turn (INV-06). + /// + /// Reset to `false` by `advance_turn()` at each turn boundary. + pub(crate) regraded_this_turn: bool, } impl ContextManager { @@ -184,6 +188,7 @@ impl ContextManager { compaction: CompactionState::Ready, compaction_cooldown_turns: 2, turns_since_last_hard_compaction: None, + regraded_this_turn: false, } } @@ -350,6 +355,100 @@ impl ContextManager { self.turns_since_last_hard_compaction = value; } + /// Reset the per-turn regrade flag at the start of a new user turn. + /// + /// Must be called alongside `CompactionState::advance_turn()` at each turn boundary. + /// + /// # Examples + /// + /// ``` + /// use zeph_context::manager::ContextManager; + /// + /// let mut cm = ContextManager::new(); + /// cm.set_regraded_this_turn(true); + /// cm.advance_turn(); + /// // regraded_this_turn is reset to false — proactive regrade is available again + /// assert!(!cm.should_proactively_regrade(0, 0.6, false)); + /// ``` + pub fn advance_turn(&mut self) { + self.regraded_this_turn = false; + self.compaction = self.compaction.advance_turn(); + } + + /// Mark that a proactive fidelity regrade has fired this turn (INV-06). + /// + /// Called by the caller after `should_proactively_regrade` returns `true` and the scorer + /// has been applied. Prevents a second regrade in the same turn. + /// + /// # Examples + /// + /// ``` + /// use zeph_context::manager::ContextManager; + /// use zeph_context::budget::ContextBudget; + /// + /// let mut cm = ContextManager::new(); + /// cm.set_regraded_this_turn(true); + /// assert!(!cm.should_proactively_regrade(0, 0.6, false)); // guarded by regraded flag + /// cm.advance_turn(); + /// assert!(!cm.should_proactively_regrade(0, 0.6, false)); // resets after advance_turn + /// ``` + pub fn set_regraded_this_turn(&mut self, value: bool) { + self.regraded_this_turn = value; + } + + /// Whether a proactive fidelity regrade should fire for the current context state. + /// + /// Returns `true` only when all of the following hold: + /// 1. No regrade has fired this turn yet (`regraded_this_turn == false`). + /// 2. The compaction subsystem is not exhausted. + /// 3. If server compaction is active, budget usage is below 95%. + /// 4. Budget usage exceeds `regrade_threshold`. + /// + /// # Parameters + /// + /// - `cached_tokens` — current token count in the message window. + /// - `regrade_threshold` — fraction of max tokens at which regrade triggers (e.g. `0.6`). + /// - `server_compaction_active` — whether Claude server-side compaction is in use. + /// + /// # Examples + /// + /// ``` + /// use zeph_context::manager::ContextManager; + /// use zeph_context::budget::ContextBudget; + /// + /// let mut cm = ContextManager::new(); + /// cm.budget = Some(ContextBudget::new(100_000, 0.1)); + /// // At 70% budget with threshold 0.6 → should regrade. + /// assert!(cm.should_proactively_regrade(70_000, 0.6, false)); + /// ``` + #[must_use] + #[allow(clippy::cast_precision_loss)] + pub fn should_proactively_regrade( + &self, + cached_tokens: u64, + regrade_threshold: f32, + server_compaction_active: bool, + ) -> bool { + if self.regraded_this_turn { + return false; + } + if self.compaction.is_exhausted() { + return false; + } + let Some(ref budget) = self.budget else { + return false; + }; + let max = budget.max_tokens() as f64; + if max <= 0.0 { + return false; + } + let ratio = cached_tokens as f64 / max; + if server_compaction_active && ratio < 0.95 { + return false; + } + ratio > f64::from(regrade_threshold) + } + /// Will return `None` if compaction already happened this turn (CRIT-03 fix). #[must_use] pub fn should_proactively_compress(&self, current_tokens: u64) -> Option<(usize, usize)> { @@ -518,4 +617,81 @@ mod tests { // No budget set → cannot compute threshold → None. assert!(cm.should_proactively_compress(999_999).is_none()); } + + // AC-07: regraded_this_turn resets to false after advance_turn(). + #[test] + fn advance_turn_resets_regraded_this_turn() { + let mut cm = ContextManager::new(); + cm.regraded_this_turn = true; + cm.advance_turn(); + assert!( + !cm.regraded_this_turn, + "regraded_this_turn must reset after advance_turn" + ); + } + + // AC-08: should_proactively_regrade returns false if already regraded this turn. + #[test] + fn regrade_blocked_if_already_regraded_this_turn() { + let mut cm = ContextManager::new(); + cm.budget = Some(ContextBudget::new(100_000, 0.1)); + cm.regraded_this_turn = true; + assert!( + !cm.should_proactively_regrade(70_000, 0.6, false), + "must not regrade twice in the same turn" + ); + } + + #[test] + fn regrade_fires_above_threshold() { + let mut cm = ContextManager::new(); + cm.budget = Some(ContextBudget::new(100_000, 0.1)); + assert!( + cm.should_proactively_regrade(70_000, 0.6, false), + "must fire when budget ratio > threshold" + ); + } + + #[test] + fn regrade_does_not_fire_below_threshold() { + let mut cm = ContextManager::new(); + cm.budget = Some(ContextBudget::new(100_000, 0.1)); + assert!( + !cm.should_proactively_regrade(50_000, 0.6, false), + "must not fire when budget ratio <= threshold" + ); + } + + #[test] + fn regrade_blocked_when_exhausted() { + let mut cm = ContextManager::new(); + cm.budget = Some(ContextBudget::new(100_000, 0.1)); + cm.compaction = CompactionState::Exhausted { warned: false }; + assert!( + !cm.should_proactively_regrade(80_000, 0.6, false), + "must not fire when compaction is exhausted" + ); + } + + #[test] + fn regrade_blocked_by_server_compaction_at_sub_95() { + let mut cm = ContextManager::new(); + cm.budget = Some(ContextBudget::new(100_000, 0.1)); + // 80% budget, server_compaction_active=true → ratio < 0.95 → blocked. + assert!( + !cm.should_proactively_regrade(80_000, 0.6, true), + "must not fire with server compaction active below 95%" + ); + } + + #[test] + fn regrade_fires_with_server_compaction_at_95() { + let mut cm = ContextManager::new(); + cm.budget = Some(ContextBudget::new(100_000, 0.1)); + // 96% budget, server_compaction_active=true → ratio >= 0.95 → fires. + assert!( + cm.should_proactively_regrade(96_000, 0.6, true), + "must fire with server compaction active at >= 95%" + ); + } } diff --git a/crates/zeph-core/src/agent/builder.rs b/crates/zeph-core/src/agent/builder.rs index 61f08d44a..baf7789e4 100644 --- a/crates/zeph-core/src/agent/builder.rs +++ b/crates/zeph-core/src/agent/builder.rs @@ -2088,6 +2088,7 @@ impl Agent { recap, loop_min_interval_secs, goal_config, + fidelity_config, } = cfg; self.tool_orchestrator.apply_config( @@ -2171,6 +2172,7 @@ impl Agent { let turn_delay = tokio::time::Duration::from_millis(goal_config.autonomous_turn_delay_ms.max(1)); self.services.autonomous = crate::goal::AutonomousDriver::new(turn_delay); + self.services.memory.compaction.fidelity_config = fidelity_config; self.runtime.debug.reasoning_model_warning = anomaly_config.reasoning_model_warning; if anomaly_config.enabled { diff --git a/crates/zeph-core/src/agent/context/assembly.rs b/crates/zeph-core/src/agent/context/assembly.rs index 6d8915491..cd9b76959 100644 --- a/crates/zeph-core/src/agent/context/assembly.rs +++ b/crates/zeph-core/src/agent/context/assembly.rs @@ -127,6 +127,8 @@ impl Agent { persistence: None, metrics: None, typed_pages: None, + fidelity_config: self.services.memory.compaction.fidelity_config.clone(), + current_query: String::new(), } } @@ -460,6 +462,9 @@ impl Agent { .persistence .tiered_retrieval_validator .clone(), + fidelity_config: self.services.memory.compaction.fidelity_config.as_ref(), + planned_next_tools: &[], + status_tx: self.services.session.status_tx.clone(), }; let _ = self.channel.send_status("recalling context...").await; let result = svc.prepare_context(query, &mut window, &mut view).await; diff --git a/crates/zeph-core/src/agent/session_config.rs b/crates/zeph-core/src/agent/session_config.rs index d2bdf7436..2c9dc69cf 100644 --- a/crates/zeph-core/src/agent/session_config.rs +++ b/crates/zeph-core/src/agent/session_config.rs @@ -123,6 +123,9 @@ pub struct AgentSessionConfig { /// the wrapper prevents accidental duplication. Iteration produces new `Secret` /// values via `Secret::new(v.expose())` on the consumption side. pub secrets: Arc<[(String, Secret)]>, + + /// CAM fidelity scoring configuration (#4547). `None` → scoring disabled. + pub fidelity_config: Option, } impl AgentSessionConfig { @@ -193,6 +196,15 @@ impl AgentSessionConfig { recap: config.session.recap.clone(), loop_min_interval_secs: config.cli.loop_.min_interval_secs, goal_config: config.goals.clone(), + fidelity_config: { + let fc = config.memory.fidelity.clone(); + if let Some(ref cfg) = fc + && let Err(e) = cfg.validate() + { + tracing::warn!("fidelity config invalid, scoring disabled: {e}"); + } + fc + }, } } } @@ -295,5 +307,9 @@ mod tests { assert_eq!(sc.recap.max_tokens, config.session.recap.max_tokens); assert_eq!(sc.goal_config.enabled, config.goals.enabled); assert_eq!(sc.goal_config.max_text_chars, config.goals.max_text_chars); + assert_eq!( + sc.fidelity_config.is_some(), + config.memory.fidelity.is_some() + ); } } diff --git a/crates/zeph-core/src/agent/state/compaction.rs b/crates/zeph-core/src/agent/state/compaction.rs index a0f166ea6..31d8d147d 100644 --- a/crates/zeph-core/src/agent/state/compaction.rs +++ b/crates/zeph-core/src/agent/state/compaction.rs @@ -43,6 +43,10 @@ pub(crate) struct MemoryCompactionState { pub(crate) context_strategy: crate::config::ContextStrategy, /// Turn threshold for `Adaptive` strategy crossover (#2288). pub(crate) crossover_turn_threshold: u32, + /// CAM fidelity scoring configuration (#4547). + /// + /// `None` means fidelity scoring is not configured (disabled). + pub(crate) fidelity_config: Option, } impl Default for MemoryCompactionState { @@ -62,6 +66,7 @@ impl Default for MemoryCompactionState { cached_session_digest: None, context_strategy: crate::config::ContextStrategy::default(), crossover_turn_threshold: 20, + fidelity_config: None, } } } diff --git a/crates/zeph-llm/src/provider.rs b/crates/zeph-llm/src/provider.rs index 32505b6eb..2e8605971 100644 --- a/crates/zeph-llm/src/provider.rs +++ b/crates/zeph-llm/src/provider.rs @@ -468,6 +468,12 @@ pub struct MessageMetadata { /// Never serialized — always re-populated from the database on load. #[serde(skip)] pub db_id: Option, + /// Fidelity level assigned by `FidelityScorer` during context assembly. + /// + /// `None` when fidelity scoring is disabled or the message has not yet been scored. + /// Used for debug tracing and compaction input filtering (INV-02). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub fidelity_tag: Option, } impl Default for MessageMetadata { @@ -479,6 +485,7 @@ impl Default for MessageMetadata { focus_pinned: false, focus_marker_id: None, db_id: None, + fidelity_tag: None, } } } @@ -494,6 +501,7 @@ impl MessageMetadata { focus_pinned: false, focus_marker_id: None, db_id: None, + fidelity_tag: None, } } @@ -507,6 +515,7 @@ impl MessageMetadata { focus_pinned: false, focus_marker_id: None, db_id: None, + fidelity_tag: None, } } @@ -520,6 +529,7 @@ impl MessageMetadata { focus_pinned: true, focus_marker_id: None, db_id: None, + fidelity_tag: None, } } } diff --git a/crates/zeph-memory/src/store/messages/mod.rs b/crates/zeph-memory/src/store/messages/mod.rs index bac3e4233..59ab01c14 100644 --- a/crates/zeph-memory/src/store/messages/mod.rs +++ b/crates/zeph-memory/src/store/messages/mod.rs @@ -291,6 +291,7 @@ impl SqliteStore { focus_pinned: false, focus_marker_id: None, db_id: Some(row_id), + fidelity_tag: None, }, } }) @@ -353,6 +354,7 @@ impl SqliteStore { focus_pinned: false, focus_marker_id: None, db_id: Some(row_id), + fidelity_tag: None, }, } }) @@ -540,6 +542,7 @@ impl SqliteStore { focus_pinned: false, focus_marker_id: None, db_id: None, + fidelity_tag: None, }, } })) diff --git a/src/init/memory.rs b/src/init/memory.rs index 8187624a9..79cede712 100644 --- a/src/init/memory.rs +++ b/src/init/memory.rs @@ -192,6 +192,14 @@ pub(super) fn step_memory(state: &mut WizardState) -> anyhow::Result<()> { .interact()?; strategy_options[strategy_idx].clone_into(&mut state.context_strategy); + state.fidelity_enabled = Confirm::new() + .with_prompt( + "Enable Context-Adaptive Memory (CAM) fidelity scoring? (assigns Full/Compressed/\ + Placeholder levels to historical messages to reduce context token usage)", + ) + .default(false) + .interact()?; + println!(); Ok(()) } diff --git a/src/init/mod.rs b/src/init/mod.rs index ecbd5c125..40e421837 100644 --- a/src/init/mod.rs +++ b/src/init/mod.rs @@ -248,6 +248,9 @@ pub(crate) struct WizardState { pub(crate) cocoon_client_url: Option, /// `true` when the user confirmed they have an access hash stored in the vault. pub(crate) cocoon_wants_access_hash: bool, + // CAM fidelity (#4547) + /// Enable heuristic fidelity scoring (Full/Compressed/Placeholder). + pub(crate) fidelity_enabled: bool, } impl Default for WizardState { @@ -421,6 +424,7 @@ impl Default for WizardState { gonka_nodes: Vec::new(), cocoon_client_url: None, cocoon_wants_access_hash: false, + fidelity_enabled: false, } } } @@ -780,6 +784,13 @@ pub(crate) fn build_config(state: &WizardState) -> Config { _ => zeph_core::config::ContextStrategy::FullHistory, }; + if state.fidelity_enabled { + config.memory.fidelity = Some(zeph_config::FidelityConfig { + enabled: true, + ..Default::default() + }); + } + // MM-F1/F2/F5 retrieval tuning defaults — no interactive question needed; // all fields have sensible defaults. Surfaced here per CLAUDE.md rule #4. println!(