mateaix · MISTLXC · Jun 26, 2026
diff --git a/mateclaw-server/src/main/java/vip/mate/agent/AgentGraphBuilder.java b/mateclaw-server/src/main/java/vip/mate/agent/AgentGraphBuilder.java
@@ -132,6 +132,11 @@ public class AgentGraphBuilder {
     private final vip.mate.goal.service.GoalFollowupService goalFollowupService;
     private final vip.mate.goal.config.GoalProperties goalProperties;
 
+    // Context Intelligence v2 components (design doc §10.2)
+    private final org.springframework.context.ApplicationEventPublisher eventPublisher;
+    private final vip.mate.context.intelligence.snapshot.EnvSnapshotStore envSnapshotStore;
+    private final vip.mate.context.intelligence.budget.TokenBudgetPlanner tokenBudgetPlanner;
+
     /**
      * Auto-grant resolver wired into the executor so an active
      * {@code mate_approval_grant} row can skip {@code createPending()} for matching
@@ -650,6 +655,9 @@ CompiledGraph buildPlanExecuteGraph(AgentToolSet toolSet, ChatModel chatModel, i
                     .addStrategy(MateClawStateKeys.LLM_CALL_COUNT, KeyStrategy.REPLACE)
                     .addStrategy(MateClawStateKeys.RUNTIME_MODEL_NAME, KeyStrategy.REPLACE)
                     .addStrategy(MateClawStateKeys.RUNTIME_PROVIDER_ID, KeyStrategy.REPLACE)
+                    // v1 Bug 3 fix: RUNTIME_MODEL_TYPE was missing registration, causing it to be lost on multi-node merges;
+                    // ContextProfile.fromModelType fell back to defaults. See design doc §10.3.
+                    .addStrategy(MateClawStateKeys.RUNTIME_MODEL_TYPE, KeyStrategy.REPLACE)
                     // SourceEvidenceLedger: ActionNode 把每轮 ToolResponse 抽取出的
                     // (sourcePaths, sourceSymbols, failedPaths) merge 进这个 ledger，
                     // 后续 ReasoningNode / FinalAnswerNode 调 validateAnswer 校验
@@ -856,7 +864,8 @@ CompiledGraph buildReActGraph(AgentToolSet toolSet, ChatModel chatModel, int max
             ReasoningNode reasoningNode = new ReasoningNode(chatModel, toolSet, reasoningEffort,
                     supportsReasoningEffort,
                     streamingHelper, conversationWindowManager, streamTracker, 0, wikiContextService,
-                    skillCatalogRenderer, toolDisclosureService, progressLedgerService);
+                    skillCatalogRenderer, toolDisclosureService, progressLedgerService,
+                    eventPublisher, envSnapshotStore, tokenBudgetPlanner);
             ActionNode actionNode = new ActionNode(executor, streamTracker);
             ObservationProcessor observationProcessor = new ObservationProcessor(graphObservationProperties);
             ObservationNode observationNode = new ObservationNode(observationProcessor, streamTracker);
@@ -937,6 +946,9 @@ CompiledGraph buildReActGraph(AgentToolSet toolSet, ChatModel chatModel, int max
                     .addStrategy(MateClawStateKeys.COMPLETION_TOKENS, KeyStrategy.REPLACE)
                     .addStrategy(MateClawStateKeys.RUNTIME_MODEL_NAME, KeyStrategy.REPLACE)
                     .addStrategy(MateClawStateKeys.RUNTIME_PROVIDER_ID, KeyStrategy.REPLACE)
+                    // v1 Bug 3 fix: RUNTIME_MODEL_TYPE was missing registration, causing it to be lost on multi-node merges;
+                    // ContextProfile.fromModelType fell back to defaults. See design doc §10.3.
+                    .addStrategy(MateClawStateKeys.RUNTIME_MODEL_TYPE, KeyStrategy.REPLACE)
                     // SourceEvidenceLedger: ActionNode 把每轮 ToolResponse 抽取出的
                     // (sourcePaths, sourceSymbols, failedPaths) merge 进这个 ledger，
                     // 后续 ReasoningNode / FinalAnswerNode 调 validateAnswer 校验

diff --git a/mateclaw-server/src/main/java/vip/mate/agent/context/LoopBudgetConfig.java b/mateclaw-server/src/main/java/vip/mate/agent/context/LoopBudgetConfig.java
@@ -1,5 +1,7 @@
 package vip.mate.agent.context;
 
+import vip.mate.context.intelligence.budget.TokenBudget;
+
 /**
  * Configuration for per-reasoning-loop message budgeting.
  *
@@ -137,4 +139,27 @@ public LoopBudgetConfig withReservedPrefixTokens(int reservedPrefixTokens) {
         return new LoopBudgetConfig(triggerTokens, keepTailTokens, minTailMessages,
                 tailSoftCeilingRatio, reservedPrefixTokens, targetMaxMessages);
     }
+
+    /**
+     * Derives a {@code LoopBudgetConfig} from a v2 {@link TokenBudget} (design doc §5.5 / §10.1).
+     * <p>
+     * The trigger is {@code budget.compactTriggerTokens()} floored at {@code MIN_TRIGGER_TOKENS};
+     * the tail is {@code budget.keepTailTokens()} floored at {@code MIN_TAIL_TOKENS}. A floored
+     * tail that is not strictly below the trigger is replaced by
+     * {@code max(MIN_TAIL_TOKENS, trigger - MIN_TRIGGER_TOKENS)}.
+     * <p>
+     * The other arguments are fixed: minTailMessages=4, tailSoftCeilingRatio=1.5,
+     * reservedPrefixTokens=0, targetMaxMessages=200. NOTE(review): presumably mirrors {@link #forContext} — confirm.
+     *
+     * @param budget planned budget; must be non-null (it is dereferenced without a null check)
+     * @return a config built from the floored trigger/tail and the fixed defaults above
+     */
+    public static LoopBudgetConfig fromBudget(TokenBudget budget) {
+        final int trigger = Math.max(MIN_TRIGGER_TOKENS, budget.compactTriggerTokens());
+        final int flooredTail = Math.max(MIN_TAIL_TOKENS, budget.keepTailTokens());
+        final int tail = flooredTail < trigger
+                ? flooredTail
+                : Math.max(MIN_TAIL_TOKENS, trigger - MIN_TRIGGER_TOKENS);
+        return new LoopBudgetConfig(trigger, tail, 4, 1.5, 0, 200);
+    }
 }
diff --git a/mateclaw-server/src/main/java/vip/mate/agent/graph/StateGraphReActAgent.java b/mateclaw-server/src/main/java/vip/mate/agent/graph/StateGraphReActAgent.java
@@ -524,6 +524,8 @@ private Map<String, Object> buildInitialState(String userMessage, String convers
         inputs.put(COMPLETION_TOKENS, 0);
         inputs.put(RUNTIME_MODEL_NAME, modelName != null ? modelName : "");
         inputs.put(RUNTIME_PROVIDER_ID, runtimeProviderId != null ? runtimeProviderId : "");
+        // Context Intelligence v2: inject modelType for ContextProfile.fromModelType() threshold check
+        inputs.put(RUNTIME_MODEL_TYPE, runtimeModelConfig != null && runtimeModelConfig.getModelType() != null ? runtimeModelConfig.getModelType() : "");
         inputs.put(TRACE_ID, UUID.randomUUID().toString().substring(0, 8));
 
         // Multimodal sidecar routing — null when the turn carries no media or

diff --git a/mateclaw-server/src/main/java/vip/mate/agent/graph/node/ReasoningNode.java b/mateclaw-server/src/main/java/vip/mate/agent/graph/node/ReasoningNode.java
@@ -15,6 +15,7 @@
 import org.springframework.ai.model.tool.ToolCallingChatOptions;
 import org.springframework.ai.openai.OpenAiChatOptions;
 import org.springframework.ai.tool.ToolCallback;
+import org.springframework.context.ApplicationEventPublisher;
 import org.springframework.util.StringUtils;
 import vip.mate.agent.AgentToolSet;
 import vip.mate.agent.GraphEventPublisher;
@@ -29,6 +30,12 @@
 import vip.mate.agent.graph.state.MateClawStateAccessor;
 import vip.mate.agent.graph.state.MateClawStateKeys;
 import vip.mate.agent.graph.state.SourceEvidenceLedger;
+import vip.mate.context.intelligence.budget.TokenBudget;
+import vip.mate.context.intelligence.budget.TokenBudgetPlanner;
+import vip.mate.context.intelligence.event.LlmOverflowSignal;
+import vip.mate.context.intelligence.event.LlmSuccessSignal;
+import vip.mate.context.intelligence.snapshot.EnvSnapshot;
+import vip.mate.context.intelligence.snapshot.EnvSnapshotStore;
 
 import vip.mate.channel.web.ChatStreamTracker;
 
@@ -350,6 +357,14 @@ private boolean hasWikiTool() {
      */
     private final vip.mate.agent.progress.ProgressLedgerService progressLedgerService;
 
+    /**
+     * Context Intelligence v2 components (design doc §10.1). All three can be null:
+     * legacy callers / test constructors that don't inject them fall back to the yml fallback in {@link #loopContextWindowTokens()}.
+     */
+    private final ApplicationEventPublisher eventPublisher;
+    private final EnvSnapshotStore envSnapshotStore;
+    private final TokenBudgetPlanner tokenBudgetPlanner;
+
     public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoningEffort,
                          NodeStreamingChatHelper streamingHelper,
                          ConversationWindowManager conversationWindowManager,
@@ -448,6 +463,31 @@ public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoning
                          vip.mate.skill.runtime.SkillCatalogRenderer skillCatalogRenderer,
                          vip.mate.tool.disclosure.ToolDisclosureService toolDisclosureService,
                          vip.mate.agent.progress.ProgressLedgerService progressLedgerService) {
+        this(chatModel, toolSet, reasoningEffort, supportsReasoningEffort, streamingHelper,
+                conversationWindowManager, streamTracker, maxOutputTokens, wikiContextService,
+                skillCatalogRenderer, toolDisclosureService, progressLedgerService,
+                null, null, null);
+    }
+
+    /**
+     * Context Intelligence v2 primary constructor (design doc §10.2).
+     * <p>
+     * Adds 3 parameters: {@code eventPublisher} / {@code envSnapshotStore} / {@code tokenBudgetPlanner}.
+     * All three can be null (legacy callers / test scenarios); when null, falls back to yml + hardcoded fallback,
+     * with behavior identical to v1.
+     */
+    public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoningEffort,
+                         boolean supportsReasoningEffort,
+                         NodeStreamingChatHelper streamingHelper,
+                         ConversationWindowManager conversationWindowManager,
+                         ChatStreamTracker streamTracker, int maxOutputTokens,
+                         vip.mate.wiki.service.WikiContextService wikiContextService,
+                         vip.mate.skill.runtime.SkillCatalogRenderer skillCatalogRenderer,
+                         vip.mate.tool.disclosure.ToolDisclosureService toolDisclosureService,
+                         vip.mate.agent.progress.ProgressLedgerService progressLedgerService,
+                         ApplicationEventPublisher eventPublisher,
+                         EnvSnapshotStore envSnapshotStore,
+                         TokenBudgetPlanner tokenBudgetPlanner) {
         this.chatModel = chatModel;
         this.toolSet = toolSet;
         this.toolCallbacks = toolSet.callbacks();
@@ -461,6 +501,9 @@ public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoning
         this.wikiContextService = wikiContextService;
         this.skillCatalogRenderer = skillCatalogRenderer;
         this.progressLedgerService = progressLedgerService;
+        this.eventPublisher = eventPublisher;
+        this.envSnapshotStore = envSnapshotStore;
+        this.tokenBudgetPlanner = tokenBudgetPlanner;
     }
 
     /**
@@ -477,6 +520,74 @@ private int loopContextWindowTokens() {
         return DEFAULT_LOOP_CONTEXT_WINDOW_TOKENS;
     }
 
+    /**
+     * Context Intelligence v2: build the per-loop LoopBudgetConfig (design doc §5.5 / §10.1).
+     * <p>
+     * Fallback order (§8.1):
+     * <ol>
+     *   <li>envSnapshotStore and tokenBudgetPlanner both non-null → plan a budget from the stored snapshot</li>
+     *   <li>the window from loopContextWindowTokens() is passed to the planner as its fallback window</li>
+     *   <li>either collaborator null, or planning throws → LoopBudgetConfig.forContext(loopContextWindowTokens())</li>
+     * </ol>
+     * Exceptions thrown while planning are logged at debug together with their stack trace and do not propagate.
+     */
+    private LoopBudgetConfig buildLoopBudgetConfig(
+            String providerId, String modelName,
+            int systemTokens, int toolsTokens, int reservedPrefixTokens) {
+        // Tier 1: attempt dynamic budget (only when both v2 collaborators were injected)
+        if (envSnapshotStore != null && tokenBudgetPlanner != null) {
+            try {
+                EnvSnapshot snapshot = envSnapshotStore.get(providerId, modelName);
+                int fallbackWindow = loopContextWindowTokens();
+                TokenBudget budget = tokenBudgetPlanner.plan(snapshot, systemTokens, toolsTokens, fallbackWindow);
+                return LoopBudgetConfig.fromBudget(budget)
+                        .withReservedPrefixTokens(reservedPrefixTokens);
+            } catch (Exception e) {
+                log.debug("[ReasoningNode] Context Intelligence plan failed, falling back to forContext: {}",
+                        e.getMessage(), e);
+            }
+        }
+        // Tier 2 + 3: yml / hardcoded fallback
+        return LoopBudgetConfig.forContext(loopContextWindowTokens())
+                .withReservedPrefixTokens(reservedPrefixTokens);
+    }
+
+    /**
+     * Context Intelligence v2: publish an {@link LlmOverflowSignal} for a prompt-too-long result (design doc §5.3).
+     * <p>
+     * NOTE(review): the design expects a synchronous listener so confidenceUpper is updated before the PTL retry — confirm.
+     * No-op when eventPublisher is null (v1 compatibility); publish failures are logged at debug with the cause and swallowed.
+     */
+    private void publishOverflowSignal(String provider, String modelName, String modelType,
+                                       int attemptedTokens, String traceId) {
+        if (eventPublisher == null) return;
+        try {
+            eventPublisher.publishEvent(new LlmOverflowSignal(
+                    provider, modelName, modelType, attemptedTokens, traceId));
+        } catch (Exception e) {
+            log.debug("[ReasoningNode] publishOverflowSignal failed (non-fatal): {}", e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Context Intelligence v2: publish an {@link LlmSuccessSignal} with token counts and latency (design doc §5.2).
+     * <p>
+     * NOTE(review): the design expects an @Async("signalExecutor") listener so this does not block reasoning — confirm.
+     * No-op when eventPublisher is null (v1 compatibility); publish failures are logged at debug with the cause and swallowed.
+     */
+    private void publishSuccessSignal(String provider, String modelName, String modelType,
+                                      int promptTokens, int completionTokens,
+                                      long latencyMs, String traceId) {
+        if (eventPublisher == null) return;
+        try {
+            eventPublisher.publishEvent(new LlmSuccessSignal(
+                    provider, modelName, modelType,
+                    promptTokens, completionTokens, latencyMs, traceId));
+        } catch (Exception e) {
+            log.debug("[ReasoningNode] publishSuccessSignal failed (non-fatal): {}", e.getMessage(), e);
+        }
+    }
+
     public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoningEffort,
                          NodeStreamingChatHelper streamingHelper,
                          ConversationWindowManager conversationWindowManager) {
@@ -511,6 +622,9 @@ public ReasoningNode(ChatModel chatModel, List<ToolCallback> toolCallbacks) {
         this.wikiContextService = null;
         this.skillCatalogRenderer = null;
         this.progressLedgerService = null;
+        this.eventPublisher = null;
+        this.envSnapshotStore = null;
+        this.tokenBudgetPlanner = null;
     }
 
     @Override
@@ -596,8 +710,15 @@ public Map<String, Object> apply(OverAllState state) throws Exception {
         int toolsTokens = TokenEstimator.estimateToolsTokens(toolCallbacks);
         int loopReservedPrefixTokens = systemTokens + toolsTokens
                 + maxOutputTokens + LOOP_PREFIX_AUXILIARY_RESERVE_TOKENS;
-        LoopBudgetConfig loopCfg = LoopBudgetConfig.forContext(loopContextWindowTokens())
-                .withReservedPrefixTokens(loopReservedPrefixTokens);
+
+        // Context Intelligence v2: prefer deriving budget from EnvSnapshot + TokenBudgetPlanner
+        // (design doc §5.5 / §10.1). When any of the three is null or snapshot is empty, falls back to
+        // the yml fallback LoopBudgetConfig.forContext(loopContextWindowTokens()).
+        String ctxRuntimeModelName = state.value(MateClawStateKeys.RUNTIME_MODEL_NAME, "");
+        String ctxRuntimeProviderId = state.value(MateClawStateKeys.RUNTIME_PROVIDER_ID, "");
+        LoopBudgetConfig loopCfg = buildLoopBudgetConfig(
+                ctxRuntimeProviderId, ctxRuntimeModelName,
+                systemTokens, toolsTokens, loopReservedPrefixTokens);
         LoopMessageBudgeter.Result budgeted = LOOP_BUDGETER.budget(messages, loopCfg);
         // Only log when the budget actually modified the list — a triggered-
         // but-no-op pass is normal (history fits comfortably under the tail
@@ -743,6 +864,11 @@ public Map<String, Object> apply(OverAllState state) throws Exception {
                 "llmCallCount", nextLlmCallCount
         ));
 
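+        // Context Intelligence v2: start timing for LlmSuccessSignal.latencyMs (the clock starts before the first attempt, so any prompt-too-long compaction and retry below is included)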
+        long llmCallStartMs = System.currentTimeMillis();
+        String ctxTraceId = state.value(MateClawStateKeys.TRACE_ID, "");
+        String ctxRuntimeModelType = state.value(MateClawStateKeys.RUNTIME_MODEL_TYPE, "");
+
         NodeStreamingChatHelper.StreamResult result;
         try {
             result = streamingHelper.streamCall(chatModel, prompt, conversationId, "reasoning");
@@ -751,6 +877,9 @@ public Map<String, Object> apply(OverAllState state) throws Exception {
             // Prompt 仍带 wiki / runtime context；早期的 tail-only 路径会把
             // wiki 段一起丢掉，重试后的 prompt 比原始更短少一层信息。
             if (result.isPromptTooLong() && conversationWindowManager != null) {
+                // Context Intelligence v2: publish overflow signal (synchronous, effective before retry)
+                publishOverflowSignal(ctxRuntimeProviderId, ctxRuntimeModelName, ctxRuntimeModelType,
+                        result.promptTokens(), ctxTraceId);
                 log.warn("[ReasoningNode] Prompt too long, attempting STRUCTURED compaction and retry");
 
                 // MateClawStateAccessor.agentId() returns String per state
@@ -843,6 +972,15 @@ public Map<String, Object> apply(OverAllState state) throws Exception {
                     .build();
         }
 
+        // Context Intelligence v2: publish success signal (asynchronous, does not block reasoning)
+        // Only published when the final result is non-PTL / non-user-stopped — PTL already published the overflow signal above,
+        // and user-stop carries no valid token information.
+        if (!result.isPromptTooLong() && !result.stopped()) {
+            long latencyMs = System.currentTimeMillis() - llmCallStartMs;
+            publishSuccessSignal(ctxRuntimeProviderId, ctxRuntimeModelName, ctxRuntimeModelType,
+                    result.promptTokens(), result.completionTokens(), latencyMs, ctxTraceId);
+        }
+
         // ======= 处理 StreamResult =======
 
         // 用户主动停止且有部分内容

diff --git a/mateclaw-server/src/main/java/vip/mate/agent/graph/plan/StateGraphPlanExecuteAgent.java b/mateclaw-server/src/main/java/vip/mate/agent/graph/plan/StateGraphPlanExecuteAgent.java
@@ -299,6 +299,8 @@ private Map<String, Object> buildInitialState(String userMessage, String convers
         inputs.put(MateClawStateKeys.COMPLETION_TOKENS, 0);
         inputs.put(MateClawStateKeys.RUNTIME_MODEL_NAME, modelName != null ? modelName : "");
         inputs.put(MateClawStateKeys.RUNTIME_PROVIDER_ID, runtimeProviderId != null ? runtimeProviderId : "");
+        // Context Intelligence v2: inject modelType for ContextProfile.fromModelType() threshold check
+        inputs.put(MateClawStateKeys.RUNTIME_MODEL_TYPE, runtimeModelConfig != null && runtimeModelConfig.getModelType() != null ? runtimeModelConfig.getModelType() : "");
         inputs.put(MateClawStateKeys.TRACE_ID, UUID.randomUUID().toString().substring(0, 8));
 
         if (currentTurn.routingDecision() != null

diff --git a/mateclaw-server/src/main/java/vip/mate/agent/graph/state/MateClawStateKeys.java b/mateclaw-server/src/main/java/vip/mate/agent/graph/state/MateClawStateKeys.java
@@ -159,6 +159,14 @@ private MateClawStateKeys() {
     // ===== 运行时模型快照（REPLACE 策略，buildInitialState 注入）=====
     public static final String RUNTIME_MODEL_NAME = "runtime_model_name";
     public static final String RUNTIME_PROVIDER_ID = "runtime_provider_id";
+    /**
+     * Runtime model type (chat / reasoning / embedding / ...), injected by buildInitialState.
+     * <p>
+     * Context Intelligence v2 uses {@code ContextProfile.fromModelType(modelType)}
+     * to decide whether to participate in dynamic window probing ({@code isDynamic() && hasTextContext()}).
+     * v1 failed to register this key, causing it to be lost on multi-node merges, and ContextProfile inference fell back to defaults.
+     */
+    public static final String RUNTIME_MODEL_TYPE = "runtime_model_type";
 
     // ===== RFC-052: Tool returnDirect 与数据隔离 =====