diff --git a/mateclaw-server/src/main/java/vip/mate/agent/AgentGraphBuilder.java b/mateclaw-server/src/main/java/vip/mate/agent/AgentGraphBuilder.java index a1222fa67..9b4181c0a 100644 --- a/mateclaw-server/src/main/java/vip/mate/agent/AgentGraphBuilder.java +++ b/mateclaw-server/src/main/java/vip/mate/agent/AgentGraphBuilder.java @@ -132,6 +132,11 @@ public class AgentGraphBuilder { private final vip.mate.goal.service.GoalFollowupService goalFollowupService; private final vip.mate.goal.config.GoalProperties goalProperties; + // Context Intelligence v2 components (design doc §10.2) + private final org.springframework.context.ApplicationEventPublisher eventPublisher; + private final vip.mate.context.intelligence.snapshot.EnvSnapshotStore envSnapshotStore; + private final vip.mate.context.intelligence.budget.TokenBudgetPlanner tokenBudgetPlanner; + /** * Auto-grant resolver wired into the executor so an active * {@code mate_approval_grant} row can skip {@code createPending()} for matching @@ -650,6 +655,9 @@ CompiledGraph buildPlanExecuteGraph(AgentToolSet toolSet, ChatModel chatModel, i .addStrategy(MateClawStateKeys.LLM_CALL_COUNT, KeyStrategy.REPLACE) .addStrategy(MateClawStateKeys.RUNTIME_MODEL_NAME, KeyStrategy.REPLACE) .addStrategy(MateClawStateKeys.RUNTIME_PROVIDER_ID, KeyStrategy.REPLACE) + // v1 Bug 3 fix: RUNTIME_MODEL_TYPE was missing registration, causing it to be lost on multi-node merges; + // ContextProfile.fromModelType fell back to defaults. See design doc §10.3. + .addStrategy(MateClawStateKeys.RUNTIME_MODEL_TYPE, KeyStrategy.REPLACE) // SourceEvidenceLedger: ActionNode 把每轮 ToolResponse 抽取出的 // (sourcePaths, sourceSymbols, failedPaths) merge 进这个 ledger, // 后续 ReasoningNode / FinalAnswerNode 调 validateAnswer 校验 @@ -856,7 +864,8 @@ CompiledGraph buildReActGraph(AgentToolSet toolSet, ChatModel chatModel, int max ReasoningNode reasoningNode = new ReasoningNode(chatModel, toolSet, reasoningEffort, supportsReasoningEffort, streamingHelper, conversationWindowManager, streamTracker, 0, wikiContextService, - skillCatalogRenderer, toolDisclosureService, progressLedgerService); + skillCatalogRenderer, toolDisclosureService, progressLedgerService, + eventPublisher, envSnapshotStore, tokenBudgetPlanner); ActionNode actionNode = new ActionNode(executor, streamTracker); ObservationProcessor observationProcessor = new ObservationProcessor(graphObservationProperties); ObservationNode observationNode = new ObservationNode(observationProcessor, streamTracker); @@ -937,6 +946,9 @@ CompiledGraph buildReActGraph(AgentToolSet toolSet, ChatModel chatModel, int max .addStrategy(MateClawStateKeys.COMPLETION_TOKENS, KeyStrategy.REPLACE) .addStrategy(MateClawStateKeys.RUNTIME_MODEL_NAME, KeyStrategy.REPLACE) .addStrategy(MateClawStateKeys.RUNTIME_PROVIDER_ID, KeyStrategy.REPLACE) + // v1 Bug 3 fix: RUNTIME_MODEL_TYPE was missing registration, causing it to be lost on multi-node merges; + // ContextProfile.fromModelType fell back to defaults. See design doc §10.3. + .addStrategy(MateClawStateKeys.RUNTIME_MODEL_TYPE, KeyStrategy.REPLACE) // SourceEvidenceLedger: ActionNode 把每轮 ToolResponse 抽取出的 // (sourcePaths, sourceSymbols, failedPaths) merge 进这个 ledger, // 后续 ReasoningNode / FinalAnswerNode 调 validateAnswer 校验 diff --git a/mateclaw-server/src/main/java/vip/mate/agent/context/LoopBudgetConfig.java b/mateclaw-server/src/main/java/vip/mate/agent/context/LoopBudgetConfig.java index 01228d6dd..f42fc5b48 100644 --- a/mateclaw-server/src/main/java/vip/mate/agent/context/LoopBudgetConfig.java +++ b/mateclaw-server/src/main/java/vip/mate/agent/context/LoopBudgetConfig.java @@ -1,5 +1,7 @@ package vip.mate.agent.context; +import vip.mate.context.intelligence.budget.TokenBudget; + /** * Configuration for per-reasoning-loop message budgeting. * @@ -137,4 +139,27 @@ public LoopBudgetConfig withReservedPrefixTokens(int reservedPrefixTokens) { return new LoopBudgetConfig(triggerTokens, keepTailTokens, minTailMessages, tailSoftCeilingRatio, reservedPrefixTokens, targetMaxMessages); } + + /** + * v2 factory method: derive LoopBudgetConfig from {@link TokenBudget} (design doc §5.5 / §10.1). + *
+ * Replaces v1's fromAllocation. {@code triggerTokens} and {@code keepTailTokens} are taken directly from + * TokenBudget (computed multi-factorially by {@code TokenBudgetPlanner.plan}); other fields reuse + * the sensible defaults from {@link #forContext}. + *
+ * Boundary protection: TokenBudget may produce values below MIN_TRIGGER_TOKENS /
+ * MIN_TAIL_TOKENS under very small windows (e.g. 4K); Math.max floors them here, and when tail ≥ trigger
+ * falls back to {@code trigger - MIN_TRIGGER_TOKENS} (same protection logic as {@link #forContext}).
+ *
+ * @param budget budget planned by v2 TokenBudgetPlanner (non-null)
+ * @return LoopBudgetConfig (validated via compact constructor)
+ */
+ public static LoopBudgetConfig fromBudget(TokenBudget budget) {
+ int trigger = Math.max(MIN_TRIGGER_TOKENS, budget.compactTriggerTokens());
+ int tail = Math.max(MIN_TAIL_TOKENS, budget.keepTailTokens());
+ if (tail >= trigger) {
+ tail = Math.max(MIN_TAIL_TOKENS, trigger - MIN_TRIGGER_TOKENS);
+ }
+ return new LoopBudgetConfig(trigger, tail, 4, 1.5, 0, 200);
+ }
}
diff --git a/mateclaw-server/src/main/java/vip/mate/agent/graph/StateGraphReActAgent.java b/mateclaw-server/src/main/java/vip/mate/agent/graph/StateGraphReActAgent.java
index 6e0fcd01a..7314e3594 100644
--- a/mateclaw-server/src/main/java/vip/mate/agent/graph/StateGraphReActAgent.java
+++ b/mateclaw-server/src/main/java/vip/mate/agent/graph/StateGraphReActAgent.java
@@ -524,6 +524,8 @@ private Map
+ * Adds 3 parameters: {@code eventPublisher} / {@code envSnapshotStore} / {@code tokenBudgetPlanner}.
+ * All three can be null (legacy callers / test scenarios); when null, falls back to yml + hardcoded fallback,
+ * with behavior identical to v1.
+ */
+ public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoningEffort,
+ boolean supportsReasoningEffort,
+ NodeStreamingChatHelper streamingHelper,
+ ConversationWindowManager conversationWindowManager,
+ ChatStreamTracker streamTracker, int maxOutputTokens,
+ vip.mate.wiki.service.WikiContextService wikiContextService,
+ vip.mate.skill.runtime.SkillCatalogRenderer skillCatalogRenderer,
+ vip.mate.tool.disclosure.ToolDisclosureService toolDisclosureService,
+ vip.mate.agent.progress.ProgressLedgerService progressLedgerService,
+ ApplicationEventPublisher eventPublisher,
+ EnvSnapshotStore envSnapshotStore,
+ TokenBudgetPlanner tokenBudgetPlanner) {
this.chatModel = chatModel;
this.toolSet = toolSet;
this.toolCallbacks = toolSet.callbacks();
@@ -461,6 +501,9 @@ public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoning
this.wikiContextService = wikiContextService;
this.skillCatalogRenderer = skillCatalogRenderer;
this.progressLedgerService = progressLedgerService;
+ this.eventPublisher = eventPublisher;
+ this.envSnapshotStore = envSnapshotStore;
+ this.tokenBudgetPlanner = tokenBudgetPlanner;
}
/**
@@ -477,6 +520,74 @@ private int loopContextWindowTokens() {
return DEFAULT_LOOP_CONTEXT_WINDOW_TOKENS;
}
+ /**
+ * Context Intelligence v2: build LoopBudgetConfig (design doc §5.5 / §10.1).
+ *
+ * Three-tier fallback (§8.1):
+ *
+ * Synchronous event (@EventListener without @Async), ensures confidenceUpper is updated before PTL retry.
+ * Silently skipped when eventPublisher is null (v1 compatibility).
+ */
+ private void publishOverflowSignal(String provider, String modelName, String modelType,
+ int attemptedTokens, String traceId) {
+ if (eventPublisher == null) return;
+ try {
+ eventPublisher.publishEvent(new LlmOverflowSignal(
+ provider, modelName, modelType, attemptedTokens, traceId));
+ } catch (Exception e) {
+ log.debug("[ReasoningNode] publishOverflowSignal failed (non-fatal): {}", e.getMessage());
+ }
+ }
+
+ /**
+ * Context Intelligence v2: publish LLM success signal (design doc §5.2).
+ *
+ * Asynchronous event (@Async("signalExecutor")), does not block the reasoning thread.
+ * Silently skipped when eventPublisher is null (v1 compatibility).
+ */
+ private void publishSuccessSignal(String provider, String modelName, String modelType,
+ int promptTokens, int completionTokens,
+ long latencyMs, String traceId) {
+ if (eventPublisher == null) return;
+ try {
+ eventPublisher.publishEvent(new LlmSuccessSignal(
+ provider, modelName, modelType,
+ promptTokens, completionTokens, latencyMs, traceId));
+ } catch (Exception e) {
+ log.debug("[ReasoningNode] publishSuccessSignal failed (non-fatal): {}", e.getMessage());
+ }
+ }
+
public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoningEffort,
NodeStreamingChatHelper streamingHelper,
ConversationWindowManager conversationWindowManager) {
@@ -511,6 +622,9 @@ public ReasoningNode(ChatModel chatModel, List
+ * Context Intelligence v2 uses {@code ContextProfile.fromModelType(modelType)}
+ * to decide whether to participate in dynamic window probing ({@code isDynamic() && hasTextContext()}).
+ * v1 failed to register this key, causing it to be lost on multi-node merges, and ContextProfile inference fell back to defaults.
+ */
+ public static final String RUNTIME_MODEL_TYPE = "runtime_model_type";
// ===== RFC-052: Tool returnDirect 与数据隔离 =====
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/ContextIntelligenceAutoConfiguration.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/ContextIntelligenceAutoConfiguration.java
new file mode 100644
index 000000000..fe5fbf4bb
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/ContextIntelligenceAutoConfiguration.java
@@ -0,0 +1,68 @@
+package vip.mate.context.intelligence;
+
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import vip.mate.context.intelligence.config.ContextIntelligenceProperties;
+import vip.mate.context.intelligence.listener.ContextSignalProcessor;
+import vip.mate.context.intelligence.metrics.ContextIntelMetrics;
+import vip.mate.context.intelligence.perception.BackendDiversityRegistry;
+import vip.mate.context.intelligence.perception.PressureInferencer;
+import vip.mate.context.intelligence.persist.PersistRetryQueue;
+import vip.mate.context.intelligence.persist.WindowStateRepository;
+import vip.mate.context.intelligence.probe.WindowProbeRegistry;
+import vip.mate.context.intelligence.snapshot.EnvSnapshotStore;
+
+/**
+ * Context Intelligence v2 auto-configuration entry.
+ *
+ * Assembly strategy (following project conventions, see {@code HookAutoConfiguration} / {@code MemoryAutoConfiguration}):
+ * Default enabled=false (design doc §11 Phase 1: new module coexists without affecting the existing system).
+ * After integration is complete, explicitly enable it via {@code application.yml}.
+ * Registered only when {@code mateclaw.context.intelligence.enabled=true}.
+ * When disabled, no events are published and no listeners exist; the read path relies on EnvSnapshotStore returning EMPTY to trigger fallback.
+ */
+ @Bean
+ @ConditionalOnProperty(name = "mateclaw.context.intelligence.enabled", havingValue = "true")
+ public ContextSignalProcessor contextSignalProcessor(
+ WindowProbeRegistry windowProbeRegistry,
+ BackendDiversityRegistry diversityRegistry,
+ PressureInferencer pressureInferencer,
+ EnvSnapshotStore envSnapshotStore,
+ WindowStateRepository windowStateRepository,
+ PersistRetryQueue persistRetryQueue,
+ ContextIntelMetrics metrics) {
+ return new ContextSignalProcessor(
+ windowProbeRegistry,
+ diversityRegistry,
+ pressureInferencer,
+ envSnapshotStore,
+ windowStateRepository,
+ persistRetryQueue,
+ metrics);
+ }
+}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/budget/TokenBudget.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/budget/TokenBudget.java
new file mode 100644
index 000000000..650951ba5
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/budget/TokenBudget.java
@@ -0,0 +1,35 @@
+package vip.mate.context.intelligence.budget;
+
+/**
+ * Token budget (return value of {@link TokenBudgetPlanner}).
+ *
+ * Computed by the multi-factor budget planner from effectiveWindow + pressure + diversity.
+ *
+ * @param historyBudget available tokens for history messages
+ * @param injectionBudget available tokens for injected content (memory/wiki/tool)
+ * @param compactTriggerTokens threshold that triggers compaction
+ * @param keepTailTokens tail-retained tokens
+ * @param trace decision trace (for monitoring, nullable)
+ * @author MateClaw Team
+ */
+public record TokenBudget(
+ int historyBudget,
+ int injectionBudget,
+ int compactTriggerTokens,
+ int keepTailTokens,
+ TokenBudgetTrace trace
+) {
+ /**
+ * fallback factory: derive from yml static values when the snapshot is empty.
+ *
+ * Ratios are consistent with v1: history 60% / injection 40% / compactTrigger 60% / keepTail 36%.
+ */
+ public static TokenBudget legacy(int fallbackWindow) {
+ return new TokenBudget(
+ (int) (fallbackWindow * 0.60), // historyBudget
+ (int) (fallbackWindow * 0.40), // injectionBudget
+ (int) (fallbackWindow * 0.60), // compactTrigger
+ (int) (fallbackWindow * 0.36), // keepTail
+ null); // no trace
+ }
+}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/budget/TokenBudgetPlanner.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/budget/TokenBudgetPlanner.java
new file mode 100644
index 000000000..dd8fa7cb1
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/budget/TokenBudgetPlanner.java
@@ -0,0 +1,130 @@
+package vip.mate.context.intelligence.budget;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+import vip.mate.context.intelligence.config.ContextIntelligenceProperties;
+import vip.mate.context.intelligence.enums.ResourcePressure;
+import vip.mate.context.intelligence.snapshot.EnvSnapshot;
+
+/**
+ * Multi-factor budget planner (exit of the read path).
+ *
+ * Combines EnvSnapshot's effectiveWindow + pressure + diversity to compute the token budget
+ * for each round of the ReAct loop.
+ *
+ * Three-tier fallback (§8.1):
+ *
+ * Key design (§5.5 / C.5):
+ *
+ * Named {@code TokenBudgetTrace} rather than {@code BudgetTrace}, because the nested record
+ * {@code LoopMessageBudgeter.BudgetTrace} already exists; a top-level class with the same name
+ * would cause import ambiguity.
+ *
+ * @param baseWindow original effectiveWindow
+ * @param ceiledWindow window after applying the P10 ceiling
+ * @param safeWindowApplied safeWindow actually applied (0 means not applied)
+ * @param pressure pressure level at decision time
+ * @param historyRatio history ratio actually used
+ * @param systemTokens system message tokens
+ * @param toolsTokens tool declaration tokens
+ * @param outputReserve reserved output tokens
+ * @param legacyFallback whether the yml fallback was used
+ * @param phase WindowProbe phase at decision time (COLD/PROBING/...), null means unknown
+ * @param confidenceLower lower bound of the state machine confidence at decision time
+ * @param confidenceUpper upper bound of the state machine confidence at decision time
+ * @param reason decision path marker: normal / diversity_clamp / pressure_adjust / fallback_yml / fallback_hardcode
+ * @author MateClaw Team
+ */
+public record TokenBudgetTrace(
+ int baseWindow,
+ int ceiledWindow,
+ int safeWindowApplied,
+ ResourcePressure pressure,
+ double historyRatio,
+ int systemTokens,
+ int toolsTokens,
+ int outputReserve,
+ boolean legacyFallback,
+ String phase,
+ int confidenceLower,
+ int confidenceUpper,
+ String reason
+) {}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/config/ContextIntelligenceProperties.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/config/ContextIntelligenceProperties.java
new file mode 100644
index 000000000..baf5c04f0
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/config/ContextIntelligenceProperties.java
@@ -0,0 +1,217 @@
+package vip.mate.context.intelligence.config;
+
+import org.springframework.boot.context.properties.ConfigurationProperties;
+
+/**
+ * Context Intelligence module configuration properties.
+ *
+ * See design doc Appendix C.9 for full configuration (authoritative config).
+ *
+ * @author MateClaw Team
+ */
+@ConfigurationProperties(prefix = "mateclaw.context.intelligence")
+public class ContextIntelligenceProperties {
+
+ /** Module master switch; when false, the read path falls back to yml + hardcoded values */
+ private boolean enabled = false;
+
+ /** Backend diversity detection switch: auto (auto-detect) / off (disabled) */
+ private String diversityDetection = "auto";
+
+ /** Pressure inference switch */
+ private boolean pressureInference = true;
+
+ /** DB persistence switch */
+ private boolean dbPersist = true;
+
+ /** Dedicated thread pool config */
+ private Executor executor = new Executor();
+
+ /** Window probe state machine parameters */
+ private Probe probe = new Probe();
+
+ /** Diversity detection parameters */
+ private Diversity diversity = new Diversity();
+
+ /** Pressure inference parameters */
+ private Pressure pressure = new Pressure();
+
+ /** Budget allocation ratios */
+ private Budget budget = new Budget();
+
+ /** Idle model cleanup */
+ private AutoClean autoClean = new AutoClean();
+
+ /** Metrics monitoring */
+ private Metrics metrics = new Metrics();
+
+ // --- getters/setters ---
+
+ public boolean isEnabled() { return enabled; }
+ public void setEnabled(boolean enabled) { this.enabled = enabled; }
+
+ public String getDiversityDetection() { return diversityDetection; }
+ public void setDiversityDetection(String diversityDetection) { this.diversityDetection = diversityDetection; }
+
+ public boolean isPressureInference() { return pressureInference; }
+ public void setPressureInference(boolean pressureInference) { this.pressureInference = pressureInference; }
+
+ public boolean isDbPersist() { return dbPersist; }
+ public void setDbPersist(boolean dbPersist) { this.dbPersist = dbPersist; }
+
+ public Executor getExecutor() { return executor; }
+ public void setExecutor(Executor executor) { this.executor = executor; }
+
+ public Probe getProbe() { return probe; }
+ public void setProbe(Probe probe) { this.probe = probe; }
+
+ public Diversity getDiversity() { return diversity; }
+ public void setDiversity(Diversity diversity) { this.diversity = diversity; }
+
+ public Pressure getPressure() { return pressure; }
+ public void setPressure(Pressure pressure) { this.pressure = pressure; }
+
+ public Budget getBudget() { return budget; }
+ public void setBudget(Budget budget) { this.budget = budget; }
+
+ public AutoClean getAutoClean() { return autoClean; }
+ public void setAutoClean(AutoClean autoClean) { this.autoClean = autoClean; }
+
+ public Metrics getMetrics() { return metrics; }
+ public void setMetrics(Metrics metrics) { this.metrics = metrics; }
+
+ // --- nested classes ---
+
+ public static class Executor {
+ private int coreSize = 8;
+ private int maxQueueSize = 500;
+
+ public int getCoreSize() { return coreSize; }
+ public void setCoreSize(int coreSize) { this.coreSize = coreSize; }
+
+ public int getMaxQueueSize() { return maxQueueSize; }
+ public void setMaxQueueSize(int maxQueueSize) { this.maxQueueSize = maxQueueSize; }
+ }
+
+ public static class Probe {
+ private int sampleSize = 20;
+ private double overflowShrinkRatio = 0.85;
+ private double binaryConvergence = 0.10;
+ private long staleReprobeMs = 1_800_000;
+ private int coldSeedFallback = 32_768;
+ private int globalCeiling = 2_000_000;
+ private long stablePersistIntervalMs = 180_000;
+
+ public int getSampleSize() { return sampleSize; }
+ public void setSampleSize(int sampleSize) { this.sampleSize = sampleSize; }
+
+ public double getOverflowShrinkRatio() { return overflowShrinkRatio; }
+ public void setOverflowShrinkRatio(double overflowShrinkRatio) { this.overflowShrinkRatio = overflowShrinkRatio; }
+
+ public double getBinaryConvergence() { return binaryConvergence; }
+ public void setBinaryConvergence(double binaryConvergence) { this.binaryConvergence = binaryConvergence; }
+
+ public long getStaleReprobeMs() { return staleReprobeMs; }
+ public void setStaleReprobeMs(long staleReprobeMs) { this.staleReprobeMs = staleReprobeMs; }
+
+ public int getColdSeedFallback() { return coldSeedFallback; }
+ public void setColdSeedFallback(int coldSeedFallback) { this.coldSeedFallback = coldSeedFallback; }
+
+ public int getGlobalCeiling() { return globalCeiling; }
+ public void setGlobalCeiling(int globalCeiling) { this.globalCeiling = globalCeiling; }
+
+ public long getStablePersistIntervalMs() { return stablePersistIntervalMs; }
+ public void setStablePersistIntervalMs(long stablePersistIntervalMs) { this.stablePersistIntervalMs = stablePersistIntervalMs; }
+ }
+
+ public static class Diversity {
+ private int maxObservations = 500;
+ private int safePercentile = 10;
+ private double successVarianceThreshold = 5.0;
+ private double overflowRatioThreshold = 3.0;
+ private int decayHours = 24;
+
+ public int getMaxObservations() { return maxObservations; }
+ public void setMaxObservations(int maxObservations) { this.maxObservations = maxObservations; }
+
+ public int getSafePercentile() { return safePercentile; }
+ public void setSafePercentile(int safePercentile) { this.safePercentile = safePercentile; }
+
+ public double getSuccessVarianceThreshold() { return successVarianceThreshold; }
+ public void setSuccessVarianceThreshold(double successVarianceThreshold) { this.successVarianceThreshold = successVarianceThreshold; }
+
+ public double getOverflowRatioThreshold() { return overflowRatioThreshold; }
+ public void setOverflowRatioThreshold(double overflowRatioThreshold) { this.overflowRatioThreshold = overflowRatioThreshold; }
+
+ public int getDecayHours() { return decayHours; }
+ public void setDecayHours(int decayHours) { this.decayHours = decayHours; }
+ }
+
+ public static class Pressure {
+ private int successDegradeStep = 5;
+ private int overflowEscalateThreshold = 3;
+ private long latencyEscalateMs = 30_000;
+ private int minLatencySamples = 3;
+
+ public int getSuccessDegradeStep() { return successDegradeStep; }
+ public void setSuccessDegradeStep(int successDegradeStep) { this.successDegradeStep = successDegradeStep; }
+
+ public int getOverflowEscalateThreshold() { return overflowEscalateThreshold; }
+ public void setOverflowEscalateThreshold(int overflowEscalateThreshold) { this.overflowEscalateThreshold = overflowEscalateThreshold; }
+
+ public long getLatencyEscalateMs() { return latencyEscalateMs; }
+ public void setLatencyEscalateMs(long latencyEscalateMs) { this.latencyEscalateMs = latencyEscalateMs; }
+
+ public int getMinLatencySamples() { return minLatencySamples; }
+ public void setMinLatencySamples(int minLatencySamples) { this.minLatencySamples = minLatencySamples; }
+ }
+
+ public static class Budget {
+ private double normalHistoryRatio = 0.60;
+ private double elevatedHistoryRatio = 0.70;
+ private double highHistoryRatio = 0.80;
+ private double criticalHistoryRatio = 0.85;
+ private double outputReserveRatio = 0.15;
+ private double keepTailRatio = 0.60;
+
+ public double getNormalHistoryRatio() { return normalHistoryRatio; }
+ public void setNormalHistoryRatio(double v) { this.normalHistoryRatio = v; }
+
+ public double getElevatedHistoryRatio() { return elevatedHistoryRatio; }
+ public void setElevatedHistoryRatio(double v) { this.elevatedHistoryRatio = v; }
+
+ public double getHighHistoryRatio() { return highHistoryRatio; }
+ public void setHighHistoryRatio(double v) { this.highHistoryRatio = v; }
+
+ public double getCriticalHistoryRatio() { return criticalHistoryRatio; }
+ public void setCriticalHistoryRatio(double v) { this.criticalHistoryRatio = v; }
+
+ public double getOutputReserveRatio() { return outputReserveRatio; }
+ public void setOutputReserveRatio(double v) { this.outputReserveRatio = v; }
+
+ public double getKeepTailRatio() { return keepTailRatio; }
+ public void setKeepTailRatio(double v) { this.keepTailRatio = v; }
+ }
+
+ public static class AutoClean {
+ private boolean enabled = true;
+ private long idleThresholdMs = 3_600_000;
+ private long cleanIntervalMs = 3_600_000;
+
+ public boolean isEnabled() { return enabled; }
+ public void setEnabled(boolean enabled) { this.enabled = enabled; }
+
+ public long getIdleThresholdMs() { return idleThresholdMs; }
+ public void setIdleThresholdMs(long idleThresholdMs) { this.idleThresholdMs = idleThresholdMs; }
+
+ public long getCleanIntervalMs() { return cleanIntervalMs; }
+ public void setCleanIntervalMs(long cleanIntervalMs) { this.cleanIntervalMs = cleanIntervalMs; }
+ }
+
+ public static class Metrics {
+ private boolean enabled = true;
+
+ public boolean isEnabled() { return enabled; }
+ public void setEnabled(boolean enabled) { this.enabled = enabled; }
+ }
+}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/config/SignalExecutorConfig.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/config/SignalExecutorConfig.java
new file mode 100644
index 000000000..deef6490a
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/config/SignalExecutorConfig.java
@@ -0,0 +1,43 @@
+package vip.mate.context.intelligence.config;
+
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
+
+import java.util.concurrent.ThreadPoolExecutor;
+
+/**
+ * Dedicated thread pool config for signal processing.
+ *
+ * Provides an independent thread pool for {@code @Async("signalExecutor")},
+ * isolated from the virtual thread executor of the global {@code AsyncSecurityConfig},
+ * to prevent signal processing tasks from crowding out business async task quota.
+ *
+ * Rejection policy uses {@code CallerRunsPolicy}: when the queue is full, falls back to the caller thread
+ * (reactor thread); losing a signal is not fatal (the next one will arrive), but fallback is safer than discard.
+ *
+ * Created only when {@code mateclaw.context.intelligence.enabled=true};
+ * when disabled there are no listeners and thus no need for a thread pool.
+ *
+ * @author MateClaw Team
+ */
+@Configuration
+public class SignalExecutorConfig {
+
+ @Bean("signalExecutor")
+ @ConditionalOnProperty(name = "mateclaw.context.intelligence.enabled", havingValue = "true")
+ public ThreadPoolTaskExecutor signalExecutor(ContextIntelligenceProperties props) {
+ ThreadPoolTaskExecutor ex = new ThreadPoolTaskExecutor();
+ int core = props.getExecutor().getCoreSize();
+ ex.setCorePoolSize(core);
+ ex.setMaxPoolSize(core * 2);
+ ex.setQueueCapacity(props.getExecutor().getMaxQueueSize());
+ ex.setThreadNamePrefix("ctx-signal-");
+ ex.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
+ ex.setWaitForTasksToCompleteOnShutdown(true);
+ ex.setAwaitTerminationSeconds(10);
+ ex.initialize();
+ return ex;
+ }
+}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/enums/ContextProfile.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/enums/ContextProfile.java
new file mode 100644
index 000000000..6d1f7e3af
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/enums/ContextProfile.java
@@ -0,0 +1,94 @@
+package vip.mate.context.intelligence.enums;
+
+/**
+ * Model context profile enum.
+ *
+ * Used for gating: {@code profile.isDynamic() && profile.hasTextContext()} determines whether
+ * a model participates in context intelligence (dynamic window probing + budget planning).
+ *
+ * The v2 initial release does not implement modelType routing (chat vs reasoning allocation ratio
+ * differences); it is only used to determine whether context intelligence is enabled.
+ *
+ * @author MateClaw Team
+ */
+public enum ContextProfile {
+
+ /** Dynamic chat model, participates in context intelligence */
+ DYNAMIC_CHAT(true, true),
+
+ /** Dynamic reasoning model, participates in context intelligence */
+ DYNAMIC_REASONING(true, true),
+
+ /** Fixed-window chat model, does not participate in dynamic probing */
+ FIXED_CHAT(false, true),
+
+ /** Fixed-window reasoning model, does not participate in dynamic probing */
+ FIXED_REASONING(false, true),
+
+ /** Embedding model, no text context */
+ FIXED_EMBEDDING(false, false),
+
+ /** Rerank model, no text context */
+ FIXED_RERANK(false, false),
+
+ /** Audio model */
+ AUDIO(false, false),
+
+ /** Image generation model */
+ IMAGE_GEN(false, false),
+
+ /** Image understanding model */
+ IMAGE_VISION(false, false),
+
+ /** Video model */
+ VIDEO(false, false),
+
+ /** Not applicable */
+ NOT_APPLICABLE(false, false);
+
+ private final boolean dynamic;
+ private final boolean textContext;
+
+ ContextProfile(boolean dynamic, boolean textContext) {
+ this.dynamic = dynamic;
+ this.textContext = textContext;
+ }
+
+ /** Whether this is a dynamic-window model (participates in WindowProbe probing) */
+ public boolean isDynamic() {
+ return dynamic;
+ }
+
+ /** Whether the model has text context (participates in TokenBudgetPlanner budget planning) */
+ public boolean hasTextContext() {
+ return textContext;
+ }
+
+ /**
+ * Infer the profile from a modelType string.
+ *
+ * Common values: {@code chat} / {@code reasoning} / {@code code} / {@code embedding} /
+ * {@code rerank} / {@code audio} / {@code image_gen} / {@code image_vision} / {@code video}
+ *
+ * @param modelType model type string (from DB model_type or state RUNTIME_MODEL_TYPE)
+ * @return the corresponding ContextProfile, defaults to DYNAMIC_CHAT
+ */
+ public static ContextProfile fromModelType(String modelType) {
+ if (modelType == null || modelType.isBlank()) {
+ return DYNAMIC_CHAT;
+ }
+ String t = modelType.trim().toLowerCase();
+ return switch (t) {
+ case "chat" -> DYNAMIC_CHAT;
+ case "reasoning" -> DYNAMIC_REASONING;
+ case "code" -> DYNAMIC_CHAT;
+ case "embedding" -> FIXED_EMBEDDING;
+ case "rerank" -> FIXED_RERANK;
+ case "audio" -> AUDIO;
+ case "image_gen", "image-gen", "imagegen" -> IMAGE_GEN;
+ case "image_vision", "image-vision", "imagevision" -> IMAGE_VISION;
+ case "video" -> VIDEO;
+ default -> DYNAMIC_CHAT;
+ };
+ }
+}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/enums/ResourcePressure.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/enums/ResourcePressure.java
new file mode 100644
index 000000000..e242e7628
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/enums/ResourcePressure.java
@@ -0,0 +1,37 @@
+package vip.mate.context.intelligence.enums;
+
+/**
+ * Resource pressure level enum.
+ *
+ * Used by {@code PressureInferencer} to infer the current resource pressure of the LLM backend,
+ * which affects the history/injection allocation ratio in {@code TokenBudgetPlanner}.
+ *
+ * The numeric mapping is used for Micrometer Gauge reporting (0/1/2/3).
+ *
+ * @author MateClaw Team
+ */
+public enum ResourcePressure {
+
+ /** Normal pressure, history ratio 60% */
+ NORMAL(0),
+
+ /** Mild pressure, history ratio 70% */
+ ELEVATED(1),
+
+ /** High pressure, history ratio 80% */
+ HIGH(2),
+
+ /** Severe pressure, history ratio 85% */
+ CRITICAL(3);
+
+ private final int gaugeValue;
+
+ ResourcePressure(int gaugeValue) {
+ this.gaugeValue = gaugeValue;
+ }
+
+ /** Micrometer Gauge reporting value */
+ public int gaugeValue() {
+ return gaugeValue;
+ }
+}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/event/LlmOverflowSignal.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/event/LlmOverflowSignal.java
new file mode 100644
index 000000000..987aa9db6
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/event/LlmOverflowSignal.java
@@ -0,0 +1,26 @@
+package vip.mate.context.intelligence.event;
+
+/**
+ * LLM overflow call signal event.
+ *
+ * Published by {@code ReasoningNode} when {@code streamCall} returns {@code isPromptTooLong()=true},
+ * consumed synchronously via the Spring event bus (without {@code @Async}).
+ *
+ * Synchronous handling ensures {@code confidenceUpper} is updated before retry,
+ * which affects binary search convergence on consecutive PTL events.
+ *
+ * @param provider provider ID
+ * @param modelName model name used by the request
+ * @param modelType model type, e.g. "chat"/"reasoning"/"code"
+ * @param attemptedTokens estimated prompt token count before overflow
+ * @param traceId 8-char UUID prefix of the graph state
+ *
+ * @author MateClaw Team
+ */
+public record LlmOverflowSignal(
+ String provider,
+ String modelName,
+ String modelType,
+ int attemptedTokens,
+ String traceId
+) {}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/event/LlmSuccessSignal.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/event/LlmSuccessSignal.java
new file mode 100644
index 000000000..6fd272dc6
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/event/LlmSuccessSignal.java
@@ -0,0 +1,29 @@
+package vip.mate.context.intelligence.event;
+
+/**
+ * LLM successful call signal event.
+ *
+ * Published by {@code ReasoningNode} after each {@code streamCall} returns a non-overflow result,
+ * consumed asynchronously via the Spring event bus ({@code @Async("signalExecutor")}).
+ *
+ * Carries data used to update the window probe state machine, the backend diversity tracker, and the pressure inferencer.
+ *
+ * @param provider provider ID (from graph state RUNTIME_PROVIDER_ID)
+ * @param modelName model name used by the request (from graph state RUNTIME_MODEL_NAME)
+ * @param modelType model type, e.g. "chat"/"reasoning"/"code" (from graph state RUNTIME_MODEL_TYPE)
+ * @param promptTokens actual prompt token count returned by the API (from StreamResult.promptTokens())
+ * @param completionTokens completion token count returned by the API (from StreamResult.completionTokens())
+ * @param latencyMs total elapsed time of this streamCall (timed around the ReasoningNode call)
+ * @param traceId 8-char UUID prefix of the graph state (from MateClawStateKeys.TRACE_ID)
+ *
+ * @author MateClaw Team
+ */
+public record LlmSuccessSignal(
+ String provider,
+ String modelName,
+ String modelType,
+ int promptTokens,
+ int completionTokens,
+ long latencyMs,
+ String traceId
+) {}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/listener/ContextSignalProcessor.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/listener/ContextSignalProcessor.java
new file mode 100644
index 000000000..350a8d453
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/listener/ContextSignalProcessor.java
@@ -0,0 +1,198 @@
+package vip.mate.context.intelligence.listener;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.context.event.EventListener;
+import org.springframework.scheduling.annotation.Async;
+import vip.mate.context.intelligence.event.LlmOverflowSignal;
+import vip.mate.context.intelligence.event.LlmSuccessSignal;
+import vip.mate.context.intelligence.metrics.ContextIntelMetrics;
+import vip.mate.context.intelligence.perception.BackendDiversityRegistry;
+import vip.mate.context.intelligence.perception.BackendDiversityTracker;
+import vip.mate.context.intelligence.perception.PressureInferencer;
+import vip.mate.context.intelligence.persist.PersistRetryQueue;
+import vip.mate.context.intelligence.persist.WindowStateRepository;
+import vip.mate.context.intelligence.probe.ProbeUpdate;
+import vip.mate.context.intelligence.probe.WindowProbeRegistry;
+import vip.mate.context.intelligence.snapshot.EnvSnapshotStore;
+
+import java.time.Duration;
+
+/**
+ * Signal processor (write path entry).
+ *
+ * Listens to {@link LlmSuccessSignal} and {@link LlmOverflowSignal}, coordinating state machine + perception + snapshot + persistence.
+ *
+ * Success path (§5.2 / C.6): {@code @Async("signalExecutor")} asynchronous processing, does not block inference threads.
+ * Each sub-step has an independent try-catch, so one failure does not affect others.
+ *
+ * Overflow path (§5.3): synchronous processing (no @Async), ensuring confidenceUpper is updated before PTL retry.
+ *
+ * @author MateClaw Team
+ */
+@Slf4j
+@RequiredArgsConstructor
+public class ContextSignalProcessor {
+
+ private final WindowProbeRegistry windowProbeRegistry;
+ private final BackendDiversityRegistry diversityRegistry;
+ private final PressureInferencer pressureInferencer;
+ private final EnvSnapshotStore envSnapshotStore;
+ private final WindowStateRepository windowStateRepository;
+ private final PersistRetryQueue persistRetryQueue;
+ /** Observability metrics gateway (§C.7), internal null degradation, no caller null-check needed */
+ private final ContextIntelMetrics metrics;
+
+ // ==================== Success path (async) ====================
+
+ /**
+ * Handle success signal (§5.2 / C.6).
+ *
+ * Each sub-step has an independent try-catch, so one failure does not affect others.
+ */
+ @Async("signalExecutor")
+ @EventListener
+ public void onSuccess(LlmSuccessSignal event) {
+ long startNs = System.nanoTime();
+ try {
+ String key = WindowProbeRegistry.key(event.provider(), event.modelName());
+
+ // Step 1: WindowProbe state machine update
+ ProbeUpdate update = null;
+ try {
+ update = windowProbeRegistry.recordSuccess(key, event.promptTokens());
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} WindowProbe recordSuccess failed: {}",
+ event.traceId(), e.getMessage());
+ // §C.7: probe update failed -> signal main effect lost, record as dropped
+ metrics.recordSignalDropped("probe_update_failed");
+ }
+
+ // Step 2: BackendDiversityTracker update
+ try {
+ diversityRegistry.recordSuccess(key, event.promptTokens());
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} DiversityRegistry failed: {}",
+ event.traceId(), e.getMessage());
+ }
+
+ // Step 3: PressureInferencer update
+ try {
+ pressureInferencer.recordSuccess(event.latencyMs());
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} PressureInferencer failed: {}",
+ event.traceId(), e.getMessage());
+ }
+
+ // Step 4+5: Refresh EnvSnapshot (only when value changes)
+ try {
+ if (update != null) {
+ BackendDiversityTracker tracker = diversityRegistry.get(key);
+ boolean diversity = (tracker != null) && tracker.isDiversityDetected();
+ int safeWindow = (tracker != null && diversity) ? tracker.computeSafeWindow() : 0;
+ envSnapshotStore.refreshIfChanged(key,
+ update.snapshot().effectiveWindow(),
+ pressureInferencer.currentPressure(),
+ diversity,
+ safeWindow);
+ }
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} EnvSnapshot refresh failed: {}",
+ event.traceId(), e.getMessage());
+ }
+
+ // Step 6: DB persist (only on phase change or first success)
+ if (update != null && (update.phaseChanged() || update.snapshot().totalSuccess() == 1)) {
+ try {
+ windowStateRepository.persist(key, update.snapshot());
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} DB persist failed: {}",
+ event.traceId(), e.getMessage());
+ }
+ }
+
+ // Step 7: Retry queue
+ try {
+ persistRetryQueue.retryOnce();
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} retryOnce failed: {}",
+ event.traceId(), e.getMessage());
+ }
+ } finally {
+ // §C.7: total duration of success signal processing
+ metrics.recordSuccessProcess(Duration.ofNanos(System.nanoTime() - startNs));
+ }
+ }
+
+ // ==================== Overflow path (sync) ====================
+
+ /**
+ * Handle overflow signal (§5.3).
+ *
+ * Synchronous execution (no @Async), ensuring confidenceUpper is updated before PTL retry.
+ * Fully wrapped in try-catch; exceptions only logged at debug level (risk 1 mitigation).
+ */
+ @EventListener
+ public void onOverflow(LlmOverflowSignal event) {
+ long startNs = System.nanoTime();
+ try {
+ String key = WindowProbeRegistry.key(event.provider(), event.modelName());
+
+ try {
+ // Step 1: WindowProbe state machine update (overflow shrinkage)
+ ProbeUpdate update = windowProbeRegistry.recordOverflow(key, event.attemptedTokens());
+
+ // Step 2: BackendDiversityTracker update (v1 Bug 2 fix)
+ try {
+ diversityRegistry.recordOverflow(key, event.attemptedTokens());
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} DiversityRegistry.recordOverflow failed: {}",
+ event.traceId(), e.getMessage());
+ }
+
+ // Step 3: PressureInferencer update
+ try {
+ pressureInferencer.recordOverflow();
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} PressureInferencer.recordOverflow failed: {}",
+ event.traceId(), e.getMessage());
+ }
+
+ // Step 4: Refresh EnvSnapshot (overflow inevitably shrinks the window, so the value definitely changes)
+ try {
+ if (update != null) {
+ BackendDiversityTracker tracker = diversityRegistry.get(key);
+ boolean diversity = (tracker != null) && tracker.isDiversityDetected();
+ int safeWindow = (tracker != null && diversity) ? tracker.computeSafeWindow() : 0;
+ envSnapshotStore.refreshIfChanged(key,
+ update.snapshot().effectiveWindow(),
+ pressureInferencer.currentPressure(),
+ diversity,
+ safeWindow);
+ }
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} EnvSnapshot refresh failed: {}",
+ event.traceId(), e.getMessage());
+ }
+
+ // Step 5: DB persist (synchronous, overflow is a low-frequency event)
+ if (update != null) {
+ try {
+ windowStateRepository.persist(key, update.snapshot());
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} DB persist failed: {}",
+ event.traceId(), e.getMessage());
+ }
+ }
+ } catch (Exception e) {
+ log.debug("[ContextIntel] traceId={} onOverflow failed: {}",
+ event.traceId(), e.getMessage());
+ // §C.7: overflow handling failed entirely -> signal dropped
+ metrics.recordSignalDropped("overflow_exception");
+ }
+ } finally {
+ // §C.7: total duration of overflow signal processing
+ metrics.recordOverflowProcess(Duration.ofNanos(System.nanoTime() - startNs));
+ }
+ }
+}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/metrics/ContextIntelMetrics.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/metrics/ContextIntelMetrics.java
new file mode 100644
index 000000000..95f105796
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/metrics/ContextIntelMetrics.java
@@ -0,0 +1,190 @@
+package vip.mate.context.intelligence.metrics;
+
+import io.micrometer.core.instrument.Counter;
+import io.micrometer.core.instrument.MeterRegistry;
+import io.micrometer.core.instrument.Tag;
+import io.micrometer.core.instrument.Tags;
+import io.micrometer.core.instrument.Timer;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.ObjectProvider;
+import org.springframework.stereotype.Component;
+import vip.mate.context.intelligence.config.ContextIntelligenceProperties;
+
+import java.time.Duration;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Context Intelligence domain metrics gateway (modeled after {@code vip.mate.wiki.metrics.WikiMetrics}).
+ *
+ * All observability metrics of the v2 module flow through this gateway, in order to:
+ * Corresponds to design document §C.7 and §8.2. Metric inventory:
+ * Gauge implementation note: Uses {@link AtomicInteger} as the gauge value container,
+ * registered once via {@link MeterRegistry#gauge(String, Iterable, Object)};
+ * subsequent updates via {@link AtomicInteger#set(int)} are automatically reflected at scrape time.
+ *
+ * @author MateClaw Team
+ */
+@Slf4j
+@Component
+public class ContextIntelMetrics {
+
+ private final MeterRegistry registry;
+ /** Combined switch: true only when MeterRegistry exists AND metrics.enabled=true */
+ private final boolean enabled;
+ private final ConcurrentHashMap
+ * Mirrors the compute pattern of {@code WindowProbeRegistry}, managing tracker instances
+ * keyed by (provider, model). One independent tracker per key (not global), because the
+ * detection targets the variance "under the same model name".
+ *
+ * @author MateClaw Team
+ */
+@Slf4j
+@Component
+public class BackendDiversityRegistry {
+
+ private final ConcurrentHashMap
+ * May return null (computeIfAbsent not yet triggered on first access).
+ */
+ public BackendDiversityTracker get(String key) {
+ return trackers.get(key);
+ }
+
+ /** Restore the diversityDetected flag from DB (§5.7 restart recovery) */
+ public void restore(String key, boolean diversityDetected) {
+ trackers.compute(key, (k, existing) -> {
+ if (existing != null) {
+ // already exists: do not overwrite the existing tracker (in-memory data takes priority)
+ return existing;
+ }
+ return BackendDiversityTracker.restore(diversityDetected, props.getDiversity());
+ });
+ }
+
+ /** Idle eviction (coordinated by WindowProbeRegistry.evictIdleProbes) */
+ public void evict(String key) {
+ trackers.remove(key);
+ }
+}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/perception/BackendDiversityTracker.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/perception/BackendDiversityTracker.java
new file mode 100644
index 000000000..3a9eef560
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/perception/BackendDiversityTracker.java
@@ -0,0 +1,181 @@
+package vip.mate.context.intelligence.perception;
+
+import vip.mate.context.intelligence.config.ContextIntelligenceProperties;
+
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+/**
+ * Backend diversity tracker (per-model instance, not a Spring Bean).
+ *
+ * Detects whether a single model name routes to multiple backends (e.g. a gateway sometimes
+ * routes "gpt-4" to an 8K model, sometimes to a 128K model). This manifests as a large
+ * variance in successTokens or an abnormal success/overflow ratio for that key.
+ *
+ * Detection conditions (triggered when any is met, sticky until decay):
+ *
+ * Thread safety: successTokens uses copy-on-write + AtomicReference;
+ * minOverflowToken / maxSuccessToken / observationCount use AtomicInteger.
+ *
+ * @author MateClaw Team
+ */
+public class BackendDiversityTracker {
+
+ // --- observation data ---
+ final AtomicReference
+ * Only meaningful when diversityDetected=true. Returns 0 when there is no observation data.
+ */
+ public int computeSafeWindow() {
+ List
+ * Infers the current resource pressure level of the LLM backend based on consecutive
+ * success/overflow/latency. The pressure level is shared globally (affecting the budget
+ * allocation ratio for all models).
+ *
+ * Degradation path (stepped, C.4): CRITICAL → HIGH → ELEVATED → NORMAL
+ *
+ * Escalation path: overflowEscalateThreshold consecutive overflows OR average latency > latencyEscalateMs (requires ≥ minLatencySamples samples)
+ *
+ * @author MateClaw Team
+ */
+@Slf4j
+@Component
+public class PressureInferencer {
+
+ // --- current pressure level (volatile, lock-free read path)---
+ volatile ResourcePressure currentPressure = ResourcePressure.NORMAL;
+
+ // --- consecutive counters (AtomicInteger, CAS updates)---
+ final AtomicInteger consecutiveSuccesses = new AtomicInteger(0);
+ final AtomicInteger consecutiveOverflows = new AtomicInteger(0);
+
+ // --- latency samples (ring buffer, synchronized guards multi-field updates)---
+ final long[] latencySamples = new long[10];
+ int latencyIdx = 0;
+ int latencySampleCount = 0;
+
+ private final ContextIntelligenceProperties.Pressure cfg;
+
+ public PressureInferencer(ContextIntelligenceProperties props) {
+ this.cfg = props.getPressure();
+ }
+
+ // ==================== write path ====================
+
+ /**
+ * Record a successful call (stepped degradation, C.4).
+ *
+ * Each level of degradation requires successDegradeStep consecutive successes; the counter
+ * is reset after degrading. Latency escalation requires ≥ minLatencySamples samples to
+ * avoid misjudging a single spike (v2.6).
+ */
+ public void recordSuccess(long latencyMs) {
+ try {
+ consecutiveSuccesses.incrementAndGet();
+ consecutiveOverflows.set(0);
+ updateLatencySamples(latencyMs);
+
+ // stepped degradation
+ int step = cfg.getSuccessDegradeStep();
+ int successes = consecutiveSuccesses.get();
+ if (successes >= step) {
+ boolean degraded = false;
+ if (currentPressure == ResourcePressure.CRITICAL) {
+ currentPressure = ResourcePressure.HIGH;
+ degraded = true;
+ } else if (currentPressure == ResourcePressure.HIGH) {
+ currentPressure = ResourcePressure.ELEVATED;
+ degraded = true;
+ } else if (currentPressure == ResourcePressure.ELEVATED) {
+ currentPressure = ResourcePressure.NORMAL;
+ degraded = true;
+ }
+ if (degraded) {
+ consecutiveSuccesses.set(0); // reset counter, accumulate step successes again for the next level
+ }
+ }
+
+ // latency escalation check (requires ≥ minLatencySamples samples, v2.6)
+ if (latencySampleCount >= cfg.getMinLatencySamples()) {
+ long avgLatency = averageLatency();
+ if (avgLatency > cfg.getLatencyEscalateMs()) {
+ escalatePressure();
+ }
+ }
+ } catch (Exception e) {
+ log.debug("[ContextIntel] PressureInferencer recordSuccess failed: {}", e.getMessage());
+ }
+ }
+
+ /** Record an overflow call (overflowEscalateThreshold consecutive overflows -> escalate) */
+ public void recordOverflow() {
+ try {
+ consecutiveOverflows.incrementAndGet();
+ consecutiveSuccesses.set(0);
+
+ if (consecutiveOverflows.get() >= cfg.getOverflowEscalateThreshold()) {
+ escalatePressure();
+ }
+ } catch (Exception e) {
+ log.debug("[ContextIntel] PressureInferencer recordOverflow failed: {}", e.getMessage());
+ }
+ }
+
+ // ==================== read path ====================
+
+ public ResourcePressure currentPressure() {
+ return currentPressure;
+ }
+
+ // ==================== internal methods ====================
+
+ /** Escalate the pressure level (NORMAL → ELEVATED → HIGH → CRITICAL) */
+ private void escalatePressure() {
+ if (currentPressure == ResourcePressure.NORMAL) {
+ currentPressure = ResourcePressure.ELEVATED;
+ } else if (currentPressure == ResourcePressure.ELEVATED) {
+ currentPressure = ResourcePressure.HIGH;
+ } else if (currentPressure == ResourcePressure.HIGH) {
+ currentPressure = ResourcePressure.CRITICAL;
+ }
+ // CRITICAL is the highest level, no further escalation
+ }
+
+ /**
+ * Update the latency sample ring buffer.
+ *
+ * synchronized guard: multiple fields (latencySamples / latencyIdx / latencySampleCount)
+ * need atomic updates. signalExecutor has 8 threads, but the method is very fast and contention is low.
+ */
+ private synchronized void updateLatencySamples(long latencyMs) {
+ latencySamples[latencyIdx] = latencyMs;
+ latencyIdx = (latencyIdx + 1) % latencySamples.length;
+ if (latencySampleCount < latencySamples.length) {
+ latencySampleCount++;
+ }
+ }
+
+ /** Compute the average of the latency samples */
+ private long averageLatency() {
+ if (latencySampleCount == 0) {
+ return 0;
+ }
+ long sum = 0;
+ for (int i = 0; i < latencySampleCount; i++) {
+ sum += latencySamples[i];
+ }
+ return sum / latencySampleCount;
+ }
+}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/persist/PersistRetryQueue.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/persist/PersistRetryQueue.java
new file mode 100644
index 000000000..8bc993bce
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/persist/PersistRetryQueue.java
@@ -0,0 +1,69 @@
+package vip.mate.context.intelligence.persist;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.ObjectProvider;
+import org.springframework.stereotype.Component;
+import vip.mate.context.intelligence.probe.WindowProbeSnapshot;
+
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+/**
+ * DB persist failure retry queue (in-memory, single retry).
+ *
+ * When DB persist fails, the snapshot is buffered to this queue. It is retried once on the next signal.
+ *
+ * Key design (§5.6):
+ *
+ * Implements {@link WindowStateLoader} for {@code WindowProbeRegistry} to load persisted state.
+ *
+ * Table schema (created in V161): {@code mate_model_context_state}, PK = (provider_id, model_name)
+ *
+ * Persistence strategy (§5.6):
+ *
+ *
+ * Any exception is caught and falls back to forContext; never affects the reasoning main flow.
+ */
+ private LoopBudgetConfig buildLoopBudgetConfig(
+ String providerId, String modelName,
+ int systemTokens, int toolsTokens, int reservedPrefixTokens) {
+ // Tier 1: attempt dynamic budget
+ if (envSnapshotStore != null && tokenBudgetPlanner != null) {
+ try {
+ EnvSnapshot snapshot = envSnapshotStore.get(providerId, modelName);
+ int fallbackWindow = loopContextWindowTokens();
+ TokenBudget budget = tokenBudgetPlanner.plan(snapshot, systemTokens, toolsTokens, fallbackWindow);
+ return LoopBudgetConfig.fromBudget(budget)
+ .withReservedPrefixTokens(reservedPrefixTokens);
+ } catch (Exception e) {
+ log.debug("[ReasoningNode] Context Intelligence plan failed, falling back to forContext: {}",
+ e.getMessage());
+ }
+ }
+ // Tier 2 + 3: yml / hardcoded fallback
+ return LoopBudgetConfig.forContext(loopContextWindowTokens())
+ .withReservedPrefixTokens(reservedPrefixTokens);
+ }
+
+ /**
+ * Context Intelligence v2: publish LLM overflow signal (design doc §5.3).
+ *
+ *
+ *
+ *
+ *
+ *
+ *
+ *
+ * @author MateClaw Team
+ */
+@Slf4j
+@Component
+public class TokenBudgetPlanner {
+
+ private final ContextIntelligenceProperties props;
+
+ public TokenBudgetPlanner(ContextIntelligenceProperties props) {
+ this.props = props;
+ }
+
+ /**
+ * Plan the token budget.
+ *
+ * @param snapshot environment snapshot (may be EMPTY)
+ * @param systemTokens token count of the system message
+ * @param toolsTokens token count of tool declarations
+ * @param fallbackWindow yml/hardcoded fallback window (used when snapshot is invalid)
+ * @return TokenBudget (includes trace for monitoring)
+ */
+ public TokenBudget plan(EnvSnapshot snapshot, int systemTokens, int toolsTokens, int fallbackWindow) {
+ // Step 1: determine the base window
+ int baseWindow = calcBaseWindow(snapshot);
+ if (baseWindow <= 0) {
+ return buildLegacyBudget(fallbackWindow, systemTokens, toolsTokens, "fallback_yml");
+ }
+
+ // Step 2: apply the diversity ceiling (P10 clamp)
+ int safeWindowApplied = 0;
+ int ceiledWindow = baseWindow;
+ if (snapshot.diversityDetected() && snapshot.safeWindow() > 0 && snapshot.safeWindow() < baseWindow) {
+ safeWindowApplied = snapshot.safeWindow();
+ ceiledWindow = snapshot.safeWindow();
+ }
+
+ // Step 3: adjust the allocation ratio based on the pressure factor
+ double historyRatio = adjustPressureRatio(snapshot.pressure());
+
+ // Step 4: compute the budget for each part
+ ContextIntelligenceProperties.Budget budgetCfg = props.getBudget();
+ int outputReserve = (int) (ceiledWindow * budgetCfg.getOutputReserveRatio());
+ int available = ceiledWindow - systemTokens - toolsTokens - outputReserve;
+ if (available <= 0) {
+ return buildLegacyBudget(fallbackWindow, systemTokens, toolsTokens, "fallback_hardcode");
+ }
+
+ int historyBudget = (int) (available * historyRatio);
+ int injectionBudget = available - historyBudget;
+ int compactTrigger = historyBudget;
+ int keepTail = (int) (historyBudget * budgetCfg.getKeepTailRatio());
+
+ String reason = safeWindowApplied > 0 ? "diversity_clamp" : "normal";
+ TokenBudgetTrace trace = new TokenBudgetTrace(
+ baseWindow, ceiledWindow, safeWindowApplied,
+ snapshot.pressure(), historyRatio,
+ systemTokens, toolsTokens, outputReserve,
+ false, // legacyFallback
+ null, 0, 0, // phase/confidence fields not populated yet (requires extra Registry lookup)
+ reason
+ );
+ return new TokenBudget(historyBudget, injectionBudget, compactTrigger, keepTail, trace);
+ }
+
+ // ==================== private methods (C.5 normalization)====================
+
+ private int calcBaseWindow(EnvSnapshot snapshot) {
+ if (snapshot == null || !snapshot.isAvailable()) {
+ return 0;
+ }
+ return Math.max(0, snapshot.effectiveWindow());
+ }
+
+ private double adjustPressureRatio(ResourcePressure pressure) {
+ ContextIntelligenceProperties.Budget budgetCfg = props.getBudget();
+ return switch (pressure) {
+ case NORMAL -> budgetCfg.getNormalHistoryRatio(); // default 0.60
+ case ELEVATED -> budgetCfg.getElevatedHistoryRatio(); // default 0.70
+ case HIGH -> budgetCfg.getHighHistoryRatio(); // default 0.80
+ case CRITICAL -> budgetCfg.getCriticalHistoryRatio(); // default 0.85
+ };
+ }
+
+ private TokenBudget buildLegacyBudget(int fallbackWindow, int systemTokens, int toolsTokens, String reason) {
+ TokenBudget legacy = TokenBudget.legacy(fallbackWindow);
+ TokenBudgetTrace trace = new TokenBudgetTrace(
+ 0, 0, 0,
+ ResourcePressure.NORMAL, 0.60,
+ systemTokens, toolsTokens, 0,
+ true, // legacyFallback
+ null, 0, 0,
+ reason
+ );
+ return new TokenBudget(
+ legacy.historyBudget(),
+ legacy.injectionBudget(),
+ legacy.compactTriggerTokens(),
+ legacy.keepTailTokens(),
+ trace
+ );
+ }
+}
diff --git a/mateclaw-server/src/main/java/vip/mate/context/intelligence/budget/TokenBudgetTrace.java b/mateclaw-server/src/main/java/vip/mate/context/intelligence/budget/TokenBudgetTrace.java
new file mode 100644
index 000000000..9e112cb39
--- /dev/null
+++ b/mateclaw-server/src/main/java/vip/mate/context/intelligence/budget/TokenBudgetTrace.java
@@ -0,0 +1,41 @@
+package vip.mate.context.intelligence.budget;
+
+import vip.mate.context.intelligence.enums.ResourcePressure;
+
+/**
+ * Budget decision trace record (for monitoring/debugging, to locate the root cause of budget anomalies).
+ *
+ *
+ *
+ *
+ *
+ *
+ *
+ * Metric name Type Tags Description
+ * {@code context.intel.signal.process.duration} Timer type=success|overflow Signal processing duration
+ * {@code context.intel.phase.transition} Counter provider,model,from,to Phase transition count
+ * {@code context.intel.snapshot.refresh} Counter provider,model Snapshot refresh count
+ * {@code context.intel.db.persist.failure} Counter provider,model DB persist failure
+ * {@code context.intel.signal.dropped} Counter reason Signal dropped
+ * {@code context.intel.pressure.level} Gauge provider,model Current pressure level 0/1/2/3
+ * {@code context.intel.effective.window} Gauge provider,model Current effective window tokens
+ * {@code context.intel.phase} Gauge provider,model Current state machine phase 0/1/2/3/4
+ * {@code context.intel.diversity.detected} Gauge provider,model Whether multiple backends detected 0/1
+ *
+ * > successTokens;
+ final AtomicInteger minOverflowToken; // v1 Bug 2: never updated, fixed in v2
+ final AtomicInteger maxSuccessToken;
+ final AtomicInteger observationCount;
+
+ // --- detection result (sticky)---
+ volatile boolean diversityDetected;
+ volatile long lastResetTime;
+
+ // --- config ---
+ final int maxObservations;
+ final int safePercentile;
+ final double successVarianceThreshold;
+ final double overflowRatioThreshold;
+ final long decayMs;
+
+ BackendDiversityTracker(ContextIntelligenceProperties.Diversity cfg) {
+ this.successTokens = new AtomicReference<>(new ArrayList<>());
+ this.minOverflowToken = new AtomicInteger(Integer.MAX_VALUE);
+ this.maxSuccessToken = new AtomicInteger(0);
+ this.observationCount = new AtomicInteger(0);
+ this.diversityDetected = false;
+ this.lastResetTime = System.currentTimeMillis();
+ this.maxObservations = cfg.getMaxObservations();
+ this.safePercentile = cfg.getSafePercentile();
+ this.successVarianceThreshold = cfg.getSuccessVarianceThreshold();
+ this.overflowRatioThreshold = cfg.getOverflowRatioThreshold();
+ this.decayMs = cfg.getDecayHours() * 3_600_000L;
+ }
+
+ /** Restore diversityDetected flag from DB (§5.7 restart recovery) */
+ static BackendDiversityTracker restore(boolean diversityDetected, ContextIntelligenceProperties.Diversity cfg) {
+ BackendDiversityTracker t = new BackendDiversityTracker(cfg);
+ t.diversityDetected = diversityDetected;
+ return t;
+ }
+
+ // ==================== write path ====================
+
+ /** Record the prompt token count of a successful call */
+ void recordSuccess(int promptTokens) {
+ checkDecay();
+
+ // copy-on-write update of successTokens
+ successTokens.updateAndGet(current -> {
+ List
+ *
+ *
+ * @author MateClaw Team
+ */
+@Slf4j
+@Component
+public class PersistRetryQueue {
+
+ private static final int MAX_QUEUE_SIZE = 50;
+
+ private final ConcurrentLinkedQueue
+ *
+ *
+ * @author MateClaw Team
+ */
+@Slf4j
+@Component
+public class WindowStateRepository implements WindowStateLoader {
+
+ private static final String TABLE = "mate_model_context_state";
+
+ private static final String SQL_UPDATE = """
+ UPDATE """ + TABLE + """
+ SET phase=?, effective_window=?, confidence_lower=?, confidence_upper=?,
+ declared_limit=?, peak_observed=?, successive_success=?, successive_overflow=?,
+ total_success=?, total_overflow=?, last_success_at=?, last_overflow_at=?, last_updated_at=?
+ WHERE provider_id=? AND model_name=?
+ """;
+
+ private static final String SQL_INSERT = """
+ INSERT INTO """ + TABLE + """
+ (provider_id, model_name, phase, effective_window, confidence_lower, confidence_upper,
+ declared_limit, peak_observed, successive_success, successive_overflow,
+ total_success, total_overflow, last_success_at, last_overflow_at, last_updated_at)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+ """;
+
+ private static final String SQL_SELECT = """
+ SELECT phase, effective_window, confidence_lower, confidence_upper,
+ declared_limit, peak_observed, successive_success, successive_overflow,
+ total_success, total_overflow, last_success_at, last_overflow_at, last_updated_at
+ FROM """ + TABLE + """
+ WHERE provider_id=? AND model_name=?
+ """;
+
+ private final JdbcTemplate jdbcTemplate;
+ private final PersistRetryQueue retryQueue;
+ /** Observability metrics gateway (§C.7), nullable: ObjectProvider may return null in test environment */
+ private final ContextIntelMetrics metrics;
+
+ public WindowStateRepository(JdbcTemplate jdbcTemplate,
+ PersistRetryQueue retryQueue,
+ ObjectProvider