Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@ public class AgentGraphBuilder {
private final vip.mate.goal.service.GoalFollowupService goalFollowupService;
private final vip.mate.goal.config.GoalProperties goalProperties;

// Context Intelligence v2 components (design doc §10.2)
private final org.springframework.context.ApplicationEventPublisher eventPublisher;
private final vip.mate.context.intelligence.snapshot.EnvSnapshotStore envSnapshotStore;
private final vip.mate.context.intelligence.budget.TokenBudgetPlanner tokenBudgetPlanner;

/**
* Auto-grant resolver wired into the executor so an active
* {@code mate_approval_grant} row can skip {@code createPending()} for matching
Expand Down Expand Up @@ -650,6 +655,9 @@ CompiledGraph buildPlanExecuteGraph(AgentToolSet toolSet, ChatModel chatModel, i
.addStrategy(MateClawStateKeys.LLM_CALL_COUNT, KeyStrategy.REPLACE)
.addStrategy(MateClawStateKeys.RUNTIME_MODEL_NAME, KeyStrategy.REPLACE)
.addStrategy(MateClawStateKeys.RUNTIME_PROVIDER_ID, KeyStrategy.REPLACE)
// v1 Bug 3 fix: RUNTIME_MODEL_TYPE was missing registration, causing it to be lost on multi-node merges;
// ContextProfile.fromModelType fell back to defaults. See design doc §10.3.
.addStrategy(MateClawStateKeys.RUNTIME_MODEL_TYPE, KeyStrategy.REPLACE)
// SourceEvidenceLedger: ActionNode 把每轮 ToolResponse 抽取出的
// (sourcePaths, sourceSymbols, failedPaths) merge 进这个 ledger,
// 后续 ReasoningNode / FinalAnswerNode 调 validateAnswer 校验
Expand Down Expand Up @@ -856,7 +864,8 @@ CompiledGraph buildReActGraph(AgentToolSet toolSet, ChatModel chatModel, int max
ReasoningNode reasoningNode = new ReasoningNode(chatModel, toolSet, reasoningEffort,
supportsReasoningEffort,
streamingHelper, conversationWindowManager, streamTracker, 0, wikiContextService,
skillCatalogRenderer, toolDisclosureService, progressLedgerService);
skillCatalogRenderer, toolDisclosureService, progressLedgerService,
eventPublisher, envSnapshotStore, tokenBudgetPlanner);
ActionNode actionNode = new ActionNode(executor, streamTracker);
ObservationProcessor observationProcessor = new ObservationProcessor(graphObservationProperties);
ObservationNode observationNode = new ObservationNode(observationProcessor, streamTracker);
Expand Down Expand Up @@ -937,6 +946,9 @@ CompiledGraph buildReActGraph(AgentToolSet toolSet, ChatModel chatModel, int max
.addStrategy(MateClawStateKeys.COMPLETION_TOKENS, KeyStrategy.REPLACE)
.addStrategy(MateClawStateKeys.RUNTIME_MODEL_NAME, KeyStrategy.REPLACE)
.addStrategy(MateClawStateKeys.RUNTIME_PROVIDER_ID, KeyStrategy.REPLACE)
// v1 Bug 3 fix: RUNTIME_MODEL_TYPE was missing registration, causing it to be lost on multi-node merges;
// ContextProfile.fromModelType fell back to defaults. See design doc §10.3.
.addStrategy(MateClawStateKeys.RUNTIME_MODEL_TYPE, KeyStrategy.REPLACE)
// SourceEvidenceLedger: ActionNode 把每轮 ToolResponse 抽取出的
// (sourcePaths, sourceSymbols, failedPaths) merge 进这个 ledger,
// 后续 ReasoningNode / FinalAnswerNode 调 validateAnswer 校验
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package vip.mate.agent.context;

import vip.mate.context.intelligence.budget.TokenBudget;

/**
* Configuration for per-reasoning-loop message budgeting.
*
Expand Down Expand Up @@ -137,4 +139,27 @@ public LoopBudgetConfig withReservedPrefixTokens(int reservedPrefixTokens) {
return new LoopBudgetConfig(triggerTokens, keepTailTokens, minTailMessages,
tailSoftCeilingRatio, reservedPrefixTokens, targetMaxMessages);
}

/**
 * v2 factory: derives a {@link LoopBudgetConfig} from a planned {@link TokenBudget}
 * (design doc §5.5 / §10.1).
 * <p>
 * Only two values are read from the budget:
 * <ul>
 *   <li>{@code compactTriggerTokens()} becomes {@code triggerTokens}, floored at
 *       {@code MIN_TRIGGER_TOKENS};</li>
 *   <li>{@code keepTailTokens()} becomes {@code keepTailTokens}, floored at
 *       {@code MIN_TAIL_TOKENS}.</li>
 * </ul>
 * If the floored tail is not strictly smaller than the trigger, the tail is recomputed as
 * {@code trigger - MIN_TRIGGER_TOKENS}, again floored at {@code MIN_TAIL_TOKENS}.
 * <p>
 * The remaining constructor arguments are fixed here: {@code minTailMessages = 4},
 * {@code tailSoftCeilingRatio = 1.5}, {@code reservedPrefixTokens = 0},
 * {@code targetMaxMessages = 200}.
 *
 * @param budget planned token budget; must not be null (it is dereferenced unconditionally)
 * @return a new LoopBudgetConfig built from the clamped trigger / tail values
 */
public static LoopBudgetConfig fromBudget(TokenBudget budget) {
    final int triggerTokens = Math.max(budget.compactTriggerTokens(), MIN_TRIGGER_TOKENS);
    final int flooredTail = Math.max(budget.keepTailTokens(), MIN_TAIL_TOKENS);

    // The tail must stay below the trigger; otherwise shrink it relative to the trigger.
    final int tailTokens = flooredTail < triggerTokens
            ? flooredTail
            : Math.max(triggerTokens - MIN_TRIGGER_TOKENS, MIN_TAIL_TOKENS);

    return new LoopBudgetConfig(triggerTokens, tailTokens, 4, 1.5, 0, 200);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,8 @@ private Map<String, Object> buildInitialState(String userMessage, String convers
inputs.put(COMPLETION_TOKENS, 0);
inputs.put(RUNTIME_MODEL_NAME, modelName != null ? modelName : "");
inputs.put(RUNTIME_PROVIDER_ID, runtimeProviderId != null ? runtimeProviderId : "");
// Context Intelligence v2: inject modelType for ContextProfile.fromModelType() threshold check
inputs.put(RUNTIME_MODEL_TYPE, runtimeModelConfig != null && runtimeModelConfig.getModelType() != null ? runtimeModelConfig.getModelType() : "");
inputs.put(TRACE_ID, UUID.randomUUID().toString().substring(0, 8));

// Multimodal sidecar routing — null when the turn carries no media or
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.springframework.ai.model.tool.ToolCallingChatOptions;
import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.ai.tool.ToolCallback;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.util.StringUtils;
import vip.mate.agent.AgentToolSet;
import vip.mate.agent.GraphEventPublisher;
Expand All @@ -29,6 +30,12 @@
import vip.mate.agent.graph.state.MateClawStateAccessor;
import vip.mate.agent.graph.state.MateClawStateKeys;
import vip.mate.agent.graph.state.SourceEvidenceLedger;
import vip.mate.context.intelligence.budget.TokenBudget;
import vip.mate.context.intelligence.budget.TokenBudgetPlanner;
import vip.mate.context.intelligence.event.LlmOverflowSignal;
import vip.mate.context.intelligence.event.LlmSuccessSignal;
import vip.mate.context.intelligence.snapshot.EnvSnapshot;
import vip.mate.context.intelligence.snapshot.EnvSnapshotStore;

import vip.mate.channel.web.ChatStreamTracker;

Expand Down Expand Up @@ -350,6 +357,14 @@ private boolean hasWikiTool() {
*/
private final vip.mate.agent.progress.ProgressLedgerService progressLedgerService;

/**
* Context Intelligence v2 components (design doc §10.1). All three can be null:
* legacy callers / test constructors that don't inject them fall back to the yml fallback in {@link #loopContextWindowTokens()}.
*/
private final ApplicationEventPublisher eventPublisher;
private final EnvSnapshotStore envSnapshotStore;
private final TokenBudgetPlanner tokenBudgetPlanner;

public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoningEffort,
NodeStreamingChatHelper streamingHelper,
ConversationWindowManager conversationWindowManager,
Expand Down Expand Up @@ -448,6 +463,31 @@ public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoning
vip.mate.skill.runtime.SkillCatalogRenderer skillCatalogRenderer,
vip.mate.tool.disclosure.ToolDisclosureService toolDisclosureService,
vip.mate.agent.progress.ProgressLedgerService progressLedgerService) {
this(chatModel, toolSet, reasoningEffort, supportsReasoningEffort, streamingHelper,
conversationWindowManager, streamTracker, maxOutputTokens, wikiContextService,
skillCatalogRenderer, toolDisclosureService, progressLedgerService,
null, null, null);
}

/**
* Context Intelligence v2 primary constructor (design doc §10.2).
* <p>
* Adds 3 parameters: {@code eventPublisher} / {@code envSnapshotStore} / {@code tokenBudgetPlanner}.
* All three can be null (legacy callers / test scenarios); when null, falls back to yml + hardcoded fallback,
* with behavior identical to v1.
*/
public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoningEffort,
boolean supportsReasoningEffort,
NodeStreamingChatHelper streamingHelper,
ConversationWindowManager conversationWindowManager,
ChatStreamTracker streamTracker, int maxOutputTokens,
vip.mate.wiki.service.WikiContextService wikiContextService,
vip.mate.skill.runtime.SkillCatalogRenderer skillCatalogRenderer,
vip.mate.tool.disclosure.ToolDisclosureService toolDisclosureService,
vip.mate.agent.progress.ProgressLedgerService progressLedgerService,
ApplicationEventPublisher eventPublisher,
EnvSnapshotStore envSnapshotStore,
TokenBudgetPlanner tokenBudgetPlanner) {
this.chatModel = chatModel;
this.toolSet = toolSet;
this.toolCallbacks = toolSet.callbacks();
Expand All @@ -461,6 +501,9 @@ public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoning
this.wikiContextService = wikiContextService;
this.skillCatalogRenderer = skillCatalogRenderer;
this.progressLedgerService = progressLedgerService;
this.eventPublisher = eventPublisher;
this.envSnapshotStore = envSnapshotStore;
this.tokenBudgetPlanner = tokenBudgetPlanner;
}

/**
Expand All @@ -477,6 +520,74 @@ private int loopContextWindowTokens() {
return DEFAULT_LOOP_CONTEXT_WINDOW_TOKENS;
}

/**
 * Context Intelligence v2: builds the {@link LoopBudgetConfig} used to budget the loop
 * messages (design doc §5.5 / §10.1).
 * <p>
 * Resolution order:
 * <ol>
 *   <li>When both {@code envSnapshotStore} and {@code tokenBudgetPlanner} are injected, the
 *       snapshot for {@code (providerId, modelName)} is looked up and handed to
 *       {@code tokenBudgetPlanner.plan(...)} together with {@link #loopContextWindowTokens()}
 *       as the fallback window; the resulting {@link TokenBudget} is converted via
 *       {@link LoopBudgetConfig#fromBudget}. How the planner treats an empty snapshot is
 *       decided by the planner, not here.</li>
 *   <li>When either component is null, or the dynamic path throws, the config is built with
 *       {@code LoopBudgetConfig.forContext(loopContextWindowTokens())}.</li>
 * </ol>
 * In both cases {@code reservedPrefixTokens} is applied through
 * {@code withReservedPrefixTokens}. Exceptions from the dynamic path are caught and logged
 * at debug level so that budget planning does not interrupt the reasoning flow.
 *
 * @param providerId           runtime provider id used as snapshot key
 * @param modelName            runtime model name used as snapshot key
 * @param systemTokens         estimated system-prompt tokens, forwarded to the planner
 * @param toolsTokens          estimated tool-definition tokens, forwarded to the planner
 * @param reservedPrefixTokens prefix reserve applied to the returned config
 * @return the loop budget configuration for this pass
 */
private LoopBudgetConfig buildLoopBudgetConfig(
        String providerId, String modelName,
        int systemTokens, int toolsTokens, int reservedPrefixTokens) {
    // Dynamic path: only attempted when both v2 collaborators were injected.
    if (envSnapshotStore != null && tokenBudgetPlanner != null) {
        try {
            EnvSnapshot snapshot = envSnapshotStore.get(providerId, modelName);
            int fallbackWindow = loopContextWindowTokens();
            TokenBudget budget = tokenBudgetPlanner.plan(snapshot, systemTokens, toolsTokens, fallbackWindow);
            return LoopBudgetConfig.fromBudget(budget)
                    .withReservedPrefixTokens(reservedPrefixTokens);
        } catch (Exception e) {
            // Log the exception type and pass the throwable itself: getMessage() alone is
            // null for many runtime exceptions (e.g. NPE) and drops the stack trace.
            log.debug("[ReasoningNode] Context Intelligence plan failed, falling back to forContext: {}",
                    e.toString(), e);
        }
    }
    // Static path: configured / default context window.
    return LoopBudgetConfig.forContext(loopContextWindowTokens())
            .withReservedPrefixTokens(reservedPrefixTokens);
}

/**
 * Context Intelligence v2: publishes an {@link LlmOverflowSignal} (design doc §5.3).
 * <p>
 * Does nothing when {@code eventPublisher} is null (callers that did not inject the v2
 * components). Any exception thrown while publishing is caught and logged at debug level,
 * so a failing listener cannot break the reasoning flow.
 * <p>
 * NOTE(review): the design doc expects the listener to run synchronously so that its state
 * is updated before the prompt-too-long retry; that depends on the listener's
 * configuration, which is not visible from this class — confirm.
 *
 * @param provider        runtime provider id
 * @param modelName       runtime model name
 * @param modelType       runtime model type
 * @param attemptedTokens token count reported for the call that overflowed
 * @param traceId         trace id of the current run
 */
private void publishOverflowSignal(String provider, String modelName, String modelType,
                                   int attemptedTokens, String traceId) {
    if (eventPublisher == null) {
        return;
    }
    try {
        eventPublisher.publishEvent(new LlmOverflowSignal(
                provider, modelName, modelType, attemptedTokens, traceId));
    } catch (Exception e) {
        // Include the exception type and the throwable itself; getMessage() alone may be
        // null and hides where the listener failed.
        log.debug("[ReasoningNode] publishOverflowSignal failed (non-fatal): {}", e.toString(), e);
    }
}

/**
 * Context Intelligence v2: publishes an {@link LlmSuccessSignal} (design doc §5.2).
 * <p>
 * Does nothing when {@code eventPublisher} is null (callers that did not inject the v2
 * components). Any exception thrown while publishing is caught and logged at debug level,
 * so signal delivery cannot break the reasoning flow.
 * <p>
 * NOTE(review): the design doc expects the listener to be asynchronous so the reasoning
 * thread is not blocked; that depends on the listener's configuration, which is not visible
 * from this class — confirm.
 *
 * @param provider         runtime provider id
 * @param modelName        runtime model name
 * @param modelType        runtime model type
 * @param promptTokens     prompt tokens reported for the call
 * @param completionTokens completion tokens reported for the call
 * @param latencyMs        wall-clock duration of the call in milliseconds
 * @param traceId          trace id of the current run
 */
private void publishSuccessSignal(String provider, String modelName, String modelType,
                                  int promptTokens, int completionTokens,
                                  long latencyMs, String traceId) {
    if (eventPublisher == null) {
        return;
    }
    try {
        eventPublisher.publishEvent(new LlmSuccessSignal(
                provider, modelName, modelType,
                promptTokens, completionTokens, latencyMs, traceId));
    } catch (Exception e) {
        // Include the exception type and the throwable itself; getMessage() alone may be
        // null and hides where publishing failed.
        log.debug("[ReasoningNode] publishSuccessSignal failed (non-fatal): {}", e.toString(), e);
    }
}

public ReasoningNode(ChatModel chatModel, AgentToolSet toolSet, String reasoningEffort,
NodeStreamingChatHelper streamingHelper,
ConversationWindowManager conversationWindowManager) {
Expand Down Expand Up @@ -511,6 +622,9 @@ public ReasoningNode(ChatModel chatModel, List<ToolCallback> toolCallbacks) {
this.wikiContextService = null;
this.skillCatalogRenderer = null;
this.progressLedgerService = null;
this.eventPublisher = null;
this.envSnapshotStore = null;
this.tokenBudgetPlanner = null;
}

@Override
Expand Down Expand Up @@ -596,8 +710,15 @@ public Map<String, Object> apply(OverAllState state) throws Exception {
int toolsTokens = TokenEstimator.estimateToolsTokens(toolCallbacks);
int loopReservedPrefixTokens = systemTokens + toolsTokens
+ maxOutputTokens + LOOP_PREFIX_AUXILIARY_RESERVE_TOKENS;
LoopBudgetConfig loopCfg = LoopBudgetConfig.forContext(loopContextWindowTokens())
.withReservedPrefixTokens(loopReservedPrefixTokens);

// Context Intelligence v2: prefer deriving budget from EnvSnapshot + TokenBudgetPlanner
// (design doc §5.5 / §10.1). When any of the three is null or snapshot is empty, falls back to
// the yml fallback LoopBudgetConfig.forContext(loopContextWindowTokens()).
String ctxRuntimeModelName = state.value(MateClawStateKeys.RUNTIME_MODEL_NAME, "");
String ctxRuntimeProviderId = state.value(MateClawStateKeys.RUNTIME_PROVIDER_ID, "");
LoopBudgetConfig loopCfg = buildLoopBudgetConfig(
ctxRuntimeProviderId, ctxRuntimeModelName,
systemTokens, toolsTokens, loopReservedPrefixTokens);
LoopMessageBudgeter.Result budgeted = LOOP_BUDGETER.budget(messages, loopCfg);
// Only log when the budget actually modified the list — a triggered-
// but-no-op pass is normal (history fits comfortably under the tail
Expand Down Expand Up @@ -743,6 +864,11 @@ public Map<String, Object> apply(OverAllState state) throws Exception {
"llmCallCount", nextLlmCallCount
));

// Context Intelligence v2: wrap timing for LlmSuccessSignal.latencyMs
long llmCallStartMs = System.currentTimeMillis();
String ctxTraceId = state.value(MateClawStateKeys.TRACE_ID, "");
String ctxRuntimeModelType = state.value(MateClawStateKeys.RUNTIME_MODEL_TYPE, "");

NodeStreamingChatHelper.StreamResult result;
try {
result = streamingHelper.streamCall(chatModel, prompt, conversationId, "reasoning");
Expand All @@ -751,6 +877,9 @@ public Map<String, Object> apply(OverAllState state) throws Exception {
// Prompt 仍带 wiki / runtime context;早期的 tail-only 路径会把
// wiki 段一起丢掉,重试后的 prompt 比原始更短少一层信息。
if (result.isPromptTooLong() && conversationWindowManager != null) {
// Context Intelligence v2: publish overflow signal (synchronous, effective before retry)
publishOverflowSignal(ctxRuntimeProviderId, ctxRuntimeModelName, ctxRuntimeModelType,
result.promptTokens(), ctxTraceId);
log.warn("[ReasoningNode] Prompt too long, attempting STRUCTURED compaction and retry");

// MateClawStateAccessor.agentId() returns String per state
Expand Down Expand Up @@ -843,6 +972,15 @@ public Map<String, Object> apply(OverAllState state) throws Exception {
.build();
}

// Context Intelligence v2: publish success signal (asynchronous, does not block reasoning)
// Only published when the final result is non-PTL / non-user-stopped — PTL already published the overflow signal above,
// and user-stop carries no valid token information.
if (!result.isPromptTooLong() && !result.stopped()) {
long latencyMs = System.currentTimeMillis() - llmCallStartMs;
publishSuccessSignal(ctxRuntimeProviderId, ctxRuntimeModelName, ctxRuntimeModelType,
result.promptTokens(), result.completionTokens(), latencyMs, ctxTraceId);
}

// ======= 处理 StreamResult =======

// 用户主动停止且有部分内容
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ private Map<String, Object> buildInitialState(String userMessage, String convers
inputs.put(MateClawStateKeys.COMPLETION_TOKENS, 0);
inputs.put(MateClawStateKeys.RUNTIME_MODEL_NAME, modelName != null ? modelName : "");
inputs.put(MateClawStateKeys.RUNTIME_PROVIDER_ID, runtimeProviderId != null ? runtimeProviderId : "");
// Context Intelligence v2: inject modelType for ContextProfile.fromModelType() threshold check
inputs.put(MateClawStateKeys.RUNTIME_MODEL_TYPE, runtimeModelConfig != null && runtimeModelConfig.getModelType() != null ? runtimeModelConfig.getModelType() : "");
inputs.put(MateClawStateKeys.TRACE_ID, UUID.randomUUID().toString().substring(0, 8));

if (currentTurn.routingDecision() != null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ private MateClawStateKeys() {
// ===== 运行时模型快照(REPLACE 策略,buildInitialState 注入)=====
public static final String RUNTIME_MODEL_NAME = "runtime_model_name";
public static final String RUNTIME_PROVIDER_ID = "runtime_provider_id";
/**
* Runtime model type (chat / reasoning / embedding / ...), injected by buildInitialState.
* <p>
* Context Intelligence v2 uses {@code ContextProfile.fromModelType(modelType)}
* to decide whether to participate in dynamic window probing ({@code isDynamic() && hasTextContext()}).
* v1 failed to register this key, causing it to be lost on multi-node merges, and ContextProfile inference fell back to defaults.
*/
public static final String RUNTIME_MODEL_TYPE = "runtime_model_type";

// ===== RFC-052: Tool returnDirect 与数据隔离 =====

Expand Down
Loading