Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cli/src/commands/autopilot/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1648,6 +1648,9 @@ impl stakpak_gateway::dispatcher::RunOverrideResolver for ProfileRunOverrideReso
.map(stakpak_gateway::client::AutoApproveOverride::AllowList),
system_prompt: resolved.system_prompt,
max_turns: resolved.max_turns,
context_window: resolved.context_window,
context_budget_threshold: resolved.context_budget_threshold,
keep_last_n_assistant_messages: resolved.keep_last_n_assistant_messages,
};

if overrides.is_empty() {
Expand Down
3 changes: 3 additions & 0 deletions cli/src/commands/watch/commands/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,9 @@ fn resolve_schedule_profile_overrides(
auto_approve: normalized_auto_approve,
system_prompt: resolved.system_prompt,
max_turns: resolved.max_turns,
context_window: resolved.context_window,
context_budget_threshold: resolved.context_budget_threshold,
keep_last_n_assistant_messages: resolved.keep_last_n_assistant_messages,
};

let overrides = if overrides.is_empty() {
Expand Down
9 changes: 9 additions & 0 deletions cli/src/config/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ pub struct AppConfig {
pub system_prompt: Option<String>,
/// Optional max turn override for sessions using this profile.
pub max_turns: Option<usize>,
/// Optional context window override for sessions using this profile.
pub context_window: Option<u64>,
/// Optional context budget threshold for sessions using this profile.
pub context_budget_threshold: Option<f32>,
/// Optional keep-last-N-assistant-messages for sessions using this profile.
pub keep_last_n_assistant_messages: Option<usize>,
/// Unique ID for anonymous telemetry
pub anonymous_id: Option<String>,
/// Whether to collect telemetry data
Expand Down Expand Up @@ -170,6 +176,9 @@ impl AppConfig {
subagent: profile_config.subagent,
system_prompt: profile_config.system_prompt,
max_turns: profile_config.max_turns,
context_window: profile_config.context_window,
context_budget_threshold: profile_config.context_budget_threshold,
keep_last_n_assistant_messages: profile_config.keep_last_n_assistant_messages,
anonymous_id: settings.anonymous_id,
collect_telemetry: settings.collect_telemetry,
editor: settings.editor,
Expand Down
29 changes: 29 additions & 0 deletions cli/src/config/profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,23 @@ pub struct ProfileConfig {
#[serde(skip_serializing_if = "Option::is_none")]
pub max_turns: Option<usize>,

/// Override the model's context window size (in tokens).
/// When set, this value replaces the model's default context window
/// for budget and trimming calculations.
#[serde(skip_serializing_if = "Option::is_none")]
pub context_window: Option<u64>,

/// Fraction of the context window at which context trimming triggers.
/// Range: 0.1–1.0 (e.g. 0.8 = start trimming at 80% of context window).
/// Default: 0.8 (80%).
#[serde(skip_serializing_if = "Option::is_none")]
pub context_budget_threshold: Option<f32>,

/// Number of recent assistant messages to keep untrimmed during context
/// trimming. Default: 5.
#[serde(skip_serializing_if = "Option::is_none")]
pub keep_last_n_assistant_messages: Option<usize>,

// =========================================================================
// Legacy model fields - kept for backward compatibility during migration
// These are read but deprecated (will migrate to 'model' field)
Expand Down Expand Up @@ -140,6 +157,9 @@ impl ProfileConfig {
recent_models: default.recent_models.clone(),
system_prompt: default.system_prompt.clone(),
max_turns: default.max_turns,
context_window: default.context_window,
context_budget_threshold: default.context_budget_threshold,
keep_last_n_assistant_messages: default.keep_last_n_assistant_messages,
// Enable warden for readonly sandboxed execution
warden: Some(WardenConfig::readonly_profile()),
// Don't copy allowed_tools/auto_approve - readonly has its own restrictions
Expand Down Expand Up @@ -437,6 +457,15 @@ impl ProfileConfig {
max_turns: self
.max_turns
.or_else(|| other.and_then(|config| config.max_turns)),
context_window: self
.context_window
.or_else(|| other.and_then(|config| config.context_window)),
context_budget_threshold: self
.context_budget_threshold
.or_else(|| other.and_then(|config| config.context_budget_threshold)),
keep_last_n_assistant_messages: self
.keep_last_n_assistant_messages
.or_else(|| other.and_then(|config| config.keep_last_n_assistant_messages)),
// Legacy fields - kept for reading only, not merged
eco_model: None,
smart_model: None,
Expand Down
14 changes: 13 additions & 1 deletion cli/src/config/profile_resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@ use stakpak_shared::utils::normalize_optional_string;

use super::AppConfig;

#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[derive(Debug, Clone, Default, PartialEq)]
/// Per-run overrides gathered from a profile's configuration.
///
/// Every field is optional. `resolve_profile_run_overrides` returns `None`
/// instead of this struct when all of them are `None`.
///
/// `context_budget_threshold` is an `f32`, which does not implement `Eq`,
/// so this type derives `PartialEq` only.
pub(crate) struct ResolvedProfileOverrides {
/// Model override. NOTE(review): how this string is derived is not visible
/// here — confirm against `resolve_profile_run_overrides`.
pub model: Option<String>,
/// Auto-approve list. NOTE(review): presumably tool names; confirm against
/// the resolver.
pub auto_approve: Option<Vec<String>>,
/// Allowed-tools list, produced by `normalize_tool_list` from the profile's
/// `allowed_tools`.
pub allowed_tools: Option<Vec<String>>,
/// System prompt, produced by `normalize_optional_string` from the profile's
/// `system_prompt`.
pub system_prompt: Option<String>,
/// Max turn override for sessions using this profile.
pub max_turns: Option<usize>,
/// Override for the model's context window size, in tokens.
pub context_window: Option<u64>,
/// Fraction of the context window at which context trimming triggers.
pub context_budget_threshold: Option<f32>,
/// Number of recent assistant messages to keep untrimmed during context
/// trimming.
pub keep_last_n_assistant_messages: Option<usize>,
}

pub(crate) fn resolve_profile_run_overrides(
Expand All @@ -24,12 +27,18 @@ pub(crate) fn resolve_profile_run_overrides(
let allowed_tools = normalize_tool_list(config.allowed_tools);
let system_prompt = normalize_optional_string(config.system_prompt);
let max_turns = config.max_turns;
let context_window = config.context_window;
let context_budget_threshold = config.context_budget_threshold;
let keep_last_n_assistant_messages = config.keep_last_n_assistant_messages;

if model.is_none()
&& auto_approve.is_none()
&& allowed_tools.is_none()
&& system_prompt.is_none()
&& max_turns.is_none()
&& context_window.is_none()
&& context_budget_threshold.is_none()
&& keep_last_n_assistant_messages.is_none()
{
return None;
}
Expand All @@ -40,6 +49,9 @@ pub(crate) fn resolve_profile_run_overrides(
allowed_tools,
system_prompt,
max_turns,
context_window,
context_budget_threshold,
keep_last_n_assistant_messages,
})
}

Expand Down
34 changes: 32 additions & 2 deletions libs/api/src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,17 @@ pub struct AgentClientConfig {
pub store_path: Option<String>,
/// Hook registry for lifecycle events
pub hook_registry: Option<HookRegistry<AgentState>>,
/// How many recent assistant messages to keep untrimmed when context
/// trimming is triggered (default: 5).
pub keep_last_n_assistant_messages: Option<usize>,
/// Fraction of the context window at which trimming triggers
/// (e.g. 0.8 = 80%, default: 0.8).
pub context_budget_threshold: Option<f32>,
/// Override the model's context window size (in tokens).
/// When set, replaces the model's built-in `context_window` for budget
/// calculations. Useful for local/custom models where the window may
/// not be auto-detected correctly.
pub context_window: Option<u64>,
}

impl AgentClientConfig {
Expand Down Expand Up @@ -94,6 +105,24 @@ impl AgentClientConfig {
self.hook_registry = Some(registry);
self
}

/// Set context trimming: number of recent assistant messages to preserve
pub fn with_keep_last_n_assistant_messages(mut self, n: usize) -> Self {
self.keep_last_n_assistant_messages = Some(n);
self
}

/// Set context trimming: budget threshold (0.0–1.0)
pub fn with_context_budget_threshold(mut self, threshold: f32) -> Self {
self.context_budget_threshold = Some(threshold);
self
}

/// Set context window override (in tokens)
pub fn with_context_window(mut self, window: u64) -> Self {
self.context_window = Some(window);
self
}
}

// =============================================================================
Expand Down Expand Up @@ -221,8 +250,9 @@ impl AgentClient {
hook_registry.register(
LifecycleEvent::BeforeInference,
Box::new(TaskBoardContextHook::new(TaskBoardContextHookOptions {
keep_last_n_assistant_messages: Some(5), // Keep the last 5 assistant messages in context
context_budget_threshold: Some(0.8), // defaults to 0.8 (80%)
keep_last_n_assistant_messages: config.keep_last_n_assistant_messages.or(Some(5)),
context_budget_threshold: config.context_budget_threshold.or(Some(0.8)),
context_window: config.context_window,
})),
);
let hook_registry = Arc::new(hook_registry);
Expand Down
25 changes: 25 additions & 0 deletions libs/api/src/local/context_managers/task_board_context_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use stakpak_shared::models::{
pub struct TaskBoardContextManager {
keep_last_n_assistant_messages: usize,
context_budget_threshold: f32,
context_window: Option<u64>,
}

impl super::ContextManager for TaskBoardContextManager {
Expand Down Expand Up @@ -466,6 +467,7 @@ mod tests {
TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 2, // Only keep last 2 assistant messages untrimmed
context_budget_threshold: 0.8,
context_window: None,
})
}

Expand Down Expand Up @@ -1306,6 +1308,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 2,
context_budget_threshold: 0.8,
context_window: None,
});

// Build: user, assistant, user, assistant, user, user, assistant
Expand Down Expand Up @@ -1410,6 +1413,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 2,
context_budget_threshold: 0.8,
context_window: None,
});

// Build 10 turns of user/assistant
Expand Down Expand Up @@ -1514,6 +1518,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 1,
context_budget_threshold: 0.8,
context_window: None,
});

// Realistic agent flow:
Expand Down Expand Up @@ -1618,6 +1623,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 10,
context_budget_threshold: 0.8,
context_window: None,
});

// Only 3 assistant messages but keep_last_n = 10
Expand Down Expand Up @@ -1682,6 +1688,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 0,
context_budget_threshold: 0.8,
context_window: None,
});

let messages = vec![
Expand Down Expand Up @@ -1735,6 +1742,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 1,
context_budget_threshold: 0.8,
context_window: None,
});

// Use large assistant messages and small user messages so that trimming
Expand Down Expand Up @@ -1812,6 +1820,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 2,
context_budget_threshold: 0.8,
context_window: None,
});

// 10 turns → 20 messages, small window → establishes a trim index
Expand Down Expand Up @@ -1848,6 +1857,7 @@ mod tests {
let cm_generous = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 8,
context_budget_threshold: 0.8,
context_window: None,
});

let (_, metadata2) = cm_generous.reduce_context_with_budget(messages, 100, metadata1, None);
Expand All @@ -1870,6 +1880,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 2,
context_budget_threshold: 0.8,
context_window: None,
});

// Build a conversation that's just under threshold without tools
Expand Down Expand Up @@ -1974,6 +1985,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 3,
context_budget_threshold: 0.8,
context_window: None,
});

// 5 turns of: user → assistant(tool_call) → tool(result) → assistant(follow-up)
Expand Down Expand Up @@ -2080,6 +2092,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 50,
context_budget_threshold: 0.8,
context_window: None,
});

// Simulate a 200k context window model (like Claude)
Expand Down Expand Up @@ -2362,6 +2375,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 50,
context_budget_threshold: 0.3,
context_window: None,
});

// Simulate a session with 10 turns (20 messages) — well under 50
Expand Down Expand Up @@ -2435,6 +2449,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 3,
context_budget_threshold: 0.8,
context_window: None,
});

// 6 turns: user + assistant with large content.
Expand Down Expand Up @@ -2519,6 +2534,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 20,
context_budget_threshold: 0.8,
context_window: None,
});

// Simulate a session with 30 turns of tool-heavy interaction.
Expand Down Expand Up @@ -2651,6 +2667,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 2,
context_budget_threshold: 0.8,
context_window: None,
});

// Build conversation that exceeds threshold
Expand Down Expand Up @@ -2742,6 +2759,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 10,
context_budget_threshold: 0.8,
context_window: None,
});

let messages = vec![
Expand Down Expand Up @@ -2790,6 +2808,7 @@ mod tests {
let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
keep_last_n_assistant_messages: 50, // high keep_last_n, like production
context_budget_threshold: 0.3,
context_window: None,
});

// 5 turns with large assistant responses
Expand Down Expand Up @@ -2882,13 +2901,19 @@ pub struct TaskBoardContextManagerOptions {
pub keep_last_n_assistant_messages: usize,
/// Fraction of context window at which trimming triggers (e.g., 0.8 = 80%)
pub context_budget_threshold: f32,
/// Override the model's context window size (in tokens).
/// When set, replaces the model's built-in `context_window` for budget
/// calculations. Useful for local/custom models where the window may
/// not be auto-detected correctly.
pub context_window: Option<u64>,
}

impl TaskBoardContextManager {
pub fn new(options: TaskBoardContextManagerOptions) -> Self {
Self {
keep_last_n_assistant_messages: options.keep_last_n_assistant_messages,
context_budget_threshold: options.context_budget_threshold,
context_window: options.context_window,
}
}
}
Loading
Loading