diff --git a/cli/src/commands/autopilot/mod.rs b/cli/src/commands/autopilot/mod.rs
index ba7bf0934..61ea148c4 100644
--- a/cli/src/commands/autopilot/mod.rs
+++ b/cli/src/commands/autopilot/mod.rs
@@ -1648,6 +1648,9 @@ impl stakpak_gateway::dispatcher::RunOverrideResolver for ProfileRunOverrideReso
                 .map(stakpak_gateway::client::AutoApproveOverride::AllowList),
             system_prompt: resolved.system_prompt,
             max_turns: resolved.max_turns,
+            context_window: resolved.context_window,
+            context_budget_threshold: resolved.context_budget_threshold,
+            keep_last_n_assistant_messages: resolved.keep_last_n_assistant_messages,
         };
 
         if overrides.is_empty() {
diff --git a/cli/src/commands/watch/commands/run.rs b/cli/src/commands/watch/commands/run.rs
index f40ef77f8..0cd9810b1 100644
--- a/cli/src/commands/watch/commands/run.rs
+++ b/cli/src/commands/watch/commands/run.rs
@@ -901,6 +901,9 @@ fn resolve_schedule_profile_overrides(
         auto_approve: normalized_auto_approve,
         system_prompt: resolved.system_prompt,
         max_turns: resolved.max_turns,
+        context_window: resolved.context_window,
+        context_budget_threshold: resolved.context_budget_threshold,
+        keep_last_n_assistant_messages: resolved.keep_last_n_assistant_messages,
     };
 
     let overrides = if overrides.is_empty() {
diff --git a/cli/src/config/app.rs b/cli/src/config/app.rs
index 798188a5a..49a9ea65c 100644
--- a/cli/src/config/app.rs
+++ b/cli/src/config/app.rs
@@ -55,6 +55,12 @@ pub struct AppConfig {
     pub system_prompt: Option<String>,
     /// Optional max turn override for sessions using this profile.
     pub max_turns: Option<usize>,
+    /// Optional context window override for sessions using this profile.
+    pub context_window: Option<u64>,
+    /// Optional context budget threshold for sessions using this profile.
+    pub context_budget_threshold: Option<f32>,
+    /// Optional keep-last-N-assistant-messages for sessions using this profile.
+    pub keep_last_n_assistant_messages: Option<usize>,
     /// Unique ID for anonymous telemetry
     pub anonymous_id: Option<String>,
     /// Whether to collect telemetry data
@@ -170,6 +176,9 @@ impl AppConfig {
             subagent: profile_config.subagent,
             system_prompt: profile_config.system_prompt,
             max_turns: profile_config.max_turns,
+            context_window: profile_config.context_window,
+            context_budget_threshold: profile_config.context_budget_threshold,
+            keep_last_n_assistant_messages: profile_config.keep_last_n_assistant_messages,
             anonymous_id: settings.anonymous_id,
             collect_telemetry: settings.collect_telemetry,
             editor: settings.editor,
diff --git a/cli/src/config/profile.rs b/cli/src/config/profile.rs
index 64ee536f1..c15123291 100644
--- a/cli/src/config/profile.rs
+++ b/cli/src/config/profile.rs
@@ -99,6 +99,23 @@ pub struct ProfileConfig {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub max_turns: Option<usize>,
 
+    /// Override the model's context window size (in tokens).
+    /// When set, this value replaces the model's default context window
+    /// for budget and trimming calculations.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub context_window: Option<u64>,
+
+    /// Fraction of the context window at which context trimming triggers.
+    /// Range: 0.1–1.0 (e.g. 0.8 = start trimming at 80% of context window).
+    /// Default: 0.8 (80%).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub context_budget_threshold: Option<f32>,
+
+    /// Number of recent assistant messages to keep untrimmed during context
+    /// trimming. Default: 5.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub keep_last_n_assistant_messages: Option<usize>,
+
     // =========================================================================
     // Legacy model fields - kept for backward compatibility during migration
     // These are read but deprecated (will migrate to 'model' field)
@@ -140,6 +157,9 @@ impl ProfileConfig {
                 recent_models: default.recent_models.clone(),
                 system_prompt: default.system_prompt.clone(),
                 max_turns: default.max_turns,
+                context_window: default.context_window,
+                context_budget_threshold: default.context_budget_threshold,
+                keep_last_n_assistant_messages: default.keep_last_n_assistant_messages,
                 // Enable warden for readonly sandboxed execution
                 warden: Some(WardenConfig::readonly_profile()),
                 // Don't copy allowed_tools/auto_approve - readonly has its own restrictions
@@ -437,6 +457,15 @@ impl ProfileConfig {
             max_turns: self
                 .max_turns
                 .or_else(|| other.and_then(|config| config.max_turns)),
+            context_window: self
+                .context_window
+                .or_else(|| other.and_then(|config| config.context_window)),
+            context_budget_threshold: self
+                .context_budget_threshold
+                .or_else(|| other.and_then(|config| config.context_budget_threshold)),
+            keep_last_n_assistant_messages: self
+                .keep_last_n_assistant_messages
+                .or_else(|| other.and_then(|config| config.keep_last_n_assistant_messages)),
             // Legacy fields - kept for reading only, not merged
             eco_model: None,
             smart_model: None,
diff --git a/cli/src/config/profile_resolver.rs b/cli/src/config/profile_resolver.rs
index 1ddb9ce79..af0bdc06d 100644
--- a/cli/src/config/profile_resolver.rs
+++ b/cli/src/config/profile_resolver.rs
@@ -4,13 +4,16 @@ use stakpak_shared::utils::normalize_optional_string;
 
 use super::AppConfig;
 
-#[derive(Debug, Clone, Default, PartialEq, Eq)]
+#[derive(Debug, Clone, Default, PartialEq)] // no `Eq`: the `f32` field below is not `Eq`
 pub(crate) struct ResolvedProfileOverrides {
     pub model: Option<String>,
     pub auto_approve: Option<Vec<String>>,
     pub allowed_tools: Option<Vec<String>>,
     pub system_prompt: Option<String>,
     pub max_turns: Option<usize>,
+    pub context_window: Option<u64>, // context window size override, in tokens
+    pub context_budget_threshold: Option<f32>, // fraction of the window that triggers trimming
+    pub keep_last_n_assistant_messages: Option<usize>, // assistant messages kept untrimmed
 }
 
 pub(crate) fn resolve_profile_run_overrides(
@@ -24,12 +27,18 @@ pub(crate) fn resolve_profile_run_overrides(
     let allowed_tools = normalize_tool_list(config.allowed_tools);
     let system_prompt = normalize_optional_string(config.system_prompt);
     let max_turns = config.max_turns;
+    let context_window = config.context_window;
+    let context_budget_threshold = config.context_budget_threshold;
+    let keep_last_n_assistant_messages = config.keep_last_n_assistant_messages;
 
     if model.is_none()
         && auto_approve.is_none()
         && allowed_tools.is_none()
         && system_prompt.is_none()
         && max_turns.is_none()
+        && context_window.is_none()
+        && context_budget_threshold.is_none()
+        && keep_last_n_assistant_messages.is_none()
     {
         return None;
     }
@@ -40,6 +49,9 @@ pub(crate) fn resolve_profile_run_overrides(
         allowed_tools,
         system_prompt,
         max_turns,
+        context_window,
+        context_budget_threshold,
+        keep_last_n_assistant_messages,
     })
 }
 
diff --git a/libs/api/src/client/mod.rs b/libs/api/src/client/mod.rs
index 8683ea732..d2e5b01ed 100644
--- a/libs/api/src/client/mod.rs
+++ b/libs/api/src/client/mod.rs
@@ -61,6 +61,17 @@ pub struct AgentClientConfig {
     pub store_path: Option<String>,
     /// Hook registry for lifecycle events
     pub hook_registry: Option<HookRegistry<AgentState>>,
+    /// How many recent assistant messages to keep untrimmed when context
+    /// trimming is triggered (default: 5).
+    pub keep_last_n_assistant_messages: Option<usize>,
+    /// Fraction of the context window at which trimming triggers
+    /// (e.g. 0.8 = 80%, default: 0.8).
+    pub context_budget_threshold: Option<f32>,
+    /// Override the model's context window size (in tokens).
+    /// When set, replaces the model's built-in `context_window` for budget
+    /// calculations. Useful for local/custom models where the window may
+    /// not be auto-detected correctly.
+    pub context_window: Option<u64>,
 }
 
 impl AgentClientConfig {
@@ -94,6 +105,24 @@ impl AgentClientConfig {
         self.hook_registry = Some(registry);
         self
     }
+
+    /// Set how many of the most recent assistant messages context trimming keeps untrimmed
+    pub fn with_keep_last_n_assistant_messages(mut self, n: usize) -> Self {
+        self.keep_last_n_assistant_messages = Some(n);
+        self
+    }
+
+    /// Set the context-window fraction at which trimming triggers (e.g. 0.8); not validated here
+    pub fn with_context_budget_threshold(mut self, threshold: f32) -> Self {
+        self.context_budget_threshold = Some(threshold);
+        self
+    }
+
+    /// Override the model's built-in context window (in tokens) used for budget calculations
+    pub fn with_context_window(mut self, window: u64) -> Self {
+        self.context_window = Some(window);
+        self
+    }
 }
 
 // =============================================================================
@@ -221,8 +250,9 @@ impl AgentClient {
         hook_registry.register(
             LifecycleEvent::BeforeInference,
             Box::new(TaskBoardContextHook::new(TaskBoardContextHookOptions {
-                keep_last_n_assistant_messages: Some(5), // Keep the last 5 assistant messages in context
-                context_budget_threshold: Some(0.8),     // defaults to 0.8 (80%)
+                keep_last_n_assistant_messages: config.keep_last_n_assistant_messages.or(Some(5)),
+                context_budget_threshold: config.context_budget_threshold.or(Some(0.8)),
+                context_window: config.context_window,
             })),
         );
         let hook_registry = Arc::new(hook_registry);
diff --git a/libs/api/src/local/context_managers/task_board_context_manager.rs b/libs/api/src/local/context_managers/task_board_context_manager.rs
index 80a9b96ef..2c7a0154f 100644
--- a/libs/api/src/local/context_managers/task_board_context_manager.rs
+++ b/libs/api/src/local/context_managers/task_board_context_manager.rs
@@ -8,6 +8,7 @@ use stakpak_shared::models::{
 pub struct TaskBoardContextManager {
     keep_last_n_assistant_messages: usize,
     context_budget_threshold: f32,
+    pub(crate) context_window: Option<u64>, // read directly by TaskBoardContextHook
 }
 
 impl super::ContextManager for TaskBoardContextManager {
@@ -466,6 +467,7 @@ mod tests {
         TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 2, // Only keep last 2 assistant messages untrimmed
             context_budget_threshold: 0.8,
+            context_window: None,
         })
     }
 
@@ -1306,6 +1308,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 2,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // Build: user, assistant, user, assistant, user, user, assistant
@@ -1410,6 +1413,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 2,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // Build 10 turns of user/assistant
@@ -1514,6 +1518,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 1,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // Realistic agent flow:
@@ -1618,6 +1623,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 10,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // Only 3 assistant messages but keep_last_n = 10
@@ -1682,6 +1688,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 0,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         let messages = vec![
@@ -1735,6 +1742,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 1,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // Use large assistant messages and small user messages so that trimming
@@ -1812,6 +1820,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 2,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // 10 turns → 20 messages, small window → establishes a trim index
@@ -1848,6 +1857,7 @@ mod tests {
         let cm_generous = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 8,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         let (_, metadata2) = cm_generous.reduce_context_with_budget(messages, 100, metadata1, None);
@@ -1870,6 +1880,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 2,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // Build a conversation that's just under threshold without tools
@@ -1974,6 +1985,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 3,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // 5 turns of: user → assistant(tool_call) → tool(result) → assistant(follow-up)
@@ -2080,6 +2092,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 50,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // Simulate a 200k context window model (like Claude)
@@ -2362,6 +2375,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 50,
             context_budget_threshold: 0.3,
+            context_window: None,
         });
 
         // Simulate a session with 10 turns (20 messages) — well under 50
@@ -2435,6 +2449,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 3,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // 6 turns: user + assistant with large content.
@@ -2519,6 +2534,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 20,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // Simulate a session with 30 turns of tool-heavy interaction.
@@ -2651,6 +2667,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 2,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         // Build conversation that exceeds threshold
@@ -2742,6 +2759,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 10,
             context_budget_threshold: 0.8,
+            context_window: None,
         });
 
         let messages = vec![
@@ -2790,6 +2808,7 @@ mod tests {
         let cm = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: 50, // high keep_last_n, like production
             context_budget_threshold: 0.3,
+            context_window: None,
         });
 
         // 5 turns with large assistant responses
@@ -2882,6 +2901,11 @@ pub struct TaskBoardContextManagerOptions {
     pub keep_last_n_assistant_messages: usize,
     /// Fraction of context window at which trimming triggers (e.g., 0.8 = 80%)
     pub context_budget_threshold: f32,
+    /// Override the model's context window size (in tokens).
+    /// When set, replaces the model's built-in `context_window` for budget
+    /// calculations. Useful for local/custom models where the window may
+    /// not be auto-detected correctly.
+    pub context_window: Option<u64>,
 }
 
 impl TaskBoardContextManager {
@@ -2889,6 +2913,7 @@ impl TaskBoardContextManager {
         Self {
             keep_last_n_assistant_messages: options.keep_last_n_assistant_messages,
             context_budget_threshold: options.context_budget_threshold,
+            context_window: options.context_window,
         }
     }
 }
diff --git a/libs/api/src/local/hooks/task_board_context/mod.rs b/libs/api/src/local/hooks/task_board_context/mod.rs
index 797a4dae4..05c3e44d7 100644
--- a/libs/api/src/local/hooks/task_board_context/mod.rs
+++ b/libs/api/src/local/hooks/task_board_context/mod.rs
@@ -21,6 +21,11 @@ pub struct TaskBoardContextHookOptions {
     pub keep_last_n_assistant_messages: Option<usize>,
     /// Fraction of the context window at which trimming triggers (e.g. 0.8 = 80%).
     pub context_budget_threshold: Option<f32>,
+    /// Override the model's context window size (in tokens).
+    /// When set, replaces the model's built-in `context_window` for budget
+    /// calculations. Useful for local/custom models where the window may
+    /// not be auto-detected correctly.
+    pub context_window: Option<u64>,
 }
 
 impl TaskBoardContextHook {
@@ -28,6 +33,7 @@ impl TaskBoardContextHook {
         let context_manager = TaskBoardContextManager::new(TaskBoardContextManagerOptions {
             keep_last_n_assistant_messages: options.keep_last_n_assistant_messages.unwrap_or(50),
             context_budget_threshold: options.context_budget_threshold.unwrap_or(0.8),
+            context_window: options.context_window,
         });
 
         Self { context_manager }
@@ -42,9 +48,16 @@ define_hook!(
             return Ok(HookAction::Continue);
         }
 
-        let model = ctx.state.active_model.clone();
+        let mut model = ctx.state.active_model.clone();
         let max_output_tokens: u64 = 16000;
 
+        // Apply context_window override if configured, so the budget-aware
+        // context trimming uses the user-specified window instead of the
+        // model's built-in limit.
+        if let Some(override_window) = self.context_manager.context_window {
+            model.limit.context = override_window;
+        }
+
         // Subtract fixed overhead from context window so the trimmer budgets
         // only the space actually available for chat messages.
         // - System prompt: added after trimming (line 67+), not in message list
diff --git a/libs/gateway/src/dispatcher.rs b/libs/gateway/src/dispatcher.rs
index 5d3546899..0b1f2a8bf 100644
--- a/libs/gateway/src/dispatcher.rs
+++ b/libs/gateway/src/dispatcher.rs
@@ -3198,6 +3198,7 @@ mod tests {
                     auto_approve: Some(AutoApproveOverride::AllowList(vec!["view".to_string()])),
                     system_prompt: Some("ops prompt".to_string()),
                     max_turns: Some(16),
+                    ..RunOverrides::default()
                 },
             )]),
         };
diff --git a/libs/server/src/openapi.rs b/libs/server/src/openapi.rs
index 6fab0c99e..815f09148 100644
--- a/libs/server/src/openapi.rs
+++ b/libs/server/src/openapi.rs
@@ -206,6 +206,12 @@ pub struct RunOverridesDoc {
     pub auto_approve: Option<AutoApproveOverrideDoc>,
     pub system_prompt: Option<String>,
     pub max_turns: Option<usize>,
+    /// Override the model's context window size in tokens (1000–2000000)
+    pub context_window: Option<u64>,
+    /// Fraction of the context window at which trimming triggers (0.1–1.0)
+    pub context_budget_threshold: Option<f32>,
+    /// Number of recent assistant messages to keep untrimmed during trimming
+    pub keep_last_n_assistant_messages: Option<usize>,
 }
 
 #[derive(Debug, Serialize, Deserialize, ToSchema)]
diff --git a/libs/server/src/routes.rs b/libs/server/src/routes.rs
index 329c2b7fe..63877fe72 100644
--- a/libs/server/src/routes.rs
+++ b/libs/server/src/routes.rs
@@ -250,6 +250,12 @@ const DEFAULT_MAX_TURNS: usize = 64;
 const MIN_MAX_TURNS: usize = 1;
 const MAX_MAX_TURNS: usize = 256;
 const MAX_SYSTEM_PROMPT_CHARS: usize = 32 * 1024;
+const DEFAULT_CONTEXT_BUDGET_THRESHOLD: f32 = 0.8; // used when overrides omit the threshold
+const DEFAULT_KEEP_LAST_N_ASSISTANT_MESSAGES: usize = 5; // used when overrides omit the count
+const MIN_CONTEXT_BUDGET_THRESHOLD: f32 = 0.1; // inclusive lower bound accepted in overrides
+const MAX_CONTEXT_BUDGET_THRESHOLD: f32 = 1.0; // inclusive upper bound accepted in overrides
+const MIN_CONTEXT_WINDOW: u64 = 1_000; // inclusive lower bound, in tokens
+const MAX_CONTEXT_WINDOW: u64 = 2_000_000; // inclusive upper bound, in tokens
 
 pub fn router(state: AppState, auth: AuthConfig) -> Router {
     public_router()
@@ -574,12 +580,23 @@ async fn sessions_message_handler(
                 // bypasses validate_session_message_request.
                 .clamp(MIN_MAX_TURNS, MAX_MAX_TURNS);
 
+            let context_window_override = overrides.and_then(|value| value.context_window);
+            let context_budget_threshold = overrides
+                .and_then(|value| value.context_budget_threshold)
+                .unwrap_or(DEFAULT_CONTEXT_BUDGET_THRESHOLD);
+            let keep_last_n_assistant_messages = overrides
+                .and_then(|value| value.keep_last_n_assistant_messages)
+                .unwrap_or(DEFAULT_KEEP_LAST_N_ASSISTANT_MESSAGES);
+
             let run_config = RunConfig {
                 model,
                 inference: state.inference.clone(),
                 tool_approval_policy,
                 system_prompt: system_prompt_override,
                 max_turns,
+                context_window: context_window_override,
+                context_budget_threshold: Some(context_budget_threshold),
+                keep_last_n_assistant_messages: Some(keep_last_n_assistant_messages),
             };
 
             let caller_context = map_caller_context_inputs(request.context.as_deref());
@@ -1098,6 +1115,30 @@ fn validate_session_message_request(request: &SessionMessageRequest) -> Option<R
                 "system_prompt exceeds maximum length",
             ));
         }
+
+        if let Some(context_window) = overrides.context_window
+            && !(MIN_CONTEXT_WINDOW..=MAX_CONTEXT_WINDOW).contains(&context_window)
+        {
+            return Some(api_error(
+                StatusCode::BAD_REQUEST,
+                "invalid_overrides",
+                &format!(
+                    "context_window must be between {MIN_CONTEXT_WINDOW} and {MAX_CONTEXT_WINDOW}"
+                ),
+            ));
+        }
+
+        if let Some(threshold) = overrides.context_budget_threshold
+            && !(MIN_CONTEXT_BUDGET_THRESHOLD..=MAX_CONTEXT_BUDGET_THRESHOLD).contains(&threshold)
+        {
+            return Some(api_error(
+                StatusCode::BAD_REQUEST,
+                "invalid_overrides",
+                &format!(
+                    "context_budget_threshold must be between {MIN_CONTEXT_BUDGET_THRESHOLD} and {MAX_CONTEXT_BUDGET_THRESHOLD}"
+                ),
+            ));
+        }
     }
 
     None
diff --git a/libs/server/src/session_actor.rs b/libs/server/src/session_actor.rs
index 5a4e4b929..cb02a4461 100644
--- a/libs/server/src/session_actor.rs
+++ b/libs/server/src/session_actor.rs
@@ -207,11 +207,18 @@ async fn run_session_actor(
     let mut baseline_messages = initial_messages.clone();
     baseline_messages.push(user_message.clone());
 
+    // Apply context_window override if configured, so the budget-aware context
+    // reducer uses the user-specified window instead of the model's built-in limit.
+    let mut model = run_config.model.clone();
+    if let Some(context_window) = run_config.context_window {
+        model.limit.context = context_window;
+    }
+
     let checkpoint_runtime = Arc::new(CheckpointRuntime::new(
         state.clone(),
         session_id,
         run_id,
-        run_config.model.clone(),
+        model.clone(),
         parent_checkpoint_id,
         baseline_messages,
         initial_metadata.clone(),
@@ -251,9 +258,10 @@ async fn run_session_actor(
     // Use the model's maximum output capacity as the output budget for context
     // window calculations. This is conservative — the actual response may be shorter,
     // but reserving the full limit avoids mid-response context truncation.
-    let max_output_tokens = run_config.model.limit.output as u32;
+    let max_output_tokens = model.limit.output as u32;
+
     let agent_config = AgentConfig {
-        model: run_config.model.clone(),
+        model,
         system_prompt: session_context.system_prompt,
         max_turns: run_config.max_turns,
         max_output_tokens,
@@ -269,7 +277,14 @@ async fn run_session_actor(
     })];
 
     let compactor = PassthroughCompactionEngine;
-    let context_reducer = BudgetAwareContextReducer::new(5, 0.8);
+    let context_reducer = BudgetAwareContextReducer::new(
+        run_config
+            .keep_last_n_assistant_messages
+            .unwrap_or(5),
+        run_config
+            .context_budget_threshold
+            .unwrap_or(0.8),
+    );
     let run_context = build_run_context(session_id, run_id);
 
     let run_result = run_agent(
diff --git a/libs/server/src/types.rs b/libs/server/src/types.rs
index 9969df98b..d7a9457ff 100644
--- a/libs/server/src/types.rs
+++ b/libs/server/src/types.rs
@@ -33,6 +33,15 @@ pub struct RunConfig {
     pub tool_approval_policy: ToolApprovalPolicy,
     pub system_prompt: Option<String>,
     pub max_turns: usize,
+    /// Override the model's context window size (in tokens).
+    /// When set, replaces the model's default `limit.context` for budget calculations.
+    pub context_window: Option<u64>,
+    /// Fraction of the context window at which context trimming triggers (0.1–1.0).
+    /// Default when not set: 0.8.
+    pub context_budget_threshold: Option<f32>,
+    /// Number of most recent assistant messages to keep untrimmed during trimming.
+    /// Default when not set: 5.
+    pub keep_last_n_assistant_messages: Option<usize>,
 }
 
 impl std::fmt::Debug for RunConfig {
@@ -42,6 +51,12 @@ impl std::fmt::Debug for RunConfig {
             .field("tool_approval_policy", &self.tool_approval_policy)
             .field("system_prompt", &self.system_prompt)
             .field("max_turns", &self.max_turns)
+            .field("context_window", &self.context_window)
+            .field("context_budget_threshold", &self.context_budget_threshold)
+            .field(
+                "keep_last_n_assistant_messages",
+                &self.keep_last_n_assistant_messages,
+            )
             .field("inference", &"<opaque>")
             .finish()
     }
diff --git a/libs/shared/src/models/overrides.rs b/libs/shared/src/models/overrides.rs
index 44ea6cd0c..1050ce833 100644
--- a/libs/shared/src/models/overrides.rs
+++ b/libs/shared/src/models/overrides.rs
@@ -10,7 +10,7 @@ pub enum AutoApproveOverride {
 }
 
 /// Per-request run overrides merged with runtime defaults.
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
+#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
 pub struct RunOverrides {
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub model: Option<String>,
@@ -20,6 +20,22 @@ pub struct RunOverrides {
     pub system_prompt: Option<String>,
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub max_turns: Option<usize>,
+    /// Override the model's context window size (in tokens).
+    /// When set, this value replaces the model's default context window
+    /// for budget and trimming calculations.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub context_window: Option<u64>,
+    /// Fraction of the context window at which context trimming triggers.
+    /// Range: 0.1–1.0 (e.g. 0.8 = start trimming at 80% of context window).
+    /// Default when not set: 0.8.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub context_budget_threshold: Option<f32>,
+    /// Number of most recent assistant messages to keep untrimmed when
+    /// context trimming is triggered. Only assistant and tool messages are
+    /// trimmed; user and system messages are always preserved in full.
+    /// Default when not set: 5.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub keep_last_n_assistant_messages: Option<usize>,
 }
 
 impl RunOverrides {
@@ -28,6 +44,9 @@ impl RunOverrides {
             && self.auto_approve.is_none()
             && self.system_prompt.is_none()
             && self.max_turns.is_none()
+            && self.context_window.is_none()
+            && self.context_budget_threshold.is_none()
+            && self.keep_last_n_assistant_messages.is_none()
     }
 }
 
@@ -63,10 +82,28 @@ mod tests {
             ])),
             system_prompt: Some("hello".to_string()),
             max_turns: Some(24),
+            context_window: Some(200_000),
+            context_budget_threshold: Some(0.7),
+            keep_last_n_assistant_messages: Some(10),
         };
 
         let encoded = serde_json::to_string(&overrides).expect("serialize overrides");
         let decoded: RunOverrides = serde_json::from_str(&encoded).expect("deserialize overrides");
         assert_eq!(decoded, overrides);
     }
+
+    #[test]
+    fn run_overrides_partial_context_fields() {
+        // Identically built overrides with only `context_window` set must compare
+        // equal, and that single field is enough to make them non-empty.
+        let build = || RunOverrides {
+            context_window: Some(100_000),
+            ..RunOverrides::default()
+        };
+        let lhs = build();
+        let rhs = build();
+
+        assert!(!lhs.is_empty());
+        assert_eq!(lhs, rhs);
+    }
 }