From 73c90da3add389a1d496d9ba84bf8fe30b977ce0 Mon Sep 17 00:00:00 2001 From: Lukin Date: Sat, 4 Jul 2026 22:52:39 +0800 Subject: [PATCH 1/3] Route cheap low-risk turns to economy Co-Authored-By: Codex --- config/classifier.yaml | 10 ++ implementation-notes.md | 8 ++ .../src/classifier/golden-routing.test.ts | 42 ++++++++ .../core/src/classifier/overrides.test.ts | 97 +++++++++++++++++++ packages/core/src/classifier/overrides.ts | 70 ++++++++++++- .../src/config/classifier-schema.test.ts | 12 +++ .../shared/src/config/classifier-schema.ts | 8 ++ 7 files changed, 245 insertions(+), 2 deletions(-) diff --git a/config/classifier.yaml b/config/classifier.yaml index d322c45c..17675718 100644 --- a/config/classifier.yaml +++ b/config/classifier.yaml @@ -184,6 +184,16 @@ classifier: low_cost_automation: intent_markers: ["[cron:", "chief-monitor", "architect-monitor", "MONITOR.md"] no_reply_markers: ["NO_REPLY", "nothing to action", "无事不回复", "没事不回复", "无需回复"] + # Cheap-model low-risk keeps short current-turn read/check/status requests + # on the cheap lane when the client explicitly asked for a cheap model, + # even if the transcript carries a large history or many tools. It is + # scoped to the LAST user turn and excludes code-changing / JSON / vision + # requests, so explicit heavy-model or edit/debug work is not down-routed. + cheap_model_low_risk: + requested_model_markers: ["gpt-5.4-mini", "gpt-5.4-mini-*", "spark"] + current_turn_max_chars: 300 + low_risk_markers: ["check", "inspect", "status", "health", "read", "list", "show", "summarize", "report", "monitor", "look at", "see if", "查看", "检查", "状态", "健康", "读取", "列出", "展示", "总结", "报告", "监控"] + blocked_markers: ["debug", "fix", "implement", "refactor", "modify", "edit", "patch", "write", "create", "generate", "delete", "deploy", "release", "security", "vulnerability", "exploit", "prove", "derive", "calculate", "solve", "调试", "修复", "实现", "重构", "修改", "编辑", "补丁", "编写", "创建", "生成", "删除", "部署", "发布", "安全", "漏洞", "利用", "证明", "推导", "计算", "求解"] formal_logic_keywords: ["⊢", "∴", "modus ponens", "first-order logic", "充分必要", "反证法", "归纳法"] tools_floor: standard long_context_token_threshold: 64000 # capability signal (NOT a task_type); bumped 50k->64k to match llm-router lanes.yaml long_context anchor diff --git a/implementation-notes.md b/implementation-notes.md index 05d17bd3..26acd64c 100644 --- a/implementation-notes.md +++ b/implementation-notes.md @@ -7,6 +7,14 @@ --- +## 2026-07-04 · cheap-model 短低风险当前轮不被长历史抬价(Classifier / routing,docs/03/04,原则 2/4/5) + +- **背景(Lukin)**:生产 `openclaw` 24h 数据显示,部分请求显式请求 `gpt-5.4-mini` / cheap alias,最后一条 user 只有约 200 字且是 read/check/status 类低风险动作,但因为完整 transcript 很大、tools 很多,被 Layer-1 长上下文/工具信号抬到 `balanced/coding`,最终使用 `gpt-5.5`。 +- **规则决策**:新增配置化 override `cheap_model_low_risk`。只有 `requested_model` 命中配置 cheap markers、最后一条 user 在 `current_turn_max_chars` 内、命中低风险 marker、且没有 blocked marker 时,才把 complexity 设为 `simple`。判断只看当前 user turn,不用历史正文。 +- **安全边界**:显式 `gpt-5.5` 请求不受影响;JSON response_format、vision attachment、代码变更/调试/部署/安全/数学等 blocked marker 均不触发。真正的窗口 fit 和 provider 能力仍交给后续 capability filter / fallback。 +- **配置取舍**:默认 markers 包含 `gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`,但没有把 `gpt54` 当成 economy hint,避免把用户想要的中档模型继续压到 mini。 +- **验证计划**:新增 override 单测和 golden routing 回归,覆盖 cheap-model 短低风险长历史降到 economy、重模型请求不降级、code-changing/JSON/vision 不触发;再跑 targeted Vitest、typecheck、build。 + ## 2026-07-04 · 视觉上下文压缩以 observe/off 为默认接入(Request optimizer / native Anthropic,docs/04/05/07/11,原则 1/3/5/7/8) - **背景(Lukin)**:pxpipe 证明“密集文本渲成图片”在特定模型/价格结构下可能降低输入 token 成本,但该方法不是无损压缩;模型视觉路径会丢失精确字符串、数字、路径、hash、ID 等细节。 diff --git a/packages/core/src/classifier/golden-routing.test.ts b/packages/core/src/classifier/golden-routing.test.ts index 621bca53..e03da928 100644 --- a/packages/core/src/classifier/golden-routing.test.ts +++ b/packages/core/src/classifier/golden-routing.test.ts @@ -649,3 +649,45 @@ describe("GOLDEN: cron monitor automation stays cheap", () => { expect(got.selected_lane).toBe("economy"); }); }); + +// PROD regression (openclaw key, 2026-07-04): short "check/status" current +// turns that explicitly request a cheap model can carry a huge transcript. The +// current turn is low-risk, so long-history/tools signals must not upgrade it to +// gpt-5.5. Explicit heavy-model requests remain untouched. +describe("GOLDEN: cheap-model low-risk current turns stay cheap", () => { + it("short low-risk gpt-5.4-mini request with long history -> economy", async () => { + const request = req("Please check the current status and report anything notable.", { + requested_model: "gpt-5.4-mini", + messages: [ + { role: "assistant", content: "prior repository context ".repeat(18_000) }, + { role: "user", content: "Please check the current status and report anything notable." }, + ], + tools: Array.from({ length: 37 }, (_value, index) => ({ + type: "function", + function: { name: `tool_${index}` }, + })), + }); + + const got = await decide(request); + expect(got.task_type).toBe("chat"); + expect(got.complexity).toBe("simple"); + expect(got.selected_lane).toBe("economy"); + }); + + it("explicit gpt-5.5 request with the same low-risk turn is not down-routed", async () => { + const request = req("Please check the current status and report anything notable.", { + requested_model: "gpt-5.5", + messages: [ + { role: "assistant", content: "prior repository context ".repeat(18_000) }, + { role: "user", content: "Please check the current status and report anything notable." }, + ], + tools: Array.from({ length: 37 }, (_value, index) => ({ + type: "function", + function: { name: `tool_${index}` }, + })), + }); + + const got = await decide(request); + expect(got.selected_lane).not.toBe("economy"); + }); +}); diff --git a/packages/core/src/classifier/overrides.test.ts b/packages/core/src/classifier/overrides.test.ts index 0ed190e5..3357290e 100644 --- a/packages/core/src/classifier/overrides.test.ts +++ b/packages/core/src/classifier/overrides.test.ts @@ -29,15 +29,24 @@ function makeConfig( function req(opts: { content?: string; messages?: { role: string; content: unknown }[]; + requested_model?: string; + response_format?: Record | null; + attachments?: unknown[] | null; tools?: unknown[] | null; max_tokens?: number | null; }): { messages: { role: string; content: unknown }[]; + requested_model: string; + response_format: Record | null; + attachments: unknown[] | null; tools: unknown[] | null; max_tokens: number | null; } { return { messages: opts.messages ?? [{ role: "user", content: opts.content ?? "" }], + requested_model: opts.requested_model ?? "auto", + response_format: opts.response_format ?? null, + attachments: opts.attachments ?? null, tools: opts.tools ?? null, max_tokens: opts.max_tokens ?? null, }; @@ -135,6 +144,94 @@ describe("evaluateOverrides — low-cost automation (set → simple)", () => { }); }); +describe("evaluateOverrides — cheap model low-risk current turn (set → simple)", () => { + const cheapModelLowRisk = { + requested_model_markers: ["gpt-5.4-mini", "spark"], + current_turn_max_chars: 300, + low_risk_markers: ["check", "inspect", "status", "read"], + blocked_markers: ["debug", "fix", "implement", "refactor", "patch"], + }; + + it("pins a short low-risk cheap-model request to simple despite long history/tools", () => { + const cfg = makeConfig({ + cheap_model_low_risk: cheapModelLowRisk, + long_context_token_threshold: 1_000, + }); + const hits = evaluateOverrides( + req({ + requested_model: "gpt-5.4-mini", + messages: [ + { role: "assistant", content: "prior implementation details ".repeat(10_000) }, + { role: "user", content: "Please check the current status and report anything notable." }, + ], + tools: [{ name: "shell_exec" }], + }), + cfg, + 70_000, + ); + expect(hits).toContainEqual({ + rule: "cheap_model_low_risk", + kind: "set", + complexity: "simple", + }); + expect(hits).toContainEqual({ rule: "tools_floor", kind: "floor", complexity: "standard" }); + expect(hits).toContainEqual({ rule: "long_context", kind: "floor", complexity: "complex" }); + expect(applyOverrides("complex", hits)).toBe("simple"); + }); + + it("does not fire for explicit heavy-model requests", () => { + const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk }); + const hits = evaluateOverrides( + req({ + requested_model: "gpt-5.5", + content: "Please check the current status and report anything notable.", + }), + cfg, + 20, + ); + expect(hits.find((h) => h.rule === "cheap_model_low_risk")).toBeUndefined(); + }); + + it("does not fire for code-changing current turns", () => { + const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk }); + const hits = evaluateOverrides( + req({ + requested_model: "gpt-5.4-mini", + content: "Please inspect this bug, fix the function, and patch the tests.", + }), + cfg, + 20, + ); + expect(hits.find((h) => h.rule === "cheap_model_low_risk")).toBeUndefined(); + }); + + it("does not fire when JSON or vision constraints are present", () => { + const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk }); + expect( + evaluateOverrides( + req({ + requested_model: "gpt-5.4-mini", + content: "Please check this and return structured fields.", + response_format: { type: "json_schema" }, + }), + cfg, + 20, + ).find((h) => h.rule === "cheap_model_low_risk"), + ).toBeUndefined(); + expect( + evaluateOverrides( + req({ + requested_model: "gpt-5.4-mini", + content: "Please inspect this screenshot status.", + attachments: [{ type: "image" }], + }), + cfg, + 20, + ).find((h) => h.rule === "cheap_model_low_risk"), + ).toBeUndefined(); + }); +}); + describe("evaluateOverrides — tools floor (floor → standard)", () => { it("raises a trivial tools request to the standard floor (edge)", () => { const cfg = makeConfig(); diff --git a/packages/core/src/classifier/overrides.ts b/packages/core/src/classifier/overrides.ts index b3be495f..e6ddba66 100644 --- a/packages/core/src/classifier/overrides.ts +++ b/packages/core/src/classifier/overrides.ts @@ -16,7 +16,7 @@ import type { Complexity } from "./tiers.js"; export type OverrideKind = "set" | "floor"; export interface OverrideHit { - /** "heartbeat" | "exact_confirmation" | "low_cost_automation" | "formal_logic" | "tools_floor" | "long_context" | "short_message" */ + /** "heartbeat" | "exact_confirmation" | "low_cost_automation" | "cheap_model_low_risk" | "formal_logic" | "tools_floor" | "long_context" | "short_message" */ rule: string; kind: OverrideKind; complexity: Complexity; @@ -31,7 +31,10 @@ const RANK: Record = Object.fromEntries( ORDER.map((tier, i) => [tier, i]), ) as Record; -type OverrideInput = Pick; +type OverrideInput = Pick< + InternalRequest, + "messages" | "requested_model" | "response_format" | "attachments" | "tools" | "max_tokens" +>; // Returns ALL matching overrides (possibly empty). The engine decides the final // tier via `applyOverrides`. This separation keeps detection pure and lets the @@ -75,6 +78,15 @@ export function evaluateOverrides( hits.push({ rule: "low_cost_automation", kind: "set", complexity: "simple" }); } + // Cheap-model low-risk current turn: a client explicitly requested a cheap + // model for a short read/check/status turn, but the transcript may carry a very + // large history and many tools. Scope the signal to the current user turn and + // require the requested model + low-risk marker so generic long-context work is + // not silently down-routed. + if (isCheapModelLowRiskTurn(req, lastUserText, cfg)) { + hits.push({ rule: "cheap_model_low_risk", kind: "set", complexity: "simple" }); + } + // Short-message shortcut: a tiny last user message with NO complex structural // signal (no code block / stack trace / over-long body) is trivially simple. if (isShortAndSimple(lastUserText, ov.short_message_max_chars, cfg)) { @@ -138,6 +150,43 @@ function isShortAndSimple(text: string, maxChars: number, cfg: ClassifierRulesCo return true; } +function isCheapModelLowRiskTurn( + req: OverrideInput, + text: string, + cfg: ClassifierRulesConfig, +): boolean { + const cheap = cfg.overrides.cheap_model_low_risk; + if (cheap.requested_model_markers.length === 0 || cheap.low_risk_markers.length === 0) { + return false; + } + if (!modelMatches(req.requested_model, cheap.requested_model_markers)) return false; + if (isJsonResponseFormat(req.response_format)) return false; + if (hasImageAttachment(req.attachments)) return false; + + const trimmed = text.trim(); + if (trimmed.length === 0 || trimmed.length > cheap.current_turn_max_chars) return false; + if (detectCodeBlock(trimmed) > 0) return false; + if (detectStackTrace(trimmed) > 0) return false; + if (containsAny(trimmed, cheap.blocked_markers)) return false; + return containsAny(trimmed, cheap.low_risk_markers); +} + +function modelMatches(model: string, markers: string[]): boolean { + const normalized = model.trim().toLowerCase(); + if (normalized.length === 0) return false; + return markers.some((marker) => { + const m = marker.trim().toLowerCase(); + if (m.length === 0) return false; + if (m.includes("*")) return globToRegExp(m).test(normalized); + return normalized === m; + }); +} + +function globToRegExp(glob: string): RegExp { + const escaped = glob.replace(/[.+?^${}()|[\]\\]/gu, "\\$&").replace(/\*/gu, ".*"); + return new RegExp(`^${escaped}$`, "u"); +} + // The short-message disqualifier must see BOTH the English signal dimensions and // their international (*_intl_kw) counterparts — otherwise a short Chinese analysis/ // security/diagnostic prompt ("分析这个系统的根因") is wrongly force-pinned `simple` @@ -171,6 +220,23 @@ function containsAny(text: string, keywords: string[]): boolean { return keywords.some((kw) => kw.length > 0 && haystack.includes(kw.toLowerCase())); } +function isJsonResponseFormat(rf: OverrideInput["response_format"]): boolean { + if (!isRecord(rf)) return false; + const t = rf.type; + return typeof t === "string" && (t === "json_object" || t === "json_schema"); +} + +function hasImageAttachment(attachments: OverrideInput["attachments"]): boolean { + if (!Array.isArray(attachments) || attachments.length === 0) return false; + for (const att of attachments) { + if (!isRecord(att)) return true; + const t = att.type; + if (typeof t !== "string") return true; + if (t === "image" || t === "image_url" || t.startsWith("image")) return true; + } + return false; +} + function normalizeUtterance(text: string): string { return text .trim() diff --git a/packages/shared/src/config/classifier-schema.test.ts b/packages/shared/src/config/classifier-schema.test.ts index e1e43fd1..21cf37a3 100644 --- a/packages/shared/src/config/classifier-schema.test.ts +++ b/packages/shared/src/config/classifier-schema.test.ts @@ -26,6 +26,12 @@ function fullClassifier() { intent_markers: ["[cron:", "MONITOR.md"], no_reply_markers: ["NO_REPLY"], }, + cheap_model_low_risk: { + requested_model_markers: ["gpt-5.4-mini", "spark"], + current_turn_max_chars: 300, + low_risk_markers: ["check", "status"], + blocked_markers: ["fix", "implement"], + }, formal_logic_keywords: ["⊢"], tools_floor: "standard", long_context_token_threshold: 64000, @@ -99,6 +105,12 @@ describe("ClassifierConfigSchema", () => { intent_markers: [], no_reply_markers: [], }); + expect(parsed.overrides.cheap_model_low_risk).toEqual({ + requested_model_markers: [], + current_turn_max_chars: 300, + low_risk_markers: [], + blocked_markers: [], + }); expect(parsed.overrides.tools_floor).toBe("standard"); }); diff --git a/packages/shared/src/config/classifier-schema.ts b/packages/shared/src/config/classifier-schema.ts index 0d73d953..b0646a65 100644 --- a/packages/shared/src/config/classifier-schema.ts +++ b/packages/shared/src/config/classifier-schema.ts @@ -52,6 +52,14 @@ export const ClassifierRulesConfigSchema = z.object({ no_reply_markers: z.array(z.string()).default([]), }) .prefault({}), + cheap_model_low_risk: z + .object({ + requested_model_markers: z.array(z.string()).default([]), + current_turn_max_chars: z.number().int().positive().default(300), + low_risk_markers: z.array(z.string()).default([]), + blocked_markers: z.array(z.string()).default([]), + }) + .prefault({}), formal_logic_keywords: z.array(z.string()).default([]), tools_floor: TierSchema.default("standard"), long_context_token_threshold: z.number().int().positive().default(64_000), From 9de122c4fa1cfb5981410eb7ae4c0d9ecd889395 Mon Sep 17 00:00:00 2001 From: Lukin Date: Sat, 4 Jul 2026 23:05:11 +0800 Subject: [PATCH 2/3] Add cheap model aliases to low-risk routing Co-Authored-By: Codex --- config/classifier.yaml | 2 +- implementation-notes.md | 2 +- .../src/classifier/golden-routing.test.ts | 23 ++++++++++ .../core/src/classifier/overrides.test.ts | 42 ++++++++++++++++++- .../src/config/classifier-schema.test.ts | 8 +++- 5 files changed, 73 insertions(+), 4 deletions(-) diff --git a/config/classifier.yaml b/config/classifier.yaml index 17675718..fb32d473 100644 --- a/config/classifier.yaml +++ b/config/classifier.yaml @@ -190,7 +190,7 @@ classifier: # scoped to the LAST user turn and excludes code-changing / JSON / vision # requests, so explicit heavy-model or edit/debug work is not down-routed. cheap_model_low_risk: - requested_model_markers: ["gpt-5.4-mini", "gpt-5.4-mini-*", "spark"] + requested_model_markers: ["economy", "gpt-5.4-mini", "gpt-5.4-mini-*", "spark", "deepseek-v4-flash", "deepseek/deepseek-v4-flash", "openrouter/deepseek-v4-flash", "claude-haiku", "claude-haiku-*", "claude-3-5-haiku", "claude-3-5-haiku-*", "anthropic/claude-haiku-*", "zenmux-anthropic/claude-haiku-*"] current_turn_max_chars: 300 low_risk_markers: ["check", "inspect", "status", "health", "read", "list", "show", "summarize", "report", "monitor", "look at", "see if", "查看", "检查", "状态", "健康", "读取", "列出", "展示", "总结", "报告", "监控"] blocked_markers: ["debug", "fix", "implement", "refactor", "modify", "edit", "patch", "write", "create", "generate", "delete", "deploy", "release", "security", "vulnerability", "exploit", "prove", "derive", "calculate", "solve", "调试", "修复", "实现", "重构", "修改", "编辑", "补丁", "编写", "创建", "生成", "删除", "部署", "发布", "安全", "漏洞", "利用", "证明", "推导", "计算", "求解"] diff --git a/implementation-notes.md b/implementation-notes.md index 26acd64c..293abbe2 100644 --- a/implementation-notes.md +++ b/implementation-notes.md @@ -12,7 +12,7 @@ - **背景(Lukin)**:生产 `openclaw` 24h 数据显示,部分请求显式请求 `gpt-5.4-mini` / cheap alias,最后一条 user 只有约 200 字且是 read/check/status 类低风险动作,但因为完整 transcript 很大、tools 很多,被 Layer-1 长上下文/工具信号抬到 `balanced/coding`,最终使用 `gpt-5.5`。 - **规则决策**:新增配置化 override `cheap_model_low_risk`。只有 `requested_model` 命中配置 cheap markers、最后一条 user 在 `current_turn_max_chars` 内、命中低风险 marker、且没有 blocked marker 时,才把 complexity 设为 `simple`。判断只看当前 user turn,不用历史正文。 - **安全边界**:显式 `gpt-5.5` 请求不受影响;JSON response_format、vision attachment、代码变更/调试/部署/安全/数学等 blocked marker 均不触发。真正的窗口 fit 和 provider 能力仍交给后续 capability filter / fallback。 -- **配置取舍**:默认 markers 包含 `gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`,但没有把 `gpt54` 当成 economy hint,避免把用户想要的中档模型继续压到 mini。 +- **配置取舍**:默认 markers 包含 `economy`、`gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`、`deepseek-v4-flash`、`claude-haiku` 及其主要 provider/dating 形态,但没有把 `gpt54` 当成 economy hint,避免把用户想要的中档模型继续压到 mini。 - **验证计划**:新增 override 单测和 golden routing 回归,覆盖 cheap-model 短低风险长历史降到 economy、重模型请求不降级、code-changing/JSON/vision 不触发;再跑 targeted Vitest、typecheck、build。 ## 2026-07-04 · 视觉上下文压缩以 observe/off 为默认接入(Request optimizer / native Anthropic,docs/04/05/07/11,原则 1/3/5/7/8) diff --git a/packages/core/src/classifier/golden-routing.test.ts b/packages/core/src/classifier/golden-routing.test.ts index e03da928..1a8d38fc 100644 --- a/packages/core/src/classifier/golden-routing.test.ts +++ b/packages/core/src/classifier/golden-routing.test.ts @@ -674,6 +674,29 @@ describe("GOLDEN: cheap-model low-risk current turns stay cheap", () => { expect(got.selected_lane).toBe("economy"); }); + it.each([ + "economy", + "deepseek-v4-flash", + "claude-haiku", + ])("short low-risk %s request with long history -> economy", async (requestedModel) => { + const request = req("Please check the current status and report anything notable.", { + requested_model: requestedModel, + messages: [ + { role: "assistant", content: "prior repository context ".repeat(18_000) }, + { role: "user", content: "Please check the current status and report anything notable." }, + ], + tools: Array.from({ length: 37 }, (_value, index) => ({ + type: "function", + function: { name: `tool_${index}` }, + })), + }); + + const got = await decide(request); + expect(got.task_type).toBe("chat"); + expect(got.complexity).toBe("simple"); + expect(got.selected_lane).toBe("economy"); + }); + it("explicit gpt-5.5 request with the same low-risk turn is not down-routed", async () => { const request = req("Please check the current status and report anything notable.", { requested_model: "gpt-5.5", diff --git a/packages/core/src/classifier/overrides.test.ts b/packages/core/src/classifier/overrides.test.ts index 3357290e..bf35d898 100644 --- a/packages/core/src/classifier/overrides.test.ts +++ b/packages/core/src/classifier/overrides.test.ts @@ -146,7 +146,20 @@ describe("evaluateOverrides — low-cost automation (set → simple)", () => { describe("evaluateOverrides — cheap model low-risk current turn (set → simple)", () => { const cheapModelLowRisk = { - requested_model_markers: ["gpt-5.4-mini", "spark"], + requested_model_markers: [ + "economy", + "gpt-5.4-mini", + "spark", + "deepseek-v4-flash", + "deepseek/deepseek-v4-flash", + "openrouter/deepseek-v4-flash", + "claude-haiku", + "claude-haiku-*", + "claude-3-5-haiku", + "claude-3-5-haiku-*", + "anthropic/claude-haiku-*", + "zenmux-anthropic/claude-haiku-*", + ], current_turn_max_chars: 300, low_risk_markers: ["check", "inspect", "status", "read"], blocked_markers: ["debug", "fix", "implement", "refactor", "patch"], @@ -179,6 +192,33 @@ describe("evaluateOverrides — cheap model low-risk current turn (set → simpl expect(applyOverrides("complex", hits)).toBe("simple"); }); + it.each([ + "economy", + "deepseek-v4-flash", + "deepseek/deepseek-v4-flash", + "openrouter/deepseek-v4-flash", + "claude-haiku", + "claude-haiku-4-5-20251001", + "claude-3-5-haiku-20241022", + "anthropic/claude-haiku-4-5-20251001", + "zenmux-anthropic/claude-haiku-4.5", + ])("treats %s as a cheap-model hint", (requestedModel) => { + const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk }); + const hits = evaluateOverrides( + req({ + requested_model: requestedModel, + content: "Please check the current status and report anything notable.", + }), + cfg, + 20, + ); + expect(hits).toContainEqual({ + rule: "cheap_model_low_risk", + kind: "set", + complexity: "simple", + }); + }); + it("does not fire for explicit heavy-model requests", () => { const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk }); const hits = evaluateOverrides( diff --git a/packages/shared/src/config/classifier-schema.test.ts b/packages/shared/src/config/classifier-schema.test.ts index 21cf37a3..1990f328 100644 --- a/packages/shared/src/config/classifier-schema.test.ts +++ b/packages/shared/src/config/classifier-schema.test.ts @@ -27,7 +27,13 @@ function fullClassifier() { no_reply_markers: ["NO_REPLY"], }, cheap_model_low_risk: { - requested_model_markers: ["gpt-5.4-mini", "spark"], + requested_model_markers: [ + "economy", + "gpt-5.4-mini", + "spark", + "deepseek-v4-flash", + "claude-haiku", + ], current_turn_max_chars: 300, low_risk_markers: ["check", "status"], blocked_markers: ["fix", "implement"], From 0eb5a1b2e0d29fc41839b29d9cf041ea3338e627 Mon Sep 17 00:00:00 2001 From: Lukin Date: Sat, 4 Jul 2026 23:15:14 +0800 Subject: [PATCH 3/3] Simplify cheap model marker globs Co-Authored-By: Codex --- config/classifier.yaml | 2 +- implementation-notes.md | 2 +- packages/core/src/classifier/overrides.test.ts | 10 +++------- packages/shared/src/config/classifier-schema.test.ts | 3 ++- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/config/classifier.yaml b/config/classifier.yaml index fb32d473..0be9287a 100644 --- a/config/classifier.yaml +++ b/config/classifier.yaml @@ -190,7 +190,7 @@ classifier: # scoped to the LAST user turn and excludes code-changing / JSON / vision # requests, so explicit heavy-model or edit/debug work is not down-routed. cheap_model_low_risk: - requested_model_markers: ["economy", "gpt-5.4-mini", "gpt-5.4-mini-*", "spark", "deepseek-v4-flash", "deepseek/deepseek-v4-flash", "openrouter/deepseek-v4-flash", "claude-haiku", "claude-haiku-*", "claude-3-5-haiku", "claude-3-5-haiku-*", "anthropic/claude-haiku-*", "zenmux-anthropic/claude-haiku-*"] + requested_model_markers: ["economy", "gpt-5.4-mini", "gpt-5.4-mini-*", "spark", "*deepseek-v4-flash", "claude-haiku", "*claude-haiku-*", "claude-3-5-haiku", "*claude-3-5-haiku-*"] current_turn_max_chars: 300 low_risk_markers: ["check", "inspect", "status", "health", "read", "list", "show", "summarize", "report", "monitor", "look at", "see if", "查看", "检查", "状态", "健康", "读取", "列出", "展示", "总结", "报告", "监控"] blocked_markers: ["debug", "fix", "implement", "refactor", "modify", "edit", "patch", "write", "create", "generate", "delete", "deploy", "release", "security", "vulnerability", "exploit", "prove", "derive", "calculate", "solve", "调试", "修复", "实现", "重构", "修改", "编辑", "补丁", "编写", "创建", "生成", "删除", "部署", "发布", "安全", "漏洞", "利用", "证明", "推导", "计算", "求解"] diff --git a/implementation-notes.md b/implementation-notes.md index 293abbe2..2473f198 100644 --- a/implementation-notes.md +++ b/implementation-notes.md @@ -12,7 +12,7 @@ - **背景(Lukin)**:生产 `openclaw` 24h 数据显示,部分请求显式请求 `gpt-5.4-mini` / cheap alias,最后一条 user 只有约 200 字且是 read/check/status 类低风险动作,但因为完整 transcript 很大、tools 很多,被 Layer-1 长上下文/工具信号抬到 `balanced/coding`,最终使用 `gpt-5.5`。 - **规则决策**:新增配置化 override `cheap_model_low_risk`。只有 `requested_model` 命中配置 cheap markers、最后一条 user 在 `current_turn_max_chars` 内、命中低风险 marker、且没有 blocked marker 时,才把 complexity 设为 `simple`。判断只看当前 user turn,不用历史正文。 - **安全边界**:显式 `gpt-5.5` 请求不受影响;JSON response_format、vision attachment、代码变更/调试/部署/安全/数学等 blocked marker 均不触发。真正的窗口 fit 和 provider 能力仍交给后续 capability filter / fallback。 -- **配置取舍**:默认 markers 包含 `economy`、`gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`、`deepseek-v4-flash`、`claude-haiku` 及其主要 provider/dating 形态,但没有把 `gpt54` 当成 economy hint,避免把用户想要的中档模型继续压到 mini。 +- **配置取舍**:默认 markers 包含 `economy`、`gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`,并用 `*deepseek-v4-flash` / `*claude-haiku-*` / `*claude-3-5-haiku-*` 覆盖 bare、provider-prefixed 与 dated 形态;但没有把 `gpt54` 当成 economy hint,避免把用户想要的中档模型继续压到 mini。 - **验证计划**:新增 override 单测和 golden routing 回归,覆盖 cheap-model 短低风险长历史降到 economy、重模型请求不降级、code-changing/JSON/vision 不触发;再跑 targeted Vitest、typecheck、build。 ## 2026-07-04 · 视觉上下文压缩以 observe/off 为默认接入(Request optimizer / native Anthropic,docs/04/05/07/11,原则 1/3/5/7/8) diff --git a/packages/core/src/classifier/overrides.test.ts b/packages/core/src/classifier/overrides.test.ts index bf35d898..66eb3554 100644 --- a/packages/core/src/classifier/overrides.test.ts +++ b/packages/core/src/classifier/overrides.test.ts @@ -150,15 +150,11 @@ describe("evaluateOverrides — cheap model low-risk current turn (set → simpl "economy", "gpt-5.4-mini", "spark", - "deepseek-v4-flash", - "deepseek/deepseek-v4-flash", - "openrouter/deepseek-v4-flash", + "*deepseek-v4-flash", "claude-haiku", - "claude-haiku-*", + "*claude-haiku-*", "claude-3-5-haiku", - "claude-3-5-haiku-*", - "anthropic/claude-haiku-*", - "zenmux-anthropic/claude-haiku-*", + "*claude-3-5-haiku-*", ], current_turn_max_chars: 300, low_risk_markers: ["check", "inspect", "status", "read"], diff --git a/packages/shared/src/config/classifier-schema.test.ts b/packages/shared/src/config/classifier-schema.test.ts index 1990f328..6d564163 100644 --- a/packages/shared/src/config/classifier-schema.test.ts +++ b/packages/shared/src/config/classifier-schema.test.ts @@ -31,8 +31,9 @@ function fullClassifier() { "economy", "gpt-5.4-mini", "spark", - "deepseek-v4-flash", + "*deepseek-v4-flash", "claude-haiku", + "*claude-haiku-*", ], current_turn_max_chars: 300, low_risk_markers: ["check", "status"],