From 73c90da3add389a1d496d9ba84bf8fe30b977ce0 Mon Sep 17 00:00:00 2001
From: Lukin <mylukin@gmail.com>
Date: Sat, 4 Jul 2026 22:52:39 +0800
Subject: [PATCH 1/3] Route cheap low-risk turns to economy

Co-Authored-By: Codex <noreply@openai.com>
---
 config/classifier.yaml                        | 10 ++
 implementation-notes.md                       |  8 ++
 .../src/classifier/golden-routing.test.ts     | 42 ++++++++
 .../core/src/classifier/overrides.test.ts     | 97 +++++++++++++++++++
 packages/core/src/classifier/overrides.ts     | 70 ++++++++++++-
 .../src/config/classifier-schema.test.ts      | 12 +++
 .../shared/src/config/classifier-schema.ts    |  8 ++
 7 files changed, 245 insertions(+), 2 deletions(-)

diff --git a/config/classifier.yaml b/config/classifier.yaml
index d322c45c..17675718 100644
--- a/config/classifier.yaml
+++ b/config/classifier.yaml
@@ -184,6 +184,16 @@ classifier:
       low_cost_automation:
         intent_markers: ["[cron:", "chief-monitor", "architect-monitor", "MONITOR.md"]
         no_reply_markers: ["NO_REPLY", "nothing to action", "无事不回复", "没事不回复", "无需回复"]
+      # The cheap_model_low_risk override keeps short current-turn read/check/status requests
+      # on the cheap lane when the client explicitly asked for a cheap model,
+      # even if the transcript carries a large history or many tools. It is
+      # scoped to the LAST user turn and excludes code-changing / JSON / vision
+      # requests, so explicit heavy-model or edit/debug work is not down-routed.
+      cheap_model_low_risk:
+        requested_model_markers: ["gpt-5.4-mini", "gpt-5.4-mini-*", "spark"]
+        current_turn_max_chars: 300
+        low_risk_markers: ["check", "inspect", "status", "health", "read", "list", "show", "summarize", "report", "monitor", "look at", "see if", "查看", "检查", "状态", "健康", "读取", "列出", "展示", "总结", "报告", "监控"]
+        blocked_markers: ["debug", "fix", "implement", "refactor", "modify", "edit", "patch", "write", "create", "generate", "delete", "deploy", "release", "security", "vulnerability", "exploit", "prove", "derive", "calculate", "solve", "调试", "修复", "实现", "重构", "修改", "编辑", "补丁", "编写", "创建", "生成", "删除", "部署", "发布", "安全", "漏洞", "利用", "证明", "推导", "计算", "求解"]
       formal_logic_keywords: ["⊢", "∴", "modus ponens", "first-order logic", "充分必要", "反证法", "归纳法"]
       tools_floor: standard
       long_context_token_threshold: 64000  # capability signal (NOT a task_type); bumped 50k->64k to match llm-router lanes.yaml long_context anchor
diff --git a/implementation-notes.md b/implementation-notes.md
index 05d17bd3..26acd64c 100644
--- a/implementation-notes.md
+++ b/implementation-notes.md
@@ -7,6 +7,14 @@
 
 ---
 
+## 2026-07-04 · cheap-model 短低风险当前轮不被长历史抬价（Classifier / routing，docs/03/04，原则 2/4/5）
+
+- **背景（Lukin）**：生产 `openclaw` 24h 数据显示，部分请求显式请求 `gpt-5.4-mini` / cheap alias，最后一条 user 只有约 200 字且是 read/check/status 类低风险动作，但因为完整 transcript 很大、tools 很多，被 Layer-1 长上下文/工具信号抬到 `balanced/coding`，最终使用 `gpt-5.5`。
+- **规则决策**：新增配置化 override `cheap_model_low_risk`。只有 `requested_model` 命中配置 cheap markers、最后一条 user 在 `current_turn_max_chars` 内、命中低风险 marker、且没有 blocked marker 时，才把 complexity 设为 `simple`。判断只看当前 user turn，不用历史正文。
+- **安全边界**：显式 `gpt-5.5` 请求不受影响；JSON response_format、vision attachment、代码变更/调试/部署/安全/数学等 blocked marker 均不触发。真正的窗口 fit 和 provider 能力仍交给后续 capability filter / fallback。
+- **配置取舍**：默认 markers 包含 `gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`，但没有把 `gpt54` 当成 economy hint，避免把用户想要的中档模型继续压到 mini。
+- **验证计划**：新增 override 单测和 golden routing 回归，覆盖 cheap-model 短低风险长历史降到 economy、重模型请求不降级、code-changing/JSON/vision 不触发；再跑 targeted Vitest、typecheck、build。
+
 ## 2026-07-04 · 视觉上下文压缩以 observe/off 为默认接入（Request optimizer / native Anthropic，docs/04/05/07/11，原则 1/3/5/7/8）
 
 - **背景（Lukin）**：pxpipe 证明“密集文本渲成图片”在特定模型/价格结构下可能降低输入 token 成本，但该方法不是无损压缩；模型视觉路径会丢失精确字符串、数字、路径、hash、ID 等细节。
diff --git a/packages/core/src/classifier/golden-routing.test.ts b/packages/core/src/classifier/golden-routing.test.ts
index 621bca53..e03da928 100644
--- a/packages/core/src/classifier/golden-routing.test.ts
+++ b/packages/core/src/classifier/golden-routing.test.ts
@@ -649,3 +649,45 @@ describe("GOLDEN: cron monitor automation stays cheap", () => {
     expect(got.selected_lane).toBe("economy");
   });
 });
+
+// PROD regression (openclaw key, 2026-07-04): short "check/status" current
+// turns that explicitly request a cheap model can carry a huge transcript. The
+// current turn is low-risk, so long-history/tools signals must not upgrade it to
+// gpt-5.5. Explicit heavy-model requests remain untouched.
+describe("GOLDEN: cheap-model low-risk current turns stay cheap", () => {
+  it("short low-risk gpt-5.4-mini request with long history -> economy", async () => {
+    const request = req("Please check the current status and report anything notable.", {
+      requested_model: "gpt-5.4-mini",
+      messages: [
+        { role: "assistant", content: "prior repository context ".repeat(18_000) },
+        { role: "user", content: "Please check the current status and report anything notable." },
+      ],
+      tools: Array.from({ length: 37 }, (_value, index) => ({
+        type: "function",
+        function: { name: `tool_${index}` },
+      })),
+    });
+
+    const got = await decide(request);
+    expect(got.task_type).toBe("chat");
+    expect(got.complexity).toBe("simple");
+    expect(got.selected_lane).toBe("economy");
+  });
+
+  it("explicit gpt-5.5 request with the same low-risk turn is not down-routed", async () => {
+    const request = req("Please check the current status and report anything notable.", {
+      requested_model: "gpt-5.5",
+      messages: [
+        { role: "assistant", content: "prior repository context ".repeat(18_000) },
+        { role: "user", content: "Please check the current status and report anything notable." },
+      ],
+      tools: Array.from({ length: 37 }, (_value, index) => ({
+        type: "function",
+        function: { name: `tool_${index}` },
+      })),
+    });
+
+    const got = await decide(request);
+    expect(got.selected_lane).not.toBe("economy");
+  });
+});
diff --git a/packages/core/src/classifier/overrides.test.ts b/packages/core/src/classifier/overrides.test.ts
index 0ed190e5..3357290e 100644
--- a/packages/core/src/classifier/overrides.test.ts
+++ b/packages/core/src/classifier/overrides.test.ts
@@ -29,15 +29,24 @@ function makeConfig(
 function req(opts: {
   content?: string;
   messages?: { role: string; content: unknown }[];
+  requested_model?: string;
+  response_format?: Record<string, unknown> | null;
+  attachments?: unknown[] | null;
   tools?: unknown[] | null;
   max_tokens?: number | null;
 }): {
   messages: { role: string; content: unknown }[];
+  requested_model: string;
+  response_format: Record<string, unknown> | null;
+  attachments: unknown[] | null;
   tools: unknown[] | null;
   max_tokens: number | null;
 } {
   return {
     messages: opts.messages ?? [{ role: "user", content: opts.content ?? "" }],
+    requested_model: opts.requested_model ?? "auto",
+    response_format: opts.response_format ?? null,
+    attachments: opts.attachments ?? null,
     tools: opts.tools ?? null,
     max_tokens: opts.max_tokens ?? null,
   };
@@ -135,6 +144,94 @@ describe("evaluateOverrides — low-cost automation (set → simple)", () => {
   });
 });
 
+describe("evaluateOverrides — cheap model low-risk current turn (set → simple)", () => {
+  const cheapModelLowRisk = {
+    requested_model_markers: ["gpt-5.4-mini", "spark"],
+    current_turn_max_chars: 300,
+    low_risk_markers: ["check", "inspect", "status", "read"],
+    blocked_markers: ["debug", "fix", "implement", "refactor", "patch"],
+  };
+
+  it("pins a short low-risk cheap-model request to simple despite long history/tools", () => {
+    const cfg = makeConfig({
+      cheap_model_low_risk: cheapModelLowRisk,
+      long_context_token_threshold: 1_000,
+    });
+    const hits = evaluateOverrides(
+      req({
+        requested_model: "gpt-5.4-mini",
+        messages: [
+          { role: "assistant", content: "prior implementation details ".repeat(10_000) },
+          { role: "user", content: "Please check the current status and report anything notable." },
+        ],
+        tools: [{ name: "shell_exec" }],
+      }),
+      cfg,
+      70_000,
+    );
+    expect(hits).toContainEqual({
+      rule: "cheap_model_low_risk",
+      kind: "set",
+      complexity: "simple",
+    });
+    expect(hits).toContainEqual({ rule: "tools_floor", kind: "floor", complexity: "standard" });
+    expect(hits).toContainEqual({ rule: "long_context", kind: "floor", complexity: "complex" });
+    expect(applyOverrides("complex", hits)).toBe("simple");
+  });
+
+  it("does not fire for explicit heavy-model requests", () => {
+    const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
+    const hits = evaluateOverrides(
+      req({
+        requested_model: "gpt-5.5",
+        content: "Please check the current status and report anything notable.",
+      }),
+      cfg,
+      20,
+    );
+    expect(hits.find((h) => h.rule === "cheap_model_low_risk")).toBeUndefined();
+  });
+
+  it("does not fire for code-changing current turns", () => {
+    const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
+    const hits = evaluateOverrides(
+      req({
+        requested_model: "gpt-5.4-mini",
+        content: "Please inspect this bug, fix the function, and patch the tests.",
+      }),
+      cfg,
+      20,
+    );
+    expect(hits.find((h) => h.rule === "cheap_model_low_risk")).toBeUndefined();
+  });
+
+  it("does not fire when JSON or vision constraints are present", () => {
+    const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
+    expect(
+      evaluateOverrides(
+        req({
+          requested_model: "gpt-5.4-mini",
+          content: "Please check this and return structured fields.",
+          response_format: { type: "json_schema" },
+        }),
+        cfg,
+        20,
+      ).find((h) => h.rule === "cheap_model_low_risk"),
+    ).toBeUndefined();
+    expect(
+      evaluateOverrides(
+        req({
+          requested_model: "gpt-5.4-mini",
+          content: "Please inspect this screenshot status.",
+          attachments: [{ type: "image" }],
+        }),
+        cfg,
+        20,
+      ).find((h) => h.rule === "cheap_model_low_risk"),
+    ).toBeUndefined();
+  });
+});
+
 describe("evaluateOverrides — tools floor (floor → standard)", () => {
   it("raises a trivial tools request to the standard floor (edge)", () => {
     const cfg = makeConfig();
diff --git a/packages/core/src/classifier/overrides.ts b/packages/core/src/classifier/overrides.ts
index b3be495f..e6ddba66 100644
--- a/packages/core/src/classifier/overrides.ts
+++ b/packages/core/src/classifier/overrides.ts
@@ -16,7 +16,7 @@ import type { Complexity } from "./tiers.js";
 export type OverrideKind = "set" | "floor";
 
 export interface OverrideHit {
-  /** "heartbeat" | "exact_confirmation" | "low_cost_automation" | "formal_logic" | "tools_floor" | "long_context" | "short_message" */
+  /** "heartbeat" | "exact_confirmation" | "low_cost_automation" | "cheap_model_low_risk" | "formal_logic" | "tools_floor" | "long_context" | "short_message" */
   rule: string;
   kind: OverrideKind;
   complexity: Complexity;
@@ -31,7 +31,10 @@ const RANK: Record<Complexity, number> = Object.fromEntries(
   ORDER.map((tier, i) => [tier, i]),
 ) as Record<Complexity, number>;
 
-type OverrideInput = Pick<InternalRequest, "messages" | "tools" | "max_tokens">;
+type OverrideInput = Pick<
+  InternalRequest,
+  "messages" | "requested_model" | "response_format" | "attachments" | "tools" | "max_tokens"
+>;
 
 // Returns ALL matching overrides (possibly empty). The engine decides the final
 // tier via `applyOverrides`. This separation keeps detection pure and lets the
@@ -75,6 +78,15 @@ export function evaluateOverrides(
     hits.push({ rule: "low_cost_automation", kind: "set", complexity: "simple" });
   }
 
+  // Cheap-model low-risk current turn: a client explicitly requested a cheap
+  // model for a short read/check/status turn, but the transcript may carry a very
+  // large history and many tools. Scope the signal to the current user turn and
+  // require the requested model + low-risk marker so generic long-context work is
+  // not silently down-routed.
+  if (isCheapModelLowRiskTurn(req, lastUserText, cfg)) {
+    hits.push({ rule: "cheap_model_low_risk", kind: "set", complexity: "simple" });
+  }
+
   // Short-message shortcut: a tiny last user message with NO complex structural
   // signal (no code block / stack trace / over-long body) is trivially simple.
   if (isShortAndSimple(lastUserText, ov.short_message_max_chars, cfg)) {
@@ -138,6 +150,43 @@ function isShortAndSimple(text: string, maxChars: number, cfg: ClassifierRulesCo
   return true;
 }
 
+function isCheapModelLowRiskTurn(
+  req: OverrideInput,
+  text: string,
+  cfg: ClassifierRulesConfig,
+): boolean {
+  const cheap = cfg.overrides.cheap_model_low_risk;
+  if (cheap.requested_model_markers.length === 0 || cheap.low_risk_markers.length === 0) {
+    return false;
+  }
+  if (!modelMatches(req.requested_model, cheap.requested_model_markers)) return false;
+  if (isJsonResponseFormat(req.response_format)) return false;
+  if (hasImageAttachment(req.attachments)) return false;
+
+  const trimmed = text.trim();
+  if (trimmed.length === 0 || trimmed.length > cheap.current_turn_max_chars) return false;
+  if (detectCodeBlock(trimmed) > 0) return false;
+  if (detectStackTrace(trimmed) > 0) return false;
+  if (containsAny(trimmed, cheap.blocked_markers)) return false;
+  return containsAny(trimmed, cheap.low_risk_markers);
+}
+
+function modelMatches(model: string, markers: string[]): boolean {
+  const normalized = model.trim().toLowerCase();
+  if (normalized.length === 0) return false;
+  return markers.some((marker) => {
+    const m = marker.trim().toLowerCase();
+    if (m.length === 0) return false;
+    if (m.includes("*")) return globToRegExp(m).test(normalized);
+    return normalized === m;
+  });
+}
+
+function globToRegExp(glob: string): RegExp {
+  const escaped = glob.replace(/[.+?^${}()|[\]\\]/gu, "\\$&").replace(/\*/gu, ".*");
+  return new RegExp(`^${escaped}$`, "u");
+}
+
 // The short-message disqualifier must see BOTH the English signal dimensions and
 // their international (*_intl_kw) counterparts — otherwise a short Chinese analysis/
 // security/diagnostic prompt ("分析这个系统的根因") is wrongly force-pinned `simple`
@@ -171,6 +220,23 @@ function containsAny(text: string, keywords: string[]): boolean {
   return keywords.some((kw) => kw.length > 0 && haystack.includes(kw.toLowerCase()));
 }
 
+function isJsonResponseFormat(rf: OverrideInput["response_format"]): boolean {
+  if (!isRecord(rf)) return false;
+  const t = rf.type;
+  return typeof t === "string" && (t === "json_object" || t === "json_schema");
+}
+
+function hasImageAttachment(attachments: OverrideInput["attachments"]): boolean {
+  if (!Array.isArray(attachments) || attachments.length === 0) return false;
+  for (const att of attachments) {
+    if (!isRecord(att)) return true;
+    const t = att.type;
+    if (typeof t !== "string") return true;
+    if (t === "image" || t === "image_url" || t.startsWith("image")) return true;
+  }
+  return false;
+}
+
 function normalizeUtterance(text: string): string {
   return text
     .trim()
diff --git a/packages/shared/src/config/classifier-schema.test.ts b/packages/shared/src/config/classifier-schema.test.ts
index e1e43fd1..21cf37a3 100644
--- a/packages/shared/src/config/classifier-schema.test.ts
+++ b/packages/shared/src/config/classifier-schema.test.ts
@@ -26,6 +26,12 @@ function fullClassifier() {
           intent_markers: ["[cron:", "MONITOR.md"],
           no_reply_markers: ["NO_REPLY"],
         },
+        cheap_model_low_risk: {
+          requested_model_markers: ["gpt-5.4-mini", "spark"],
+          current_turn_max_chars: 300,
+          low_risk_markers: ["check", "status"],
+          blocked_markers: ["fix", "implement"],
+        },
         formal_logic_keywords: ["⊢"],
         tools_floor: "standard",
         long_context_token_threshold: 64000,
@@ -99,6 +105,12 @@ describe("ClassifierConfigSchema", () => {
       intent_markers: [],
       no_reply_markers: [],
     });
+    expect(parsed.overrides.cheap_model_low_risk).toEqual({
+      requested_model_markers: [],
+      current_turn_max_chars: 300,
+      low_risk_markers: [],
+      blocked_markers: [],
+    });
     expect(parsed.overrides.tools_floor).toBe("standard");
   });
 
diff --git a/packages/shared/src/config/classifier-schema.ts b/packages/shared/src/config/classifier-schema.ts
index 0d73d953..b0646a65 100644
--- a/packages/shared/src/config/classifier-schema.ts
+++ b/packages/shared/src/config/classifier-schema.ts
@@ -52,6 +52,14 @@ export const ClassifierRulesConfigSchema = z.object({
         no_reply_markers: z.array(z.string()).default([]),
       })
       .prefault({}),
+    cheap_model_low_risk: z
+      .object({
+        requested_model_markers: z.array(z.string()).default([]),
+        current_turn_max_chars: z.number().int().positive().default(300),
+        low_risk_markers: z.array(z.string()).default([]),
+        blocked_markers: z.array(z.string()).default([]),
+      })
+      .prefault({}),
     formal_logic_keywords: z.array(z.string()).default([]),
     tools_floor: TierSchema.default("standard"),
     long_context_token_threshold: z.number().int().positive().default(64_000),

From 9de122c4fa1cfb5981410eb7ae4c0d9ecd889395 Mon Sep 17 00:00:00 2001
From: Lukin <mylukin@gmail.com>
Date: Sat, 4 Jul 2026 23:05:11 +0800
Subject: [PATCH 2/3] Add cheap model aliases to low-risk routing

Co-Authored-By: Codex <noreply@openai.com>
---
 config/classifier.yaml                        |  2 +-
 implementation-notes.md                       |  2 +-
 .../src/classifier/golden-routing.test.ts     | 23 ++++++++++
 .../core/src/classifier/overrides.test.ts     | 42 ++++++++++++++++++-
 .../src/config/classifier-schema.test.ts      |  8 +++-
 5 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/config/classifier.yaml b/config/classifier.yaml
index 17675718..fb32d473 100644
--- a/config/classifier.yaml
+++ b/config/classifier.yaml
@@ -190,7 +190,7 @@ classifier:
       # scoped to the LAST user turn and excludes code-changing / JSON / vision
       # requests, so explicit heavy-model or edit/debug work is not down-routed.
       cheap_model_low_risk:
-        requested_model_markers: ["gpt-5.4-mini", "gpt-5.4-mini-*", "spark"]
+        requested_model_markers: ["economy", "gpt-5.4-mini", "gpt-5.4-mini-*", "spark", "deepseek-v4-flash", "deepseek/deepseek-v4-flash", "openrouter/deepseek-v4-flash", "claude-haiku", "claude-haiku-*", "claude-3-5-haiku", "claude-3-5-haiku-*", "anthropic/claude-haiku-*", "zenmux-anthropic/claude-haiku-*"]
         current_turn_max_chars: 300
         low_risk_markers: ["check", "inspect", "status", "health", "read", "list", "show", "summarize", "report", "monitor", "look at", "see if", "查看", "检查", "状态", "健康", "读取", "列出", "展示", "总结", "报告", "监控"]
         blocked_markers: ["debug", "fix", "implement", "refactor", "modify", "edit", "patch", "write", "create", "generate", "delete", "deploy", "release", "security", "vulnerability", "exploit", "prove", "derive", "calculate", "solve", "调试", "修复", "实现", "重构", "修改", "编辑", "补丁", "编写", "创建", "生成", "删除", "部署", "发布", "安全", "漏洞", "利用", "证明", "推导", "计算", "求解"]
diff --git a/implementation-notes.md b/implementation-notes.md
index 26acd64c..293abbe2 100644
--- a/implementation-notes.md
+++ b/implementation-notes.md
@@ -12,7 +12,7 @@
 - **背景（Lukin）**：生产 `openclaw` 24h 数据显示，部分请求显式请求 `gpt-5.4-mini` / cheap alias，最后一条 user 只有约 200 字且是 read/check/status 类低风险动作，但因为完整 transcript 很大、tools 很多，被 Layer-1 长上下文/工具信号抬到 `balanced/coding`，最终使用 `gpt-5.5`。
 - **规则决策**：新增配置化 override `cheap_model_low_risk`。只有 `requested_model` 命中配置 cheap markers、最后一条 user 在 `current_turn_max_chars` 内、命中低风险 marker、且没有 blocked marker 时，才把 complexity 设为 `simple`。判断只看当前 user turn，不用历史正文。
 - **安全边界**：显式 `gpt-5.5` 请求不受影响；JSON response_format、vision attachment、代码变更/调试/部署/安全/数学等 blocked marker 均不触发。真正的窗口 fit 和 provider 能力仍交给后续 capability filter / fallback。
-- **配置取舍**：默认 markers 包含 `gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`，但没有把 `gpt54` 当成 economy hint，避免把用户想要的中档模型继续压到 mini。
+- **配置取舍**：默认 markers 包含 `economy`、`gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`、`deepseek-v4-flash`、`claude-haiku` 及其主要 provider/dating 形态，但没有把 `gpt54` 当成 economy hint，避免把用户想要的中档模型继续压到 mini。
 - **验证计划**：新增 override 单测和 golden routing 回归，覆盖 cheap-model 短低风险长历史降到 economy、重模型请求不降级、code-changing/JSON/vision 不触发；再跑 targeted Vitest、typecheck、build。
 
 ## 2026-07-04 · 视觉上下文压缩以 observe/off 为默认接入（Request optimizer / native Anthropic，docs/04/05/07/11，原则 1/3/5/7/8）
diff --git a/packages/core/src/classifier/golden-routing.test.ts b/packages/core/src/classifier/golden-routing.test.ts
index e03da928..1a8d38fc 100644
--- a/packages/core/src/classifier/golden-routing.test.ts
+++ b/packages/core/src/classifier/golden-routing.test.ts
@@ -674,6 +674,29 @@ describe("GOLDEN: cheap-model low-risk current turns stay cheap", () => {
     expect(got.selected_lane).toBe("economy");
   });
 
+  it.each([
+    "economy",
+    "deepseek-v4-flash",
+    "claude-haiku",
+  ])("short low-risk %s request with long history -> economy", async (requestedModel) => {
+    const request = req("Please check the current status and report anything notable.", {
+      requested_model: requestedModel,
+      messages: [
+        { role: "assistant", content: "prior repository context ".repeat(18_000) },
+        { role: "user", content: "Please check the current status and report anything notable." },
+      ],
+      tools: Array.from({ length: 37 }, (_value, index) => ({
+        type: "function",
+        function: { name: `tool_${index}` },
+      })),
+    });
+
+    const got = await decide(request);
+    expect(got.task_type).toBe("chat");
+    expect(got.complexity).toBe("simple");
+    expect(got.selected_lane).toBe("economy");
+  });
+
   it("explicit gpt-5.5 request with the same low-risk turn is not down-routed", async () => {
     const request = req("Please check the current status and report anything notable.", {
       requested_model: "gpt-5.5",
diff --git a/packages/core/src/classifier/overrides.test.ts b/packages/core/src/classifier/overrides.test.ts
index 3357290e..bf35d898 100644
--- a/packages/core/src/classifier/overrides.test.ts
+++ b/packages/core/src/classifier/overrides.test.ts
@@ -146,7 +146,20 @@ describe("evaluateOverrides — low-cost automation (set → simple)", () => {
 
 describe("evaluateOverrides — cheap model low-risk current turn (set → simple)", () => {
   const cheapModelLowRisk = {
-    requested_model_markers: ["gpt-5.4-mini", "spark"],
+    requested_model_markers: [
+      "economy",
+      "gpt-5.4-mini",
+      "spark",
+      "deepseek-v4-flash",
+      "deepseek/deepseek-v4-flash",
+      "openrouter/deepseek-v4-flash",
+      "claude-haiku",
+      "claude-haiku-*",
+      "claude-3-5-haiku",
+      "claude-3-5-haiku-*",
+      "anthropic/claude-haiku-*",
+      "zenmux-anthropic/claude-haiku-*",
+    ],
     current_turn_max_chars: 300,
     low_risk_markers: ["check", "inspect", "status", "read"],
     blocked_markers: ["debug", "fix", "implement", "refactor", "patch"],
@@ -179,6 +192,33 @@ describe("evaluateOverrides — cheap model low-risk current turn (set → simpl
     expect(applyOverrides("complex", hits)).toBe("simple");
   });
 
+  it.each([
+    "economy",
+    "deepseek-v4-flash",
+    "deepseek/deepseek-v4-flash",
+    "openrouter/deepseek-v4-flash",
+    "claude-haiku",
+    "claude-haiku-4-5-20251001",
+    "claude-3-5-haiku-20241022",
+    "anthropic/claude-haiku-4-5-20251001",
+    "zenmux-anthropic/claude-haiku-4.5",
+  ])("treats %s as a cheap-model hint", (requestedModel) => {
+    const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
+    const hits = evaluateOverrides(
+      req({
+        requested_model: requestedModel,
+        content: "Please check the current status and report anything notable.",
+      }),
+      cfg,
+      20,
+    );
+    expect(hits).toContainEqual({
+      rule: "cheap_model_low_risk",
+      kind: "set",
+      complexity: "simple",
+    });
+  });
+
   it("does not fire for explicit heavy-model requests", () => {
     const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
     const hits = evaluateOverrides(
diff --git a/packages/shared/src/config/classifier-schema.test.ts b/packages/shared/src/config/classifier-schema.test.ts
index 21cf37a3..1990f328 100644
--- a/packages/shared/src/config/classifier-schema.test.ts
+++ b/packages/shared/src/config/classifier-schema.test.ts
@@ -27,7 +27,13 @@ function fullClassifier() {
           no_reply_markers: ["NO_REPLY"],
         },
         cheap_model_low_risk: {
-          requested_model_markers: ["gpt-5.4-mini", "spark"],
+          requested_model_markers: [
+            "economy",
+            "gpt-5.4-mini",
+            "spark",
+            "deepseek-v4-flash",
+            "claude-haiku",
+          ],
           current_turn_max_chars: 300,
           low_risk_markers: ["check", "status"],
           blocked_markers: ["fix", "implement"],

From 0eb5a1b2e0d29fc41839b29d9cf041ea3338e627 Mon Sep 17 00:00:00 2001
From: Lukin <mylukin@gmail.com>
Date: Sat, 4 Jul 2026 23:15:14 +0800
Subject: [PATCH 3/3] Simplify cheap model marker globs

Co-Authored-By: Codex <noreply@openai.com>
---
 config/classifier.yaml                               |  2 +-
 implementation-notes.md                              |  2 +-
 packages/core/src/classifier/overrides.test.ts       | 10 +++-------
 packages/shared/src/config/classifier-schema.test.ts |  3 ++-
 4 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/config/classifier.yaml b/config/classifier.yaml
index fb32d473..0be9287a 100644
--- a/config/classifier.yaml
+++ b/config/classifier.yaml
@@ -190,7 +190,7 @@ classifier:
       # scoped to the LAST user turn and excludes code-changing / JSON / vision
       # requests, so explicit heavy-model or edit/debug work is not down-routed.
       cheap_model_low_risk:
-        requested_model_markers: ["economy", "gpt-5.4-mini", "gpt-5.4-mini-*", "spark", "deepseek-v4-flash", "deepseek/deepseek-v4-flash", "openrouter/deepseek-v4-flash", "claude-haiku", "claude-haiku-*", "claude-3-5-haiku", "claude-3-5-haiku-*", "anthropic/claude-haiku-*", "zenmux-anthropic/claude-haiku-*"]
+        requested_model_markers: ["economy", "gpt-5.4-mini", "gpt-5.4-mini-*", "spark", "*deepseek-v4-flash", "claude-haiku", "*claude-haiku-*", "claude-3-5-haiku", "*claude-3-5-haiku-*"]
         current_turn_max_chars: 300
         low_risk_markers: ["check", "inspect", "status", "health", "read", "list", "show", "summarize", "report", "monitor", "look at", "see if", "查看", "检查", "状态", "健康", "读取", "列出", "展示", "总结", "报告", "监控"]
         blocked_markers: ["debug", "fix", "implement", "refactor", "modify", "edit", "patch", "write", "create", "generate", "delete", "deploy", "release", "security", "vulnerability", "exploit", "prove", "derive", "calculate", "solve", "调试", "修复", "实现", "重构", "修改", "编辑", "补丁", "编写", "创建", "生成", "删除", "部署", "发布", "安全", "漏洞", "利用", "证明", "推导", "计算", "求解"]
diff --git a/implementation-notes.md b/implementation-notes.md
index 293abbe2..2473f198 100644
--- a/implementation-notes.md
+++ b/implementation-notes.md
@@ -12,7 +12,7 @@
 - **背景（Lukin）**：生产 `openclaw` 24h 数据显示，部分请求显式请求 `gpt-5.4-mini` / cheap alias，最后一条 user 只有约 200 字且是 read/check/status 类低风险动作，但因为完整 transcript 很大、tools 很多，被 Layer-1 长上下文/工具信号抬到 `balanced/coding`，最终使用 `gpt-5.5`。
 - **规则决策**：新增配置化 override `cheap_model_low_risk`。只有 `requested_model` 命中配置 cheap markers、最后一条 user 在 `current_turn_max_chars` 内、命中低风险 marker、且没有 blocked marker 时，才把 complexity 设为 `simple`。判断只看当前 user turn，不用历史正文。
 - **安全边界**：显式 `gpt-5.5` 请求不受影响；JSON response_format、vision attachment、代码变更/调试/部署/安全/数学等 blocked marker 均不触发。真正的窗口 fit 和 provider 能力仍交给后续 capability filter / fallback。
-- **配置取舍**：默认 markers 包含 `economy`、`gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`、`deepseek-v4-flash`、`claude-haiku` 及其主要 provider/dating 形态，但没有把 `gpt54` 当成 economy hint，避免把用户想要的中档模型继续压到 mini。
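+- **配置取舍**：默认 markers 包含 `economy`、`gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`；`*deepseek-v4-flash` 覆盖 bare 与 provider-prefixed 形态（不含带后缀的形态），精确 marker `claude-haiku` / `claude-3-5-haiku` 覆盖 bare 形态，`*claude-haiku-*` / `*claude-3-5-haiku-*` 覆盖带后缀（dated / versioned）的形态及其 provider-prefixed 写法。注意：不带后缀的 provider-prefixed 别名（如 `anthropic/claude-haiku`）目前不会命中任何 marker。另外没有把 `gpt54` 当成 economy hint，避免把用户想要的中档模型继续压到 mini。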
 - **验证计划**：新增 override 单测和 golden routing 回归，覆盖 cheap-model 短低风险长历史降到 economy、重模型请求不降级、code-changing/JSON/vision 不触发；再跑 targeted Vitest、typecheck、build。
 
 ## 2026-07-04 · 视觉上下文压缩以 observe/off 为默认接入（Request optimizer / native Anthropic，docs/04/05/07/11，原则 1/3/5/7/8）
diff --git a/packages/core/src/classifier/overrides.test.ts b/packages/core/src/classifier/overrides.test.ts
index bf35d898..66eb3554 100644
--- a/packages/core/src/classifier/overrides.test.ts
+++ b/packages/core/src/classifier/overrides.test.ts
@@ -150,15 +150,11 @@ describe("evaluateOverrides — cheap model low-risk current turn (set → simpl
       "economy",
       "gpt-5.4-mini",
       "spark",
-      "deepseek-v4-flash",
-      "deepseek/deepseek-v4-flash",
-      "openrouter/deepseek-v4-flash",
+      "*deepseek-v4-flash",
       "claude-haiku",
-      "claude-haiku-*",
+      "*claude-haiku-*",
       "claude-3-5-haiku",
-      "claude-3-5-haiku-*",
-      "anthropic/claude-haiku-*",
-      "zenmux-anthropic/claude-haiku-*",
+      "*claude-3-5-haiku-*",
     ],
     current_turn_max_chars: 300,
     low_risk_markers: ["check", "inspect", "status", "read"],
diff --git a/packages/shared/src/config/classifier-schema.test.ts b/packages/shared/src/config/classifier-schema.test.ts
index 1990f328..6d564163 100644
--- a/packages/shared/src/config/classifier-schema.test.ts
+++ b/packages/shared/src/config/classifier-schema.test.ts
@@ -31,8 +31,9 @@ function fullClassifier() {
             "economy",
             "gpt-5.4-mini",
             "spark",
-            "deepseek-v4-flash",
+            "*deepseek-v4-flash",
             "claude-haiku",
+            "*claude-haiku-*",
           ],
           current_turn_max_chars: 300,
           low_risk_markers: ["check", "status"],