EasyMetaAu · mylukin · Jul 4, 2026 · Jul 4, 2026 · Jul 4, 2026 · Jul 4, 2026
diff --git a/config/classifier.yaml b/config/classifier.yaml
@@ -184,6 +184,16 @@ classifier:
       low_cost_automation:
         intent_markers: ["[cron:", "chief-monitor", "architect-monitor", "MONITOR.md"]
         no_reply_markers: ["NO_REPLY", "nothing to action", "无事不回复", "没事不回复", "无需回复"]
+      # Cheap-model low-risk keeps short current-turn read/check/status requests
+      # on the cheap lane when the client explicitly asked for a cheap model,
+      # even if the transcript carries a large history or many tools. It is
+      # scoped to the LAST user turn and excludes code-changing / JSON / vision
+      # requests, so explicit heavy-model or edit/debug work is not down-routed.
+      cheap_model_low_risk:
+        requested_model_markers: ["economy", "gpt-5.4-mini", "gpt-5.4-mini-*", "spark", "*deepseek-v4-flash", "claude-haiku", "*claude-haiku-*", "claude-3-5-haiku", "*claude-3-5-haiku-*"]
+        current_turn_max_chars: 300
+        low_risk_markers: ["check", "inspect", "status", "health", "read", "list", "show", "summarize", "report", "monitor", "look at", "see if", "查看", "检查", "状态", "健康", "读取", "列出", "展示", "总结", "报告", "监控"]
+        blocked_markers: ["debug", "fix", "implement", "refactor", "modify", "edit", "patch", "write", "create", "generate", "delete", "deploy", "release", "security", "vulnerability", "exploit", "prove", "derive", "calculate", "solve", "调试", "修复", "实现", "重构", "修改", "编辑", "补丁", "编写", "创建", "生成", "删除", "部署", "发布", "安全", "漏洞", "利用", "证明", "推导", "计算", "求解"]
       formal_logic_keywords: ["⊢", "∴", "modus ponens", "first-order logic", "充分必要", "反证法", "归纳法"]
       tools_floor: standard
       long_context_token_threshold: 64000  # capability signal (NOT a task_type); bumped 50k->64k to match llm-router lanes.yaml long_context anchor

diff --git a/implementation-notes.md b/implementation-notes.md
@@ -7,6 +7,14 @@
 
 ---
 
+## 2026-07-04 · cheap-model 短低风险当前轮不被长历史抬价（Classifier / routing，docs/03/04，原则 2/4/5）
+
+- **背景（Lukin）**：生产 `openclaw` 24h 数据显示，部分请求显式请求 `gpt-5.4-mini` / cheap alias，最后一条 user 只有约 200 字且是 read/check/status 类低风险动作，但因为完整 transcript 很大、tools 很多，被 Layer-1 长上下文/工具信号抬到 `balanced/coding`，最终使用 `gpt-5.5`。
+- **规则决策**：新增配置化 override `cheap_model_low_risk`。只有 `requested_model` 命中配置 cheap markers、最后一条 user 在 `current_turn_max_chars` 内、命中低风险 marker、且没有 blocked marker 时，才把 complexity 设为 `simple`。判断只看当前 user turn，不用历史正文。
+- **安全边界**：显式 `gpt-5.5` 请求不受影响；JSON response_format、vision attachment、代码变更/调试/部署/安全/数学等 blocked marker 均不触发。真正的窗口 fit 和 provider 能力仍交给后续 capability filter / fallback。
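+- **配置取舍**：默认 markers 包含 `economy`、`gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`（不含 `*` 的 marker 为精确匹配）；`*deepseek-v4-flash` 覆盖 bare 与 provider-prefixed 形态，`*claude-haiku-*` / `*claude-3-5-haiku-*` 覆盖 dated 以及带 provider 前缀的 dated 形态，bare 的 `claude-haiku` / `claude-3-5-haiku` 由精确匹配项覆盖。注意：带 provider 前缀但不带后缀的写法（如 `anthropic/claude-haiku`、`openai/gpt-5.4-mini`）目前不会命中任何 marker。另外没有把 `gpt54` 当成 economy hint，避免把用户想要的中档模型继续压到 mini。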
+- **验证计划**：新增 override 单测和 golden routing 回归，覆盖 cheap-model 短低风险长历史降到 economy、重模型请求不降级、code-changing/JSON/vision 不触发；再跑 targeted Vitest、typecheck、build。
+
 ## 2026-07-04 · 视觉上下文压缩以 observe/off 为默认接入（Request optimizer / native Anthropic，docs/04/05/07/11，原则 1/3/5/7/8）
 
 - **背景（Lukin）**：pxpipe 证明“密集文本渲成图片”在特定模型/价格结构下可能降低输入 token 成本，但该方法不是无损压缩；模型视觉路径会丢失精确字符串、数字、路径、hash、ID 等细节。

diff --git a/packages/core/src/classifier/golden-routing.test.ts b/packages/core/src/classifier/golden-routing.test.ts
@@ -649,3 +649,68 @@ describe("GOLDEN: cron monitor automation stays cheap", () => {
     expect(got.selected_lane).toBe("economy");
   });
 });
+
+// PROD regression (openclaw key, 2026-07-04): short "check/status" current
+// turns that explicitly request a cheap model can carry a huge transcript. The
+// current turn is low-risk, so long-history/tools signals must not upgrade it to
+// gpt-5.5. Explicit heavy-model requests remain untouched.
+describe("GOLDEN: cheap-model low-risk current turns stay cheap", () => {
+  // The single current-turn text shared by every scenario in this suite.
+  const currentTurn = "Please check the current status and report anything notable.";
+
+  // Builds the shared scenario: a very large assistant history, 37 function
+  // tools, and the short low-risk user turn. Only the requested model varies.
+  const longHistoryRequest = (requestedModel: string) =>
+    req(currentTurn, {
+      requested_model: requestedModel,
+      messages: [
+        { role: "assistant", content: "prior repository context ".repeat(18_000) },
+        { role: "user", content: currentTurn },
+      ],
+      tools: Array.from({ length: 37 }, (_value, index) => ({
+        type: "function",
+        function: { name: `tool_${index}` },
+      })),
+    });
+
+  // Each cheap-model hint must keep the request on the economy lane.
+  it.each([
+    "gpt-5.4-mini",
+    "economy",
+    "deepseek-v4-flash",
+    "claude-haiku",
+  ])("short low-risk %s request with long history -> economy", async (requestedModel) => {
+    const got = await decide(longHistoryRequest(requestedModel));
+    expect(got.task_type).toBe("chat");
+    expect(got.complexity).toBe("simple");
+    expect(got.selected_lane).toBe("economy");
+  });
+
+  // Same turn, same history, but an explicit heavy-model request: the override
+  // must stay out of the way.
+  it("explicit gpt-5.5 request with the same low-risk turn is not down-routed", async () => {
+    const got = await decide(longHistoryRequest("gpt-5.5"));
+    expect(got.selected_lane).not.toBe("economy");
+  });
+});
diff --git a/packages/core/src/classifier/overrides.test.ts b/packages/core/src/classifier/overrides.test.ts
@@ -29,15 +29,24 @@ function makeConfig(
 function req(opts: {
   content?: string;
   messages?: { role: string; content: unknown }[];
+  requested_model?: string;
+  response_format?: Record<string, unknown> | null;
+  attachments?: unknown[] | null;
   tools?: unknown[] | null;
   max_tokens?: number | null;
 }): {
   messages: { role: string; content: unknown }[];
+  requested_model: string;
+  response_format: Record<string, unknown> | null;
+  attachments: unknown[] | null;
   tools: unknown[] | null;
   max_tokens: number | null;
 } {
   return {
     messages: opts.messages ?? [{ role: "user", content: opts.content ?? "" }],
+    requested_model: opts.requested_model ?? "auto",
+    response_format: opts.response_format ?? null,
+    attachments: opts.attachments ?? null,
     tools: opts.tools ?? null,
     max_tokens: opts.max_tokens ?? null,
   };
@@ -135,6 +144,130 @@ describe("evaluateOverrides — low-cost automation (set → simple)", () => {
   });
 });
 
+describe("evaluateOverrides — cheap model low-risk current turn (set → simple)", () => {
+  const RULE = "cheap_model_low_risk";
+  const LOW_RISK_TURN = "Please check the current status and report anything notable.";
+
+  // Reduced marker lists: just enough to drive the scenarios below.
+  const cheapModelLowRisk = {
+    requested_model_markers: [
+      "economy",
+      "gpt-5.4-mini",
+      "spark",
+      "*deepseek-v4-flash",
+      "claude-haiku",
+      "*claude-haiku-*",
+      "claude-3-5-haiku",
+      "*claude-3-5-haiku-*",
+    ],
+    current_turn_max_chars: 300,
+    low_risk_markers: ["check", "inspect", "status", "read"],
+    blocked_markers: ["debug", "fix", "implement", "refactor", "patch"],
+  };
+
+  // The hit this rule is expected to emit whenever it fires.
+  const firedHit = { rule: RULE, kind: "set", complexity: "simple" } as const;
+
+  // Evaluates a request against a fresh config that enables only this rule,
+  // passing 20 as the third evaluateOverrides argument.
+  const evaluateSmall = (input: Parameters<typeof req>[0]) =>
+    evaluateOverrides(req(input), makeConfig({ cheap_model_low_risk: cheapModelLowRisk }), 20);
+
+  // Picks this rule's hit out of an evaluation result, if present.
+  const ruleHit = (hits: ReturnType<typeof evaluateOverrides>) =>
+    hits.find((hit) => hit.rule === RULE);
+
+  it("pins a short low-risk cheap-model request to simple despite long history/tools", () => {
+    // Low long-context threshold plus a large third argument, so the
+    // long_context floor is expected to fire alongside the cheap-model rule.
+    const cfg = makeConfig({
+      cheap_model_low_risk: cheapModelLowRisk,
+      long_context_token_threshold: 1_000,
+    });
+    const longHistoryRequest = req({
+      requested_model: "gpt-5.4-mini",
+      messages: [
+        { role: "assistant", content: "prior implementation details ".repeat(10_000) },
+        { role: "user", content: LOW_RISK_TURN },
+      ],
+      tools: [{ name: "shell_exec" }],
+    });
+
+    const hits = evaluateOverrides(longHistoryRequest, cfg, 70_000);
+
+    // Both floors are still reported, yet the "set" hit decides the final tier.
+    expect(hits).toContainEqual(firedHit);
+    expect(hits).toContainEqual({ rule: "tools_floor", kind: "floor", complexity: "standard" });
+    expect(hits).toContainEqual({ rule: "long_context", kind: "floor", complexity: "complex" });
+    expect(applyOverrides("complex", hits)).toBe("simple");
+  });
+
+  // Bare, provider-prefixed, and dated spellings that must count as cheap hints.
+  it.each([
+    "economy",
+    "deepseek-v4-flash",
+    "deepseek/deepseek-v4-flash",
+    "openrouter/deepseek-v4-flash",
+    "claude-haiku",
+    "claude-haiku-4-5-20251001",
+    "claude-3-5-haiku-20241022",
+    "anthropic/claude-haiku-4-5-20251001",
+    "zenmux-anthropic/claude-haiku-4.5",
+  ])("treats %s as a cheap-model hint", (requestedModel) => {
+    const hits = evaluateSmall({ requested_model: requestedModel, content: LOW_RISK_TURN });
+    expect(hits).toContainEqual(firedHit);
+  });
+
+  it("does not fire for explicit heavy-model requests", () => {
+    const hits = evaluateSmall({ requested_model: "gpt-5.5", content: LOW_RISK_TURN });
+    expect(ruleHit(hits)).toBeUndefined();
+  });
+
+  it("does not fire for code-changing current turns", () => {
+    // Contains a low-risk marker ("inspect") but also blocked ones ("fix", "patch").
+    const hits = evaluateSmall({
+      requested_model: "gpt-5.4-mini",
+      content: "Please inspect this bug, fix the function, and patch the tests.",
+    });
+    expect(ruleHit(hits)).toBeUndefined();
+  });
+
+  it("does not fire when JSON or vision constraints are present", () => {
+    const jsonHits = evaluateSmall({
+      requested_model: "gpt-5.4-mini",
+      content: "Please check this and return structured fields.",
+      response_format: { type: "json_schema" },
+    });
+    expect(ruleHit(jsonHits)).toBeUndefined();
+
+    const visionHits = evaluateSmall({
+      requested_model: "gpt-5.4-mini",
+      content: "Please inspect this screenshot status.",
+      attachments: [{ type: "image" }],
+    });
+    expect(ruleHit(visionHits)).toBeUndefined();
+  });
+});
+
 describe("evaluateOverrides — tools floor (floor → standard)", () => {
   it("raises a trivial tools request to the standard floor (edge)", () => {
     const cfg = makeConfig();

diff --git a/packages/core/src/classifier/overrides.ts b/packages/core/src/classifier/overrides.ts
@@ -16,7 +16,7 @@ import type { Complexity } from "./tiers.js";
 export type OverrideKind = "set" | "floor";
 
 export interface OverrideHit {
-  /** "heartbeat" | "exact_confirmation" | "low_cost_automation" | "formal_logic" | "tools_floor" | "long_context" | "short_message" */
+  /** "heartbeat" | "exact_confirmation" | "low_cost_automation" | "cheap_model_low_risk" | "formal_logic" | "tools_floor" | "long_context" | "short_message" */
   rule: string;
   kind: OverrideKind;
   complexity: Complexity;
@@ -31,7 +31,10 @@ const RANK: Record<Complexity, number> = Object.fromEntries(
   ORDER.map((tier, i) => [tier, i]),
 ) as Record<Complexity, number>;
 
-type OverrideInput = Pick<InternalRequest, "messages" | "tools" | "max_tokens">;
+type OverrideInput = Pick<
+  InternalRequest,
+  "messages" | "requested_model" | "response_format" | "attachments" | "tools" | "max_tokens"
+>;
 
 // Returns ALL matching overrides (possibly empty). The engine decides the final
 // tier via `applyOverrides`. This separation keeps detection pure and lets the
@@ -75,6 +78,15 @@ export function evaluateOverrides(
     hits.push({ rule: "low_cost_automation", kind: "set", complexity: "simple" });
   }
 
+  // Cheap-model low-risk current turn: a client explicitly requested a cheap
+  // model for a short read/check/status turn, but the transcript may carry a very
+  // large history and many tools. Scope the signal to the current user turn and
+  // require the requested model + low-risk marker so generic long-context work is
+  // not silently down-routed.
+  if (isCheapModelLowRiskTurn(req, lastUserText, cfg)) {
+    hits.push({ rule: "cheap_model_low_risk", kind: "set", complexity: "simple" });
+  }
+
   // Short-message shortcut: a tiny last user message with NO complex structural
   // signal (no code block / stack trace / over-long body) is trivially simple.
   if (isShortAndSimple(lastUserText, ov.short_message_max_chars, cfg)) {
@@ -138,6 +150,43 @@ function isShortAndSimple(text: string, maxChars: number, cfg: ClassifierRulesCo
   return true;
 }
 
+// Reports whether a turn qualifies for the `cheap_model_low_risk` override.
+// Every gate below must pass:
+//   - the rule is configured (model markers and low-risk markers both non-empty);
+//   - `req.requested_model` matches one of the configured model markers;
+//   - the request carries no JSON response_format and no image-like attachment;
+//   - the trimmed `text` is non-empty and no longer than `current_turn_max_chars`;
+//   - the text has no code block, no stack trace, and no blocked marker;
+//   - the text contains at least one low-risk marker.
+// Only `text` is inspected for content; `req.messages` is never read here.
+function isCheapModelLowRiskTurn(
+  req: OverrideInput,
+  text: string,
+  cfg: ClassifierRulesConfig,
+): boolean {
+  const rule = cfg.overrides.cheap_model_low_risk;
+
+  // With either marker list empty the rule is effectively switched off.
+  const configured = rule.requested_model_markers.length > 0 && rule.low_risk_markers.length > 0;
+  if (!configured) return false;
+
+  // Request-level gates, checked before any text is examined.
+  const requestEligible =
+    modelMatches(req.requested_model, rule.requested_model_markers) &&
+    !isJsonResponseFormat(req.response_format) &&
+    !hasImageAttachment(req.attachments);
+  if (!requestEligible) return false;
+
+  // Size gate on the trimmed turn: blank or over-long text never qualifies.
+  const turn = text.trim();
+  if (turn.length === 0 || turn.length > rule.current_turn_max_chars) return false;
+
+  // Structural and keyword disqualifiers win over any low-risk marker.
+  const disqualified =
+    detectCodeBlock(turn) > 0 ||
+    detectStackTrace(turn) > 0 ||
+    containsAny(turn, rule.blocked_markers);
+  return !disqualified && containsAny(turn, rule.low_risk_markers);
+}
+
+// Case-insensitively tests a requested model name against a marker list.
+// Both sides are trimmed and lower-cased first. A marker containing `*` is
+// treated as a glob over the whole name; any other marker must equal the name
+// exactly. A blank model name or a blank marker never matches.
+function modelMatches(model: string, markers: string[]): boolean {
+  const wanted = model.trim().toLowerCase();
+  if (wanted === "") return false;
+
+  for (const raw of markers) {
+    const pattern = raw.trim().toLowerCase();
+    if (pattern === "") continue;
+    const hit = pattern.includes("*") ? globToRegExp(pattern).test(wanted) : pattern === wanted;
+    if (hit) return true;
+  }
+  return false;
+}
+
+// Compiles a `*`-only glob into an anchored, unicode-mode RegExp. Each `*`
+// becomes `.*`; every other regex metacharacter in the glob is escaped so it
+// matches literally.
+function globToRegExp(glob: string): RegExp {
+  const source = glob
+    .split("*")
+    .map((literal) => literal.replace(/[.+?^${}()|[\]\\]/gu, "\\$&"))
+    .join(".*");
+  return new RegExp(`^${source}$`, "u");
+}
+
 // The short-message disqualifier must see BOTH the English signal dimensions and
 // their international (*_intl_kw) counterparts — otherwise a short Chinese analysis/
 // security/diagnostic prompt ("分析这个系统的根因") is wrongly force-pinned `simple`
@@ -171,6 +220,23 @@ function containsAny(text: string, keywords: string[]): boolean {
   return keywords.some((kw) => kw.length > 0 && haystack.includes(kw.toLowerCase()));
 }
 
+// True when the response_format is a record whose `type` is exactly
+// "json_object" or "json_schema"; anything else (including null) is false.
+function isJsonResponseFormat(rf: OverrideInput["response_format"]): boolean {
+  if (!isRecord(rf)) return false;
+  switch (rf.type) {
+    case "json_object":
+    case "json_schema":
+      return true;
+    default:
+      return false;
+  }
+}
+
+// True when any attachment is, or might be, an image. The check fails closed:
+// an entry that is not a record, or whose `type` is not a string, counts as an
+// image, as does any `type` starting with "image" (e.g. "image", "image_url").
+// A missing, non-array, or empty attachment list yields false.
+function hasImageAttachment(attachments: OverrideInput["attachments"]): boolean {
+  if (!Array.isArray(attachments)) return false;
+  for (const entry of attachments) {
+    const kind = isRecord(entry) ? entry.type : undefined;
+    if (typeof kind !== "string" || kind.startsWith("image")) return true;
+  }
+  return false;
+}
+
 function normalizeUtterance(text: string): string {
   return text
     .trim()

diff --git a/packages/shared/src/config/classifier-schema.test.ts b/packages/shared/src/config/classifier-schema.test.ts
@@ -26,6 +26,19 @@ function fullClassifier() {
           intent_markers: ["[cron:", "MONITOR.md"],
           no_reply_markers: ["NO_REPLY"],
         },
+        cheap_model_low_risk: {
+          requested_model_markers: [
+            "economy",
+            "gpt-5.4-mini",
+            "spark",
+            "*deepseek-v4-flash",
+            "claude-haiku",
+            "*claude-haiku-*",
+          ],
+          current_turn_max_chars: 300,
+          low_risk_markers: ["check", "status"],
+          blocked_markers: ["fix", "implement"],
+        },
         formal_logic_keywords: ["⊢"],
         tools_floor: "standard",
         long_context_token_threshold: 64000,
@@ -99,6 +112,12 @@ describe("ClassifierConfigSchema", () => {
       intent_markers: [],
       no_reply_markers: [],
     });
+    expect(parsed.overrides.cheap_model_low_risk).toEqual({
+      requested_model_markers: [],
+      current_turn_max_chars: 300,
+      low_risk_markers: [],
+      blocked_markers: [],
+    });
     expect(parsed.overrides.tools_floor).toBe("standard");
   });