Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions config/classifier.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,16 @@ classifier:
low_cost_automation:
intent_markers: ["[cron:", "chief-monitor", "architect-monitor", "MONITOR.md"]
no_reply_markers: ["NO_REPLY", "nothing to action", "无事不回复", "没事不回复", "无需回复"]
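# cheap_model_low_risk pins short read/check/status requests to the cheap
# lane when the client explicitly asked for a cheap model, even if the
# transcript carries a large history or many tools. It is scoped to the LAST
# user turn and never fires for code-changing / JSON / vision requests, so
# explicit heavy-model or edit/debug work is not down-routed.
# requested_model_markers are compared against the whole requested model name
# (case-insensitive); a `*` in a marker is a wildcard. low_risk_markers and
# blocked_markers are substring matches against the current user turn.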
cheap_model_low_risk:
requested_model_markers: ["economy", "gpt-5.4-mini", "gpt-5.4-mini-*", "spark", "*deepseek-v4-flash", "claude-haiku", "*claude-haiku-*", "claude-3-5-haiku", "*claude-3-5-haiku-*"]
current_turn_max_chars: 300
low_risk_markers: ["check", "inspect", "status", "health", "read", "list", "show", "summarize", "report", "monitor", "look at", "see if", "查看", "检查", "状态", "健康", "读取", "列出", "展示", "总结", "报告", "监控"]
blocked_markers: ["debug", "fix", "implement", "refactor", "modify", "edit", "patch", "write", "create", "generate", "delete", "deploy", "release", "security", "vulnerability", "exploit", "prove", "derive", "calculate", "solve", "调试", "修复", "实现", "重构", "修改", "编辑", "补丁", "编写", "创建", "生成", "删除", "部署", "发布", "安全", "漏洞", "利用", "证明", "推导", "计算", "求解"]
formal_logic_keywords: ["⊢", "∴", "modus ponens", "first-order logic", "充分必要", "反证法", "归纳法"]
tools_floor: standard
long_context_token_threshold: 64000 # capability signal (NOT a task_type); bumped 50k->64k to match llm-router lanes.yaml long_context anchor
Expand Down
8 changes: 8 additions & 0 deletions implementation-notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@

---

## 2026-07-04 · cheap-model 短低风险当前轮不被长历史抬价(Classifier / routing,docs/03/04,原则 2/4/5)

- **背景(Lukin)**:生产 `openclaw` 24h 数据显示,部分请求显式请求 `gpt-5.4-mini` / cheap alias,最后一条 user 只有约 200 字且是 read/check/status 类低风险动作,但因为完整 transcript 很大、tools 很多,被 Layer-1 长上下文/工具信号抬到 `balanced/coding`,最终使用 `gpt-5.5`。
- **规则决策**:新增配置化 override `cheap_model_low_risk`。只有 `requested_model` 命中配置 cheap markers、最后一条 user 在 `current_turn_max_chars` 内、命中低风险 marker、且没有 blocked marker 时,才把 complexity 设为 `simple`。判断只看当前 user turn,不用历史正文。
- **安全边界**:显式 `gpt-5.5` 请求不受影响;JSON response_format、vision attachment、代码变更/调试/部署/安全/数学等 blocked marker 均不触发。真正的窗口 fit 和 provider 能力仍交给后续 capability filter / fallback。
- **配置取舍**:默认 markers 包含 `economy`、`gpt-5.4-mini` / `gpt-5.4-mini-*` / `spark`,并用 `*deepseek-v4-flash` / `*claude-haiku-*` / `*claude-3-5-haiku-*` 覆盖 bare、provider-prefixed 与 dated 形态;但没有把 `gpt54` 当成 economy hint,避免把用户想要的中档模型继续压到 mini。
- **验证计划**:新增 override 单测和 golden routing 回归,覆盖 cheap-model 短低风险长历史降到 economy、重模型请求不降级、code-changing/JSON/vision 不触发;再跑 targeted Vitest、typecheck、build。

## 2026-07-04 · 视觉上下文压缩以 observe/off 为默认接入(Request optimizer / native Anthropic,docs/04/05/07/11,原则 1/3/5/7/8)

- **背景(Lukin)**:pxpipe 证明“密集文本渲成图片”在特定模型/价格结构下可能降低输入 token 成本,但该方法不是无损压缩;模型视觉路径会丢失精确字符串、数字、路径、hash、ID 等细节。
Expand Down
65 changes: 65 additions & 0 deletions packages/core/src/classifier/golden-routing.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -649,3 +649,68 @@ describe("GOLDEN: cron monitor automation stays cheap", () => {
expect(got.selected_lane).toBe("economy");
});
});

// PROD regression (openclaw key, 2026-07-04): short "check/status" current
// turns that explicitly request a cheap model can carry a huge transcript. The
// current turn is low-risk, so long-history/tools signals must not upgrade it to
// gpt-5.5. Explicit heavy-model requests remain untouched.
describe("GOLDEN: cheap-model low-risk current turns stay cheap", () => {
  // The short, read-only current turn shared by every case in this suite.
  const LOW_RISK_TURN = "Please check the current status and report anything notable.";

  // Builds the production-shaped request: a ~450k-character assistant history
  // and 37 tools in front of the short low-risk user turn. Only the requested
  // model varies between cases, so the fixture lives in one place.
  const longHistoryRequest = (requestedModel: string) =>
    req(LOW_RISK_TURN, {
      requested_model: requestedModel,
      messages: [
        { role: "assistant", content: "prior repository context ".repeat(18_000) },
        { role: "user", content: LOW_RISK_TURN },
      ],
      tools: Array.from({ length: 37 }, (_value, index) => ({
        type: "function",
        function: { name: `tool_${index}` },
      })),
    });

  // Every cheap-model hint must keep the request on the economy lane despite
  // the long history and the tool list.
  it.each([
    "gpt-5.4-mini",
    "economy",
    "deepseek-v4-flash",
    "claude-haiku",
  ])("short low-risk %s request with long history -> economy", async (requestedModel) => {
    const got = await decide(longHistoryRequest(requestedModel));
    expect(got.task_type).toBe("chat");
    expect(got.complexity).toBe("simple");
    expect(got.selected_lane).toBe("economy");
  });

  // The same turn with an explicit heavy-model request must not be pulled
  // down to the economy lane.
  it("explicit gpt-5.5 request with the same low-risk turn is not down-routed", async () => {
    const got = await decide(longHistoryRequest("gpt-5.5"));
    expect(got.selected_lane).not.toBe("economy");
  });
});
133 changes: 133 additions & 0 deletions packages/core/src/classifier/overrides.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,24 @@ function makeConfig(
function req(opts: {
content?: string;
messages?: { role: string; content: unknown }[];
requested_model?: string;
response_format?: Record<string, unknown> | null;
attachments?: unknown[] | null;
tools?: unknown[] | null;
max_tokens?: number | null;
}): {
messages: { role: string; content: unknown }[];
requested_model: string;
response_format: Record<string, unknown> | null;
attachments: unknown[] | null;
tools: unknown[] | null;
max_tokens: number | null;
} {
return {
messages: opts.messages ?? [{ role: "user", content: opts.content ?? "" }],
requested_model: opts.requested_model ?? "auto",
response_format: opts.response_format ?? null,
attachments: opts.attachments ?? null,
tools: opts.tools ?? null,
max_tokens: opts.max_tokens ?? null,
};
Expand Down Expand Up @@ -135,6 +144,130 @@ describe("evaluateOverrides — low-cost automation (set → simple)", () => {
});
});

describe("evaluateOverrides — cheap model low-risk current turn (set → simple)", () => {
  // Model markers mirror the shipped classifier.yaml list (including the
  // `gpt-5.4-mini-*` glob); the low-risk / blocked lists are trimmed down to
  // what these cases need.
  const cheapModelLowRisk = {
    requested_model_markers: [
      "economy",
      "gpt-5.4-mini",
      "gpt-5.4-mini-*",
      "spark",
      "*deepseek-v4-flash",
      "claude-haiku",
      "*claude-haiku-*",
      "claude-3-5-haiku",
      "*claude-3-5-haiku-*",
    ],
    current_turn_max_chars: 300,
    low_risk_markers: ["check", "inspect", "status", "read"],
    blocked_markers: ["debug", "fix", "implement", "refactor", "patch"],
  };

  // Short, read-only current turn reused by the positive cases.
  const LOW_RISK_TURN = "Please check the current status and report anything notable.";

  it("pins a short low-risk cheap-model request to simple despite long history/tools", () => {
    const cfg = makeConfig({
      cheap_model_low_risk: cheapModelLowRisk,
      long_context_token_threshold: 1_000,
    });
    const hits = evaluateOverrides(
      req({
        requested_model: "gpt-5.4-mini",
        messages: [
          { role: "assistant", content: "prior implementation details ".repeat(10_000) },
          { role: "user", content: LOW_RISK_TURN },
        ],
        tools: [{ name: "shell_exec" }],
      }),
      cfg,
      70_000,
    );
    expect(hits).toContainEqual({
      rule: "cheap_model_low_risk",
      kind: "set",
      complexity: "simple",
    });
    // The floors still fire; the `set` hit is what wins in applyOverrides.
    expect(hits).toContainEqual({ rule: "tools_floor", kind: "floor", complexity: "standard" });
    expect(hits).toContainEqual({ rule: "long_context", kind: "floor", complexity: "complex" });
    expect(applyOverrides("complex", hits)).toBe("simple");
  });

  it.each([
    "economy",
    "gpt-5.4-mini-2026-01-15",
    "deepseek-v4-flash",
    "deepseek/deepseek-v4-flash",
    "openrouter/deepseek-v4-flash",
    "claude-haiku",
    "claude-haiku-4-5-20251001",
    "claude-3-5-haiku-20241022",
    "anthropic/claude-haiku-4-5-20251001",
    "zenmux-anthropic/claude-haiku-4.5",
  ])("treats %s as a cheap-model hint", (requestedModel) => {
    const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
    const hits = evaluateOverrides(
      req({
        requested_model: requestedModel,
        content: LOW_RISK_TURN,
      }),
      cfg,
      20,
    );
    expect(hits).toContainEqual({
      rule: "cheap_model_low_risk",
      kind: "set",
      complexity: "simple",
    });
  });

  it("does not fire for explicit heavy-model requests", () => {
    const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
    const hits = evaluateOverrides(
      req({
        requested_model: "gpt-5.5",
        content: LOW_RISK_TURN,
      }),
      cfg,
      20,
    );
    expect(hits.find((h) => h.rule === "cheap_model_low_risk")).toBeUndefined();
  });

  it("does not fire for code-changing current turns", () => {
    const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
    const hits = evaluateOverrides(
      req({
        requested_model: "gpt-5.4-mini",
        content: "Please inspect this bug, fix the function, and patch the tests.",
      }),
      cfg,
      20,
    );
    expect(hits.find((h) => h.rule === "cheap_model_low_risk")).toBeUndefined();
  });

  it("does not fire when the current turn exceeds current_turn_max_chars", () => {
    const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
    // Low-risk wording only, but well past the 300-character budget.
    const longTurn = `Please check the current status. ${"still checking status. ".repeat(20)}`;
    const hits = evaluateOverrides(
      req({
        requested_model: "gpt-5.4-mini",
        content: longTurn,
      }),
      cfg,
      20,
    );
    expect(hits.find((h) => h.rule === "cheap_model_low_risk")).toBeUndefined();
  });

  it("does not fire when a JSON response_format is requested", () => {
    const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
    const hits = evaluateOverrides(
      req({
        requested_model: "gpt-5.4-mini",
        content: "Please check this and return structured fields.",
        response_format: { type: "json_schema" },
      }),
      cfg,
      20,
    );
    expect(hits.find((h) => h.rule === "cheap_model_low_risk")).toBeUndefined();
  });

  it("does not fire when an image attachment is present", () => {
    const cfg = makeConfig({ cheap_model_low_risk: cheapModelLowRisk });
    const hits = evaluateOverrides(
      req({
        requested_model: "gpt-5.4-mini",
        content: "Please inspect this screenshot status.",
        attachments: [{ type: "image" }],
      }),
      cfg,
      20,
    );
    expect(hits.find((h) => h.rule === "cheap_model_low_risk")).toBeUndefined();
  });
});

describe("evaluateOverrides — tools floor (floor → standard)", () => {
it("raises a trivial tools request to the standard floor (edge)", () => {
const cfg = makeConfig();
Expand Down
70 changes: 68 additions & 2 deletions packages/core/src/classifier/overrides.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import type { Complexity } from "./tiers.js";
export type OverrideKind = "set" | "floor";

export interface OverrideHit {
/** "heartbeat" | "exact_confirmation" | "low_cost_automation" | "formal_logic" | "tools_floor" | "long_context" | "short_message" */
/** "heartbeat" | "exact_confirmation" | "low_cost_automation" | "cheap_model_low_risk" | "formal_logic" | "tools_floor" | "long_context" | "short_message" */
rule: string;
kind: OverrideKind;
complexity: Complexity;
Expand All @@ -31,7 +31,10 @@ const RANK: Record<Complexity, number> = Object.fromEntries(
ORDER.map((tier, i) => [tier, i]),
) as Record<Complexity, number>;

type OverrideInput = Pick<InternalRequest, "messages" | "tools" | "max_tokens">;
/**
 * The slice of `InternalRequest` that override detection reads.
 * `requested_model`, `response_format` and `attachments` are consumed by the
 * `cheap_model_low_risk` rule (requested-model match, JSON-format gate and
 * image-attachment gate respectively).
 */
type OverrideInput = Pick<
InternalRequest,
"messages" | "requested_model" | "response_format" | "attachments" | "tools" | "max_tokens"
>;

// Returns ALL matching overrides (possibly empty). The engine decides the final
// tier via `applyOverrides`. This separation keeps detection pure and lets the
Expand Down Expand Up @@ -75,6 +78,15 @@ export function evaluateOverrides(
hits.push({ rule: "low_cost_automation", kind: "set", complexity: "simple" });
}

// Cheap-model low-risk current turn: a client explicitly requested a cheap
// model for a short read/check/status turn, but the transcript may carry a very
// large history and many tools. Scope the signal to the current user turn and
// require the requested model + low-risk marker so generic long-context work is
// not silently down-routed.
if (isCheapModelLowRiskTurn(req, lastUserText, cfg)) {
hits.push({ rule: "cheap_model_low_risk", kind: "set", complexity: "simple" });
}

// Short-message shortcut: a tiny last user message with NO complex structural
// signal (no code block / stack trace / over-long body) is trivially simple.
if (isShortAndSimple(lastUserText, ov.short_message_max_chars, cfg)) {
Expand Down Expand Up @@ -138,6 +150,43 @@ function isShortAndSimple(text: string, maxChars: number, cfg: ClassifierRulesCo
return true;
}

/**
 * Decides whether the `cheap_model_low_risk` override applies.
 *
 * All of the following must hold:
 *  - the rule is configured (non-empty model markers and low-risk markers);
 *  - the requested model matches one of the configured model markers;
 *  - the request asks for no JSON response format and carries no attachment
 *    that is (or might be) an image;
 *  - the trimmed current-turn text is non-empty, at most
 *    `current_turn_max_chars` long, and has no code block or stack trace;
 *  - the text contains no blocked marker and at least one low-risk marker.
 *
 * @param req  Request fields consulted by the rule (model, format, attachments).
 * @param text Text of the current user turn, as supplied by the caller.
 * @param cfg  Classifier rules; only `overrides.cheap_model_low_risk` is read.
 * @returns `true` when the turn should be pinned to `simple`.
 */
function isCheapModelLowRiskTurn(
  req: OverrideInput,
  text: string,
  cfg: ClassifierRulesConfig,
): boolean {
  const {
    requested_model_markers: modelMarkers,
    low_risk_markers: lowRiskMarkers,
    blocked_markers: blockedMarkers,
    current_turn_max_chars: maxChars,
  } = cfg.overrides.cheap_model_low_risk;

  // An unconfigured rule never fires.
  const configured = modelMarkers.length > 0 && lowRiskMarkers.length > 0;
  if (!configured) return false;

  // Request-level gates: cheap model asked for, no JSON output, no image input.
  if (
    !modelMatches(req.requested_model, modelMarkers) ||
    isJsonResponseFormat(req.response_format) ||
    hasImageAttachment(req.attachments)
  ) {
    return false;
  }

  // Turn-level gates: short, non-empty, and free of code-like structure.
  const turn = text.trim();
  if (turn.length === 0 || turn.length > maxChars) return false;
  if (detectCodeBlock(turn) > 0 || detectStackTrace(turn) > 0) return false;

  // A blocked marker vetoes the rule even when a low-risk marker is present.
  return !containsAny(turn, blockedMarkers) && containsAny(turn, lowRiskMarkers);
}

/**
 * Reports whether a requested model name matches any configured marker.
 *
 * Both sides are trimmed and lower-cased before comparison. A marker that
 * contains `*` is treated as a glob over the whole model name; any other
 * marker must equal the model name exactly. A blank model name or a blank
 * marker never matches.
 *
 * @param model   Requested model name.
 * @param markers Configured markers (exact names or `*` globs).
 * @returns `true` as soon as one marker matches.
 */
function modelMatches(model: string, markers: string[]): boolean {
  const wanted = model.trim().toLowerCase();
  if (wanted === "") return false;

  for (const rawMarker of markers) {
    const pattern = rawMarker.trim().toLowerCase();
    if (pattern === "") continue;

    const matched = pattern.includes("*")
      ? globToRegExp(pattern).test(wanted)
      : pattern === wanted;
    if (matched) return true;
  }
  return false;
}

/**
 * Compiles a `*` glob into an anchored regular expression.
 *
 * Regex metacharacters in the glob are escaped first, then every `*` becomes
 * `.*`; the result is anchored with `^`/`$` so it must match the whole input.
 *
 * @param glob Glob pattern in which `*` is the only wildcard.
 * @returns A unicode-flagged `RegExp` equivalent to the glob.
 */
function globToRegExp(glob: string): RegExp {
  const literalSafe = glob.replace(/[.+?^${}()|[\]\\]/gu, "\\$&");
  const body = literalSafe.replace(/\*/gu, ".*");
  return new RegExp("^" + body + "$", "u");
}

// The short-message disqualifier must see BOTH the English signal dimensions and
// their international (*_intl_kw) counterparts — otherwise a short Chinese analysis/
// security/diagnostic prompt ("分析这个系统的根因") is wrongly force-pinned `simple`
Expand Down Expand Up @@ -171,6 +220,23 @@ function containsAny(text: string, keywords: string[]): boolean {
return keywords.some((kw) => kw.length > 0 && haystack.includes(kw.toLowerCase()));
}

/**
 * Reports whether the request asks for a JSON response format.
 *
 * @param rf The request's `response_format` value.
 * @returns `true` only when `rf` passes `isRecord` and its `type` is exactly
 *   `"json_object"` or `"json_schema"`.
 */
function isJsonResponseFormat(rf: OverrideInput["response_format"]): boolean {
  if (!isRecord(rf)) return false;
  switch (rf.type) {
    case "json_object":
    case "json_schema":
      return true;
    default:
      return false;
  }
}

/**
 * Reports whether the request carries an attachment that is, or cannot be
 * ruled out as, an image.
 *
 * An attachment counts when it fails `isRecord`, when its `type` is not a
 * string, or when its `type` starts with `"image"` (which covers `"image"`
 * and `"image_url"`). A missing, non-array or empty list yields `false`.
 *
 * @param attachments The request's `attachments` value.
 * @returns `true` if at least one attachment counts as described above.
 */
function hasImageAttachment(attachments: OverrideInput["attachments"]): boolean {
  if (!Array.isArray(attachments)) return false;
  return attachments.some((entry) => {
    // Unrecognised shapes are treated as images rather than ignored.
    if (!isRecord(entry)) return true;
    const kind = entry.type;
    return typeof kind !== "string" || kind.startsWith("image");
  });
}

function normalizeUtterance(text: string): string {
return text
.trim()
Expand Down
19 changes: 19 additions & 0 deletions packages/shared/src/config/classifier-schema.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ function fullClassifier() {
intent_markers: ["[cron:", "MONITOR.md"],
no_reply_markers: ["NO_REPLY"],
},
cheap_model_low_risk: {
requested_model_markers: [
"economy",
"gpt-5.4-mini",
"spark",
"*deepseek-v4-flash",
"claude-haiku",
"*claude-haiku-*",
],
current_turn_max_chars: 300,
low_risk_markers: ["check", "status"],
blocked_markers: ["fix", "implement"],
},
formal_logic_keywords: ["⊢"],
tools_floor: "standard",
long_context_token_threshold: 64000,
Expand Down Expand Up @@ -99,6 +112,12 @@ describe("ClassifierConfigSchema", () => {
intent_markers: [],
no_reply_markers: [],
});
expect(parsed.overrides.cheap_model_low_risk).toEqual({
requested_model_markers: [],
current_turn_max_chars: 300,
low_risk_markers: [],
blocked_markers: [],
});
expect(parsed.overrides.tools_floor).toBe("standard");
});

Expand Down
Loading
Loading