diff --git a/apps/cli/src/commands.test.ts b/apps/cli/src/commands.test.ts
index 53e05a5..33acbce 100644
--- a/apps/cli/src/commands.test.ts
+++ b/apps/cli/src/commands.test.ts
@@ -28,7 +28,7 @@ function makeContext(overrides: Partial<SessionContext> = {}): SessionContext {
     creds: { apiKey: 'sk-abcdefghij' },
     sessionId: 'sess-xyz',
     sessions: new SessionManager({ root: '/tmp/x' }),
-    usage: { inputTokens: 100, outputTokens: 50, reasoningTokens: 0 },
+    usage: { inputTokens: 100, outputTokens: 50, reasoningTokens: 0, cacheReadTokens: 0 },
     ...overrides,
   };
 }
@@ -157,11 +157,21 @@ describe('built-in command behavior', () => {
   it('/cost computes pricing', async () => {
     const reg = new CommandRegistry();
     const ctx = makeContext({
-      usage: { inputTokens: 1_000_000, outputTokens: 500_000, reasoningTokens: 0 },
+      usage: {
+        inputTokens: 1_000_000,
+        outputTokens: 500_000,
+        reasoningTokens: 0,
+        cacheReadTokens: 400_000,
+      },
     });
     const out = await reg.match('/cost')!.cmd.run([], ctx);
-    expect(out.join('\n')).toMatch(/Tokens/);
-    expect(out.join('\n')).toMatch(/Total/);
+    const text = out.join('\n');
+    expect(text).toMatch(/Tokens/);
+    expect(text).toMatch(/Total/);
+    // 400k of 1M input were cache hits → shown (in the runtime locale's grouping) + credited.
+    expect(text).toContain(`cache hits: ${(400_000).toLocaleString()}`);
+    expect(text).toMatch(/40%/);
+    expect(text).toMatch(/cache saved/i);
   });
 
   it('/context shows window usage', async () => {
diff --git a/apps/cli/src/commands.ts b/apps/cli/src/commands.ts
index 66b8106..3f45a59 100644
--- a/apps/cli/src/commands.ts
+++ b/apps/cli/src/commands.ts
@@ -13,6 +13,7 @@ import type {
 } from '@deepcode/core';
 import {
   contextWindowFor,
+  estimateCost,
   redact,
   EFFORT_PARAMS,
   type Credentials,
@@ -111,7 +112,12 @@ export interface SessionContext {
   credsStore?: CredentialsStore;
   sessionId: string;
   sessions: SessionManager;
-  usage: { inputTokens: number; outputTokens: number; reasoningTokens: number };
+  usage: {
+    inputTokens: number;
+    outputTokens: number;
+    reasoningTokens: number;
+    cacheReadTokens: number;
+  };
   /** Set true to terminate the REPL after this command. */
   exitRequested?: boolean;
   /** Replace history entirely (used by /clear, /resume). */
@@ -304,20 +310,21 @@ export const CostCommand: SlashCommand = {
   aliases: ['/usage'],
   description: 'Show token usage and cost estimate.',
   run(_args, ctx) {
-    // Pricing per docs/design/effort-levels.md §2.4
-    const inputYuan = (ctx.usage.inputTokens / 1_000_000) * 1.0;
-    const outputYuan =
-      ctx.model === 'deepseek-reasoner'
-        ? (ctx.usage.outputTokens / 1_000_000) * 16.0
-        : (ctx.usage.outputTokens / 1_000_000) * 2.0;
-    const reasoningYuan =
-      ctx.model === 'deepseek-reasoner' ? (ctx.usage.reasoningTokens / 1_000_000) * 4.0 : 0;
-    const total = inputYuan + outputYuan + reasoningYuan;
-    return [
-      `Tokens — in: ${ctx.usage.inputTokens.toLocaleString()}, out: ${ctx.usage.outputTokens.toLocaleString()}, reasoning: ${ctx.usage.reasoningTokens.toLocaleString()}`,
-      `Estimate — input: ¥${inputYuan.toFixed(4)}, output: ¥${outputYuan.toFixed(4)}, reasoning: ¥${reasoningYuan.toFixed(4)}`,
-      `Total this session: ¥${total.toFixed(4)}`,
+    // Pricing per docs/design/effort-levels.md §2.4, cache-aware: DeepSeek caches prompts
+    // server-side and bills cache-hit input at ~10% of a miss, so hits are shown + credited.
+    const { usage } = ctx;
+    const cost = estimateCost(usage, ctx.model);
+    const cacheHits = Math.min(usage.cacheReadTokens, usage.inputTokens);
+    const hitPct = (cost.cacheHitRate * 100).toFixed(0);
+    const lines = [
+      `Tokens — in: ${usage.inputTokens.toLocaleString()} (cache hits: ${cacheHits.toLocaleString()}, ${hitPct}%), out: ${usage.outputTokens.toLocaleString()}, reasoning: ${usage.reasoningTokens.toLocaleString()}`,
+      `Estimate — input ¥${(cost.cacheMissYuan + cost.cacheHitYuan).toFixed(4)} (miss ¥${cost.cacheMissYuan.toFixed(4)} + cache ¥${cost.cacheHitYuan.toFixed(4)}), output ¥${cost.outputYuan.toFixed(4)}, reasoning ¥${cost.reasoningYuan.toFixed(4)}`,
+      `Total this session: ¥${cost.totalYuan.toFixed(4)}`,
     ];
+    // Only mention savings when the cache actually saved something.
+    const saved = cost.cacheSavingsYuan;
+    const savedLine = `Prompt cache saved ¥${saved.toFixed(4)} vs no caching.`;
+    return saved > 0 ? [...lines, savedLine] : lines;
   },
 };
 
diff --git a/apps/cli/src/parity-commands.test.ts b/apps/cli/src/parity-commands.test.ts
index 6170ae3..ec6c948 100644
--- a/apps/cli/src/parity-commands.test.ts
+++ b/apps/cli/src/parity-commands.test.ts
@@ -30,7 +30,7 @@ function ctx(overrides: Partial<SessionContext> = {}): SessionContext {
     creds: { apiKey: 'sk-test' },
     sessionId: 's1',
     sessions: new SessionManager({ root: '/tmp/x' }),
-    usage: { inputTokens: 0, outputTokens: 0, reasoningTokens: 0 },
+    usage: { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cacheReadTokens: 0 },
     ...overrides,
   };
 }
diff --git a/apps/cli/src/repl.ts b/apps/cli/src/repl.ts
index cbe27d1..a00ab51 100644
--- a/apps/cli/src/repl.ts
+++ b/apps/cli/src/repl.ts
@@ -421,7 +421,7 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
     credsStore,
     sessionId: session.id,
     sessions,
-    usage: { inputTokens: 0, outputTokens: 0, reasoningTokens: 0 },
+    usage: { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cacheReadTokens: 0 },
     mcpServers,
     mcpErrors,
     wiredPlugins: pluginsWire?.plugins.map((p) => ({
@@ -637,6 +637,7 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
     ctx.usage.inputTokens += result.usage.inputTokens;
     ctx.usage.outputTokens += result.usage.outputTokens;
     ctx.usage.reasoningTokens += result.usage.reasoningTokens;
+    ctx.usage.cacheReadTokens += result.usage.cacheReadTokens;
     // M3c-rest: honor ExitPlanMode tool signal — flip plan → default
     if (result.modeSignal?.exitPlanMode && ctx.mode === 'plan') {
       ctx.mode = 'default';
diff --git a/packages/core/src/agent.ts b/packages/core/src/agent.ts
index a377cfe..bc1b02b 100644
--- a/packages/core/src/agent.ts
+++ b/packages/core/src/agent.ts
@@ -121,7 +121,12 @@ export interface RunAgentResult {
   /** Total provider round-trips executed. */
   turnsUsed: number;
   /** Aggregate token usage. */
-  usage: { inputTokens: number; outputTokens: number; reasoningTokens: number };
+  usage: {
+    inputTokens: number;
+    outputTokens: number;
+    reasoningTokens: number;
+    cacheReadTokens: number;
+  };
   /** Reason the loop terminated. */
   stopReason: 'end_turn' | 'max_turns' | 'aborted' | 'error';
   /** Mode-control signals flipped by tools during this run (M3c-rest). */
@@ -380,7 +385,7 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
     });
   }
 
-  const totalUsage = { inputTokens: 0, outputTokens: 0, reasoningTokens: 0 };
+  const totalUsage = { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cacheReadTokens: 0 };
   let turnsUsed = 0;
 
   // Stop hook — fires when the TOP-LEVEL agent finishes a run (a sub-agent's
@@ -437,6 +442,7 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
     totalUsage.inputTokens += result.usage.inputTokens;
     totalUsage.outputTokens += result.usage.outputTokens;
     totalUsage.reasoningTokens += result.usage.reasoningTokens;
+    totalUsage.cacheReadTokens += result.usage.cacheReadTokens;
     opts.onEvent?.({
       type: 'usage',
       inputTokens: result.usage.inputTokens,
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index b4aeea0..344b3d7 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -15,6 +15,8 @@ export {
   DEFAULT_CONTEXT_WINDOW,
   EFFORT_PARAMS,
   contextWindowFor,
+  estimateCost,
+  type CostBreakdown,
   type DeepSeekProviderOpts,
   type Provider,
   type ProviderResult,
diff --git a/packages/core/src/providers/index.ts b/packages/core/src/providers/index.ts
index fdfe50f..b712596 100644
--- a/packages/core/src/providers/index.ts
+++ b/packages/core/src/providers/index.ts
@@ -16,3 +16,5 @@ export {
   contextWindowFor,
 } from './deepseek.js';
 export type { DeepSeekProviderOpts } from './deepseek.js';
+export { estimateCost } from './pricing.js';
+export type { CostBreakdown } from './pricing.js';
diff --git a/packages/core/src/providers/pricing.test.ts b/packages/core/src/providers/pricing.test.ts
new file mode 100644
index 0000000..1c44ea6
--- /dev/null
+++ b/packages/core/src/providers/pricing.test.ts
@@ -0,0 +1,57 @@
+import { describe, expect, it } from 'vitest';
+import { estimateCost } from './pricing.js';
+import type { ProviderUsage } from './types.js';
+
+function usage(overrides: Partial<ProviderUsage>): ProviderUsage {
+  return { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cacheReadTokens: 0, ...overrides };
+}
+
+describe('estimateCost', () => {
+  it('prices deepseek-chat input + output with no cache hits', () => {
+    const c = estimateCost(
+      usage({ inputTokens: 1_000_000, outputTokens: 1_000_000 }),
+      'deepseek-chat',
+    );
+    expect(c.cacheMissYuan).toBeCloseTo(1.0, 6);
+    expect(c.cacheHitYuan).toBe(0);
+    expect(c.outputYuan).toBeCloseTo(2.0, 6);
+    expect(c.reasoningYuan).toBe(0);
+    expect(c.totalYuan).toBeCloseTo(3.0, 6);
+    expect(c.cacheHitRate).toBe(0);
+    expect(c.cacheSavingsYuan).toBe(0);
+  });
+
+  it('credits cache-hit tokens at 10% (inputTokens is inclusive of cache hits)', () => {
+    // 1M input of which 800k are cache hits → 200k miss @¥1/M + 800k hit @¥0.1/M.
+    const c = estimateCost(
+      usage({ inputTokens: 1_000_000, cacheReadTokens: 800_000 }),
+      'deepseek-chat',
+    );
+    expect(c.cacheMissYuan).toBeCloseTo(0.2, 6); // 200k @ ¥1/M
+    expect(c.cacheHitYuan).toBeCloseTo(0.08, 6); // 800k @ ¥0.1/M
+    expect(c.totalYuan).toBeCloseTo(0.28, 6);
+    expect(c.cacheHitRate).toBeCloseTo(0.8, 6);
+    expect(c.cacheSavingsYuan).toBeCloseTo(0.72, 6); // 800k @ (1.0−0.1)/M
+  });
+
+  it('prices reasoner output + reasoning higher', () => {
+    const c = estimateCost(
+      usage({ outputTokens: 1_000_000, reasoningTokens: 1_000_000 }),
+      'deepseek-reasoner',
+    );
+    expect(c.outputYuan).toBeCloseTo(16.0, 6);
+    expect(c.reasoningYuan).toBeCloseTo(4.0, 6);
+  });
+
+  it('clamps cache hits to input and falls back to chat pricing for unknown models', () => {
+    const u = usage({ inputTokens: 100, cacheReadTokens: 999, outputTokens: 1_000_000 });
+    const c = estimateCost(u, 'mystery-model');
+    expect(c).toMatchObject({ cacheHitRate: 1, cacheMissYuan: 0, outputYuan: 2 }); // chat ¥2/M
+  });
+
+  it('is zero for an empty session', () => {
+    const c = estimateCost(usage({}), 'deepseek-chat');
+    expect(c.totalYuan).toBe(0);
+    expect(c.cacheHitRate).toBe(0);
+  });
+});
diff --git a/packages/core/src/providers/pricing.ts b/packages/core/src/providers/pricing.ts
new file mode 100644
index 0000000..9f9d179
--- /dev/null
+++ b/packages/core/src/providers/pricing.ts
@@ -0,0 +1,77 @@
+// DeepSeek cost estimation (CNY), crediting server-side prompt caching.
+// Prices per docs/design/effort-levels.md §2.4 (per 1M tokens):
+//   model              input(miss)  cache-hit  output   reasoning
+//   deepseek-chat      ¥1           ¥0.1       ¥2       —
+//   deepseek-reasoner  ¥1           ¥0.1       ¥16      ¥4 (reasoning_content)
+//
+// DeepSeek's `prompt_tokens` (→ usage.inputTokens) is INCLUSIVE of the
+// cache-hit tokens (→ usage.cacheReadTokens), so cache-miss = input − cache-hit.
+// Cache hits bill at ~10% of a miss, so crediting them matters for long sessions
+// with a stable prompt prefix (the agent's system prompt + early turns).
+
+import type { ProviderUsage } from './types.js';
+
+interface ModelPricing {
+  /** Prompt tokens that were not served from the prompt cache (cache miss), ¥ per 1M. */
+  inputMissPerM: number;
+  /** Prompt tokens that were served from the prompt cache (cache hit), ¥ per 1M. */
+  cacheHitPerM: number;
+  /** Completion tokens, ¥ per 1M. */
+  outputPerM: number;
+  /** reasoning_content tokens, ¥ per 1M; 0 for a model with no separate reasoning charge. */
+  reasoningPerM: number;
+}
+
+// Input rates are identical for both models today; only output/reasoning rates differ.
+const INPUT_RATES = { inputMissPerM: 1.0, cacheHitPerM: 0.1 };
+
+const PRICING: Record<string, ModelPricing> = {
+  // No separate reasoning charge for chat.
+  'deepseek-chat': { ...INPUT_RATES, outputPerM: 2.0, reasoningPerM: 0 },
+  // Reasoner bills completions at ¥16/M and reasoning_content at ¥4/M.
+  'deepseek-reasoner': { ...INPUT_RATES, outputPerM: 16.0, reasoningPerM: 4.0 },
+};
+
+export interface CostBreakdown {
+  /** Cache-miss input cost (¥). */
+  cacheMissYuan: number;
+  /** Cache-hit input cost (¥) — the discounted prompt-cache reads. */
+  cacheHitYuan: number;
+  outputYuan: number; // completion-token cost (¥)
+  reasoningYuan: number; // reasoning-token cost (¥); 0 when the model's reasoning rate is 0
+  totalYuan: number; // sum of the four cost fields above (¥)
+  /** Cache-hit share of input tokens, 0..1 (0 when no input); hits are capped at inputTokens. */
+  cacheHitRate: number;
+  /** ¥ saved vs paying the full miss price for every input token. */
+  cacheSavingsYuan: number;
+}
+
+/**
+ * Estimate session cost in CNY from cumulative usage, crediting DeepSeek's
+ * cheaper cache-hit input tokens. Cache hits are clamped to [0, inputTokens].
+ * Models without an own entry in PRICING fall back to deepseek-chat pricing.
+ */
+export function estimateCost(usage: ProviderUsage, model: string): CostBreakdown {
+  // Own-property check: a bare PRICING[model] lookup resolves inherited keys such as
+  // "constructor" or "toString" to non-pricing values, which made every cost NaN.
+  const key = Object.prototype.hasOwnProperty.call(PRICING, model) ? model : 'deepseek-chat';
+  const p = PRICING[key]!;
+  const hitTokens = Math.max(0, Math.min(usage.cacheReadTokens, usage.inputTokens));
+  const missTokens = Math.max(0, usage.inputTokens - hitTokens);
+  const perM = (tokens: number, yuanPerM: number): number => (tokens / 1_000_000) * yuanPerM;
+  const cacheMissYuan = perM(missTokens, p.inputMissPerM);
+  const cacheHitYuan = perM(hitTokens, p.cacheHitPerM);
+  const outputYuan = perM(usage.outputTokens, p.outputPerM);
+  const reasoningYuan = perM(usage.reasoningTokens, p.reasoningPerM);
+
+  return {
+    cacheMissYuan,
+    cacheHitYuan,
+    outputYuan,
+    reasoningYuan,
+    totalYuan: cacheMissYuan + cacheHitYuan + outputYuan + reasoningYuan,
+    cacheHitRate: usage.inputTokens > 0 ? hitTokens / usage.inputTokens : 0,
+    // Discount earned: what the hits would have cost at the miss price minus what they did cost.
+    cacheSavingsYuan: perM(hitTokens, p.inputMissPerM - p.cacheHitPerM),
+  };
+}