From 569ee080e7903d29ee12e5b69b351074a964c778 Mon Sep 17 00:00:00 2001
From: oratis <wangharp@gmail.com>
Date: Fri, 29 May 2026 23:48:38 +0800
Subject: [PATCH 1/2] chore(tooling): enforce lint in pre-commit + ignore Rust
 build output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

eslint was scanning apps/desktop/src-tauri/target/ (generated Cargo JS),
producing 49 spurious errors locally — CI only escaped because it lints
before cargo build. Add target/ + release-artifacts/ to the ignore list,
then wire `pnpm lint` into the pre-commit hook so lint regressions are
caught before they land instead of only in CI.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .husky/pre-commit | 3 +++
 eslint.config.js  | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/.husky/pre-commit b/.husky/pre-commit
index 6cc60ec..0987320 100755
--- a/.husky/pre-commit
+++ b/.husky/pre-commit
@@ -4,6 +4,9 @@
 
 set -e
 
+echo "▎ pre-commit: lint"
+pnpm lint
+
 echo "▎ pre-commit: typecheck"
 pnpm -r typecheck
 
diff --git a/eslint.config.js b/eslint.config.js
index 811a7b6..abf5eef 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -13,7 +13,9 @@ export default [
       '**/dist/**',
       '**/dist-electron/**',
       '**/node_modules/**',
+      '**/target/**', // Rust/Cargo build output (generated JS in src-tauri/target)
       '**/.tsbuildinfo',
+      'release-artifacts/**',
       'apps/desktop/electron/**', // requires electron types — pending M6-rest
     ],
   },

From 0af6ba1470c84c8684ac38fb24b134b6795b078b Mon Sep 17 00:00:00 2001
From: oratis <wangharp@gmail.com>
Date: Fri, 29 May 2026 23:48:57 +0800
Subject: [PATCH 2/2] fix(core): agent-loop & provider correctness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three independent fixes in the core engine:

1. Auto-compact fired every turn once over the threshold. `shouldCompact`
   read the cumulative `totalUsage` (summed across all turns, never reset),
   so it both triggered far too early (each turn re-sends the whole history,
   inflating the sum) and, once over the line, re-compacted the already-
   compacted history on every subsequent turn. Use the latest turn's
   `result.usage.inputTokens` — the true current-context size — instead.

2. Read-only tools now execute concurrently. The per-turn tool loop awaited
   each call sequentially; Read/Grep/Glob/WebFetch/WebSearch are side-effect-
   free and independent, so they run via Promise.all while mutating tools
   (Edit/Write/Bash/…) stay sequential to preserve snapshot ordering and
   one-at-a-time approval prompts. Results are reassembled in the model's
   original order via tool_use_id.

3. A max_tokens-truncated tool call is no longer executed as garbage. When
   the completion is cut off mid-arguments the partial JSON won't parse and
   `input` became {} — e.g. a Write with no file_path. The DeepSeek provider
   now drops such calls and reports stopReason 'max_tokens' so the loop ends
   cleanly instead of executing a malformed call.

Adds regression tests for all three.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/core/src/agent.test.ts              | 151 +++++++++++++++++++
 packages/core/src/agent.ts                   |  92 ++++++++---
 packages/core/src/providers/deepseek.test.ts |  66 ++++++++
 packages/core/src/providers/deepseek.ts      |  34 ++++-
 4 files changed, 311 insertions(+), 32 deletions(-)

diff --git a/packages/core/src/agent.test.ts b/packages/core/src/agent.test.ts
index 5d3a019..850d099 100644
--- a/packages/core/src/agent.test.ts
+++ b/packages/core/src/agent.test.ts
@@ -278,6 +278,157 @@ describe('runAgent', () => {
     }
   });
 
+  it('runs multiple read-only tool calls concurrently and preserves result order', async () => {
+    const events2: string[] = [];
+    const delay = (ms: number) => new Promise((r) => setTimeout(r, ms));
+    const slowReadOnly = (name: string) => ({
+      name,
+      definition: { name, description: name, inputSchema: { type: 'object', properties: {} } },
+      async execute() {
+        events2.push(`start:${name}`);
+        await delay(20);
+        events2.push(`end:${name}`);
+        return { content: `${name} done` };
+      },
+    });
+    // Custom registry with two read-only-named tools (Grep + Glob ∈ READ_ONLY_TOOLS).
+    const tools = new ToolRegistry([
+      slowReadOnly('Grep'),
+      slowReadOnly('Glob'),
+    ] as unknown as Parameters<typeof ToolRegistry.prototype.register>[0][]);
+
+    const provider = new MockProvider([
+      {
+        content: [
+          { type: 'text', text: 'searching' },
+          { type: 'tool_use', id: 'g1', name: 'Grep', input: {} },
+          { type: 'tool_use', id: 'g2', name: 'Glob', input: {} },
+        ],
+        stopReason: 'tool_use',
+        usage: { inputTokens: 1, outputTokens: 1, reasoningTokens: 0, cacheReadTokens: 0 },
+      },
+      endTurn('done'),
+    ]);
+
+    const result = await runAgent({
+      provider,
+      tools,
+      systemPrompt: '',
+      userMessage: 'find things',
+      model: 'deepseek-chat',
+      cwd,
+    });
+
+    // Concurrency: both tools start before either finishes.
+    expect(events2.slice(0, 2).every((e) => e.startsWith('start:'))).toBe(true);
+    expect(events2.slice(2).every((e) => e.startsWith('end:'))).toBe(true);
+
+    // Result order matches the model's call order (Grep then Glob) regardless of
+    // which promise settled first.
+    const toolResultMsg = result.history[2]!; // user msg with tool_result blocks
+    expect(toolResultMsg.role).toBe('user');
+    const ids = toolResultMsg.content
+      .filter((b): b is Extract<ContentBlock, { type: 'tool_result' }> => b.type === 'tool_result')
+      .map((b) => b.tool_use_id);
+    expect(ids).toEqual(['g1', 'g2']);
+  });
+
+  it('does not auto-compact on cumulative usage when each turn is below threshold', async () => {
+    // Regression: shouldCompact must use the *current* turn's input tokens, not
+    // the cumulative sum across turns. contextWindow 100, threshold 0.8 → trigger
+    // at 80. Each turn reports inputTokens 30 (below 80), so the per-turn proxy
+    // never crosses — but the cumulative sum (30+30+30=90) would, under the old
+    // buggy logic, fire compaction on turn 3. Assert it never fires.
+    await fs.writeFile(join(cwd, 'x.txt'), 'data');
+
+    // A provider that counts how many times the compaction summarizer runs
+    // (identified by the compaction system prompt + empty tool list).
+    let summarizerCalls = 0;
+    const turn = (): ProviderResult => ({
+      content: withToolCall('working', {
+        type: 'tool_use',
+        id: `c${Math.random()}`,
+        name: 'Read',
+        input: { file_path: 'x.txt' },
+      }),
+      stopReason: 'tool_use',
+      usage: { inputTokens: 30, outputTokens: 0, reasoningTokens: 0, cacheReadTokens: 0 },
+    });
+    const scripted: ProviderResult[] = [turn(), turn(), endTurn('done')];
+    const countingProvider: Provider = {
+      name: 'counting',
+      async runTurn(opts: ProviderRunOpts): Promise<ProviderResult> {
+        if (opts.systemPrompt.startsWith('You compress long agent conversations')) {
+          summarizerCalls++;
+          return endTurn('summary');
+        }
+        const next = scripted.shift();
+        if (!next) throw new Error('no scripted response');
+        return next;
+      },
+    };
+
+    const result = await runAgent({
+      provider: countingProvider,
+      tools: new ToolRegistry(),
+      systemPrompt: 'agent',
+      userMessage: 'go',
+      model: 'deepseek-chat',
+      cwd,
+      autoCompact: { contextWindow: 100, threshold: 0.8 },
+    });
+
+    expect(result.stopReason).toBe('end_turn');
+    expect(summarizerCalls).toBe(0);
+  });
+
+  it('auto-compacts once when a single turn crosses the threshold', async () => {
+    // Inverse of the above: when the *current* turn's input alone exceeds the
+    // threshold (90 > 80), compaction should fire. History after one tool turn
+    // is short, so compact() keeps it verbatim, but the summarizer is still
+    // invoked — proving the trigger path is live.
+    await fs.writeFile(join(cwd, 'x.txt'), 'data');
+    let summarizerCalls = 0;
+    const scripted: ProviderResult[] = [
+      {
+        content: withToolCall('working', {
+          type: 'tool_use',
+          id: 'big',
+          name: 'Read',
+          input: { file_path: 'x.txt' },
+        }),
+        stopReason: 'tool_use',
+        usage: { inputTokens: 90, outputTokens: 0, reasoningTokens: 0, cacheReadTokens: 0 },
+      },
+      endTurn('done'),
+    ];
+    const provider: Provider = {
+      name: 'counting',
+      async runTurn(opts: ProviderRunOpts): Promise<ProviderResult> {
+        if (opts.systemPrompt.startsWith('You compress long agent conversations')) {
+          summarizerCalls++;
+          return endTurn('summary');
+        }
+        const next = scripted.shift();
+        if (!next) throw new Error('no scripted response');
+        return next;
+      },
+    };
+
+    await runAgent({
+      provider,
+      tools: new ToolRegistry(),
+      systemPrompt: 'agent',
+      userMessage: 'go',
+      model: 'deepseek-chat',
+      cwd,
+      // Tiny keep window so compact() doesn't short-circuit on the short history.
+      autoCompact: { contextWindow: 100, threshold: 0.8, keepFirstPairs: 0, keepLastMessages: 1 },
+    });
+
+    expect(summarizerCalls).toBe(1);
+  });
+
   it('honors systemReminders: false to skip injection entirely', async () => {
     const provider = new MockProvider([endTurn('hi')]);
     const tools = new ToolRegistry();
diff --git a/packages/core/src/agent.ts b/packages/core/src/agent.ts
index ae58f9e..d3e1891 100644
--- a/packages/core/src/agent.ts
+++ b/packages/core/src/agent.ts
@@ -101,6 +101,14 @@ export interface RunAgentResult {
 
 const DEFAULT_MAX_TURNS = 16;
 
+/**
+ * Tools with no side effects whose results don't depend on each other — safe to
+ * execute concurrently within a single turn. Everything else (Edit/Write/Bash/
+ * TodoWrite/AskUserQuestion/ExitPlanMode) runs sequentially to preserve snapshot
+ * ordering, mutation order, and one-at-a-time interactive prompts.
+ */
+const READ_ONLY_TOOLS = new Set(['Read', 'Grep', 'Glob', 'WebFetch', 'WebSearch']);
+
 /**
  * Runs the agent loop until the model produces an end_turn (no tool calls),
  * or `maxTurns` is reached, or the abort signal fires.
@@ -233,14 +241,27 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
       return { history, turnsUsed, usage: totalUsage, stopReason: 'end_turn', modeSignal };
     }
 
-    // Execute tool calls and append a single user-role message with tool_result blocks
-    const toolResults: ToolResultBlock[] = [];
-    for (const block of result.content) {
-      if (block.type !== 'tool_use') continue;
-      const toolUse = block as ToolUseBlock;
+    // Execute tool calls and append a single user-role message with tool_result
+    // blocks. Two phases:
+    //   1. (sequential) resolve handler + permission for each call. Approval
+    //      prompts must never overlap, so gating stays strictly ordered.
+    //   2. (mixed) execute. Side-effect-free reads run concurrently via
+    //      Promise.all (the common "model emits 3 Reads at once" case); tools
+    //      that mutate state / snapshot run sequentially to preserve ordering.
+    // tool_result blocks carry their tool_use_id, so the final array is
+    // re-assembled in the model's original order regardless of finish order.
+    const toolBlocks = result.content.filter(
+      (b): b is ToolUseBlock => b.type === 'tool_use',
+    );
+    const resultsById = new Map<string, ToolResultBlock>();
+    type Ready = { toolUse: ToolUseBlock; handler: NonNullable<ReturnType<typeof opts.tools.get>> };
+    const ready: Ready[] = [];
+
+    // Phase 1 — sequential gate + approval.
+    for (const toolUse of toolBlocks) {
       const handler = opts.tools.get(toolUse.name);
       if (!handler) {
-        toolResults.push({
+        resultsById.set(toolUse.id, {
           type: 'tool_result',
           tool_use_id: toolUse.id,
           content: `Error: tool not found: ${toolUse.name}`,
@@ -268,7 +289,7 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
           allowed = decision === true || decision === 'always';
         }
         if (!allowed) {
-          toolResults.push({
+          resultsById.set(toolUse.id, {
             type: 'tool_result',
             tool_use_id: toolUse.id,
             content: `Tool call blocked: ${verdict.reason}`,
@@ -287,12 +308,16 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
         }
       }
 
-      // Pre-execution snapshot (Edit/Write only)
-      if (
-        opts.enableSnapshots !== false &&
-        opts.session &&
-        (toolUse.name === 'Edit' || toolUse.name === 'Write')
-      ) {
+      ready.push({ toolUse, handler });
+    }
+
+    // Runs one approved tool end-to-end: pre-snapshot, execute, PostToolUse
+    // hook, post-snapshot, event + result. Side-effect-free tools call this
+    // concurrently; mutating tools call it one at a time (see partition below).
+    const execOne = async ({ toolUse, handler }: Ready): Promise<void> => {
+      const isFileMutation = toolUse.name === 'Edit' || toolUse.name === 'Write';
+
+      if (opts.enableSnapshots !== false && opts.session && isFileMutation) {
         const filePath = (toolUse.input as { file_path?: string }).file_path;
         if (filePath) {
           await opts.session.manager.snapshot({
@@ -327,13 +352,7 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
         });
       }
 
-      // Post-execution snapshot
-      if (
-        opts.enableSnapshots !== false &&
-        opts.session &&
-        (toolUse.name === 'Edit' || toolUse.name === 'Write') &&
-        !tr.isError
-      ) {
+      if (opts.enableSnapshots !== false && opts.session && isFileMutation && !tr.isError) {
         const filePath = (toolUse.input as { file_path?: string }).file_path;
         if (filePath) {
           await opts.session.manager.snapshot({
@@ -347,13 +366,26 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
       }
 
       opts.onEvent?.({ type: 'tool_result', id: toolUse.id, result: tr });
-      toolResults.push({
+      resultsById.set(toolUse.id, {
         type: 'tool_result',
         tool_use_id: toolUse.id,
         content: tr.content,
         is_error: tr.isError,
       });
-    }
+    };
+
+    // Phase 2 — execute. Read-only tools have no side effects and don't touch
+    // snapshotSeq, so they're safe to run concurrently; everything else stays
+    // sequential to keep snapshot ordering deterministic.
+    const parallel = ready.filter((r) => READ_ONLY_TOOLS.has(r.toolUse.name));
+    const serial = ready.filter((r) => !READ_ONLY_TOOLS.has(r.toolUse.name));
+    await Promise.all(parallel.map(execOne));
+    for (const r of serial) await execOne(r);
+
+    // Re-assemble in the model's original tool-call order.
+    const toolResults: ToolResultBlock[] = toolBlocks
+      .map((b) => resultsById.get(b.id))
+      .filter((r): r is ToolResultBlock => r !== undefined);
 
     const resultMsg: StoredMessage = {
       role: 'user',
@@ -363,12 +395,22 @@ export async function runAgent(opts: RunAgentOptions): Promise<RunAgentResult> {
     history.push(resultMsg);
     if (opts.session) await opts.session.manager.append(opts.session.id, resultMsg);
 
-    // M3c: auto-compact if usage crossed threshold
+    // M3c: auto-compact if the *current* context crossed the threshold.
+    //
+    // Use this turn's usage (result.usage), NOT the cumulative totalUsage.
+    // `result.usage.inputTokens` is exactly the size of the history we just
+    // sent to the model, so it is the true current-context proxy. Cumulative
+    // usage is wrong on two counts: it sums every turn's input (each turn
+    // re-sends the whole history, so it inflates far past the real window and
+    // crosses the threshold too early), and it never shrinks after a compaction
+    // — meaning once over the line it would re-compact the already-compacted
+    // history on every subsequent turn. The next turn's inputTokens naturally
+    // reflects the freshly-compacted (smaller) context, so this self-corrects.
     if (
       opts.autoCompact &&
       shouldCompact({
-        inputTokens: totalUsage.inputTokens,
-        outputTokens: totalUsage.outputTokens,
+        inputTokens: result.usage.inputTokens,
+        outputTokens: result.usage.outputTokens,
         contextWindow: opts.autoCompact.contextWindow,
         threshold: opts.autoCompact.threshold,
       })
diff --git a/packages/core/src/providers/deepseek.test.ts b/packages/core/src/providers/deepseek.test.ts
index 840acef..94603ae 100644
--- a/packages/core/src/providers/deepseek.test.ts
+++ b/packages/core/src/providers/deepseek.test.ts
@@ -139,6 +139,72 @@ describe('DeepSeekProvider', () => {
       expect(result.content[0].input).toEqual({ file_path: 'src/a.ts' });
     }
   });
+
+  it('drops a tool call truncated by max_tokens instead of executing garbage', async () => {
+    // Model started a Write but the completion was cut off mid-arguments
+    // (finish_reason 'length'). The partial JSON won't parse.
+    const chunks = [
+      { choices: [{ delta: { content: 'Creating the file' } }] },
+      {
+        choices: [
+          {
+            delta: {
+              tool_calls: [
+                { index: 0, id: 'call_w', function: { name: 'Write', arguments: '{"file_path":"a.js","content":"cons' } },
+              ],
+            },
+          },
+        ],
+      },
+      {
+        choices: [{ delta: {}, finish_reason: 'length' }],
+        usage: { prompt_tokens: 10, completion_tokens: 3000 },
+      },
+    ];
+    const p = new DeepSeekProvider({ apiKey: 'sk-test', fetch: mockFetch(chunks) });
+    const result = await p.runTurn({
+      model: 'deepseek-chat',
+      systemPrompt: '',
+      tools: [{ name: 'Write', description: '', inputSchema: { type: 'object', properties: {} } }],
+      messages: [{ role: 'user', content: [{ type: 'text', text: 'write it' }] }],
+    });
+    // The malformed Write must NOT appear as an executable tool_use.
+    expect(result.content.some((b) => b.type === 'tool_use')).toBe(false);
+    // The partial text is preserved.
+    expect(result.content.some((b) => b.type === 'text')).toBe(true);
+    // The loop should stop, not try to execute the truncated call.
+    expect(result.stopReason).toBe('max_tokens');
+  });
+
+  it('keeps a valid call but drops a truncated sibling in the same turn', async () => {
+    const chunks = [
+      {
+        choices: [
+          {
+            delta: {
+              tool_calls: [
+                { index: 0, id: 'ok1', function: { name: 'Read', arguments: '{"file_path":"a.ts"}' } },
+                { index: 1, id: 'bad', function: { name: 'Write', arguments: '{"file_path":"b.ts","content":"x' } },
+              ],
+            },
+          },
+        ],
+      },
+      { choices: [{ delta: {}, finish_reason: 'length' }], usage: { prompt_tokens: 5, completion_tokens: 8 } },
+    ];
+    const p = new DeepSeekProvider({ apiKey: 'sk-test', fetch: mockFetch(chunks) });
+    const result = await p.runTurn({
+      model: 'deepseek-chat',
+      systemPrompt: '',
+      tools: [],
+      messages: [{ role: 'user', content: [{ type: 'text', text: 'go' }] }],
+    });
+    const toolUses = result.content.filter((b) => b.type === 'tool_use');
+    expect(toolUses).toHaveLength(1);
+    expect(toolUses[0]?.type === 'tool_use' && toolUses[0].name).toBe('Read');
+    // At least one valid call survived → still a tool_use turn.
+    expect(result.stopReason).toBe('tool_use');
+  });
 });
 
 describe('DeepSeekProvider message conversion', () => {
diff --git a/packages/core/src/providers/deepseek.ts b/packages/core/src/providers/deepseek.ts
index d26ce5c..f0e3612 100644
--- a/packages/core/src/providers/deepseek.ts
+++ b/packages/core/src/providers/deepseek.ts
@@ -166,20 +166,40 @@ export class DeepSeekProvider implements Provider {
     if (text) {
       content.push({ type: 'text', text });
     }
+    // Assemble tool calls, dropping any that were truncated mid-stream. When the
+    // completion hits max_tokens while emitting a tool call, the accumulated
+    // `args` is partial JSON that won't parse — executing it produces garbage
+    // (e.g. a Write with no file_path). A call with no name at all is a stray
+    // streaming artifact. Both are dropped; a genuinely empty-arg call (args ===
+    // '' or '{}', e.g. ExitPlanMode) is kept and validated by the tool itself.
+    let validToolCalls = 0;
+    let truncatedToolCall = false;
     for (const call of toolCalls.values()) {
-      const toolUse: ToolUseBlock = {
+      if (!call.name) {
+        truncatedToolCall = true;
+        continue;
+      }
+      const parsed = safeParseJson(call.args);
+      if (call.args !== '' && parsed === null) {
+        truncatedToolCall = true;
+        continue;
+      }
+      content.push({
         type: 'tool_use',
         id: call.id,
         name: call.name,
-        input: safeParseJson(call.args) ?? {},
-      };
-      content.push(toolUse);
+        input: parsed ?? {},
+      } satisfies ToolUseBlock);
+      validToolCalls++;
     }
 
+    // stopReason precedence: only claim 'tool_use' when at least one call
+    // survived. If everything was truncated (or finish_reason was 'length'),
+    // report 'max_tokens' so the agent loop ends cleanly instead of executing a
+    // malformed call or looping on an empty tool turn.
     let stopReason: ProviderResult['stopReason'];
-    if (finish === 'tool_calls' || toolCalls.size > 0) stopReason = 'tool_use';
-    else if (finish === 'length') stopReason = 'max_tokens';
-    else if (finish === 'stop') stopReason = 'end_turn';
+    if (validToolCalls > 0) stopReason = 'tool_use';
+    else if (finish === 'length' || truncatedToolCall) stopReason = 'max_tokens';
     else stopReason = 'end_turn';
 
     return {