From dd29d6d2f447c77120345f9d1073eff0363158e7 Mon Sep 17 00:00:00 2001 From: lishuceo Date: Mon, 1 Jun 2026 10:13:45 +0800 Subject: [PATCH 1/3] =?UTF-8?q?fix:=20=E6=94=B6=E5=88=B0=20result=20?= =?UTF-8?q?=E6=B6=88=E6=81=AF=E5=90=8E=E4=B8=BB=E5=8A=A8=E8=B7=B3=E5=87=BA?= =?UTF-8?q?=20SDK=20=E6=B6=88=E6=81=AF=E5=BE=AA=E7=8E=AF,=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=E8=AF=AF=E5=88=A4=20idle=20=E8=B6=85=E6=97=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 线上案例:idle timeout 日志中 lastResetSource=msg:result:success、 elapsedMs=851539(14分钟),说明 query 在约第 4 分钟已发出 result(success) 终止消息,但 SDK 偶发未关闭异步迭代器,for-await 继续阻塞等待永不到来的下一条 消息,600s 后被 idle timer 误判超时并 abort,把一个已经成功的 query 当成错误 抛给用户。 修复:给消息循环加 messageLoop 标签,case 'result' 处理后 break messageLoop 立即结束循环(result 是 query 的终止消息,其后不会再有有效消息),不再依赖 SDK 自然关闭流。break 加标签确保跳出 for-await 而非仅 switch。 Co-Authored-By: Claude Opus 4.8 (1M context) --- src/claude/executor.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/claude/executor.ts b/src/claude/executor.ts index ab94cd1..8001312 100644 --- a/src/claude/executor.ts +++ b/src/claude/executor.ts @@ -984,6 +984,7 @@ export class ClaudeExecutor { try { // 遍历 SDK 流式消息 + messageLoop: for await (const message of q) { // 每收到消息重置 idle 计时器 resetIdleTimer(`msg:${message.type}${'subtype' in message ? ':' + (message as Record).subtype : ''}`); @@ -1123,7 +1124,12 @@ export class ClaudeExecutor { case 'result': resultMessage = message; - break; + // result 是 query 的终止消息。SDK 偶发在发出 result 后不关闭异步迭代器, + // 导致 for-await 继续阻塞等待永不到来的下一条消息,最终被 idle timer 误判 + // 为超时(即使 query 已成功),把成功结果当成错误抛给用户(见 idle timeout + // lastResetSource=msg:result:success 的线上案例)。收到 result 立即跳出循环, + // 主动结束,避免空转到超时。break 标签确保跳出 for-await 而非仅 switch。 + break messageLoop; default: // tool_progress, stream_event 等其他消息类型 — 记录以便诊断 idle timeout 间隙 From d29a0a5ec7a252001a7e0c54e323ad4be3227a5c Mon Sep 17 00:00:00 2001 From: lishuceo Date: Mon, 1 Jun 2026 10:13:45 +0800 Subject: [PATCH 2/3] =?UTF-8?q?test:=20=E5=A2=9E=E5=8A=A0=20result=20?= =?UTF-8?q?=E5=90=8E=E6=B5=81=E5=8D=A1=E6=AD=BB=E7=9A=84=20idle-timeout=20?= =?UTF-8?q?=E5=9B=9E=E5=BD=92=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 构造 SDK 在发出 result(success) 后 next() 永不 resolve 的迭代器,验证 execute() 仍能立即返回成功结果且只读取 init+result 两条消息。该测试在修复 前会因第三次 next() 永久阻塞而超时失败,修复后通过。 Co-Authored-By: Claude Opus 4.8 (1M context) --- src/claude/__tests__/executor.test.ts | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/claude/__tests__/executor.test.ts b/src/claude/__tests__/executor.test.ts index 70f4917..7f714f1 100644 --- a/src/claude/__tests__/executor.test.ts +++ b/src/claude/__tests__/executor.test.ts @@ -581,4 +581,37 @@ describe('ClaudeExecutor', () => { expect(opts.env).toBeUndefined(); }); }); + + describe('result message terminates loop (idle-timeout regression)', () => { + it('should return success immediately when SDK stream hangs after the result message', async () => { + // 复现线上 bug:SDK 发出 result(success) 后不关闭异步迭代器,next() 永不 resolve。 + // 修复前 for-await 会继续阻塞等待下一条永不到来的消息,600s 后被 idle timer 误判 + // 为超时,把已成功的 query 当成错误抛出(日志特征 lastResetSource=msg:result:success)。 + // 修复后收到 result 立即 break,不再调用 next(),query 正常返回成功结果。 + const messages = [ + { type: 'system', subtype: 'init', session_id: 'sess-1', model: 'claude', tools: [] }, + { type: 'result', subtype: 'success', session_id: 'sess-1', result: 'hello', duration_ms: 100 }, + ]; + let nextCallCount = 0; + const returnSpy = vi.fn(() => Promise.resolve({ value: undefined, done: true })); + mockQueryInstance[Symbol.asyncIterator].mockReturnValue({ + next: () => { + nextCallCount++; + if (nextCallCount <= messages.length) { + return Promise.resolve({ value: messages[nextCallCount - 1], done: false }); + } + // 第三次及以后:永不 resolve(模拟 SDK 在 result 后卡住不关闭流) + return new Promise(() => {}); + }, + return: returnSpy, + }); + + const result = await executor.execute(makeInput()); + + expect(result.success).toBe(true); + expect(result.output).toBe('hello'); + // 只应读取 init + result 两条消息,绝不尝试读取第三条(那会永久阻塞) + expect(nextCallCount).toBe(2); + }); + }); }); From 5327d5a6ed6525021dc50ffc45248955f91bed5d Mon Sep 17 00:00:00 2001 From: lishuceo Date: Mon, 1 Jun 2026 14:34:55 +0800 Subject: [PATCH 3/3] =?UTF-8?q?test:=20=E5=9B=9E=E5=BD=92=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E8=A1=A5=E5=85=85=20returnSpy=20=E6=96=AD=E8=A8=80,?= =?UTF-8?q?=E6=98=BE=E5=BC=8F=E9=AA=8C=E8=AF=81=20iterator=20=E6=B8=85?= =?UTF-8?q?=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 采纳 review 建议:for-await 通过 break 提前退出时会调用 iterator.return(), 触发 SDK 内部清理。补一条 expect(returnSpy).toHaveBeenCalled() 让该语义在 测试中显式可见,而非仅靠 nextCallCount===2 间接推断。 Co-Authored-By: Claude Opus 4.8 (1M context) --- src/claude/__tests__/executor.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/claude/__tests__/executor.test.ts b/src/claude/__tests__/executor.test.ts index 7f714f1..e424567 100644 --- a/src/claude/__tests__/executor.test.ts +++ b/src/claude/__tests__/executor.test.ts @@ -612,6 +612,8 @@ describe('ClaudeExecutor', () => { expect(result.output).toBe('hello'); // 只应读取 init + result 两条消息,绝不尝试读取第三条(那会永久阻塞) expect(nextCallCount).toBe(2); + // for-await 通过 break 提前退出时会调用 iterator.return(),触发 SDK 内部清理 + expect(returnSpy).toHaveBeenCalled(); }); }); });