From 7d0c35bac1b3f93b9de0a21410e95c82942b7819 Mon Sep 17 00:00:00 2001 From: Contentrain Date: Fri, 15 May 2026 17:02:03 +0300 Subject: [PATCH 1/2] fix(chat): preserve assistant text blocks across tool iterations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The conversation loop streams assistant text deltas to SSE, but the per-iteration `assistantBlocks` was rebuilt only from `currentToolCalls` when pushing back into `config.messages` for the next Anthropic call. Two concrete regressions fell out of this: 1. Multi-turn protocol amputation. The next iteration saw the prior assistant turn as `[tool_use]` only — Claude's own narration ("I'll check the schema first, then update X") was missing from its view of the conversation. Anthropic accepts the payload but downstream tool sequencing degrades. 2. Empty `done.lastContent` on text-only end_turn. The streaming branch never set `lastAssistantContent`, so the most common chat shape (a plain text reply with no tool use) reached `saveChatResult` with `[]` — DB ended up storing the `'[tool calls]'` placeholder from the fallback at db.ts:250 instead of the actual assistant text. Fix is purely in-memory: `assistantBlocks` is now allocated at the top of each iteration and populated as content blocks come in (streamed text accumulates into a single block flushed at tool_use_start / message_end; tool_use is pushed alongside; non-streaming branch pushes every block from `response.content`). The old reconstruction at the tool-execution boundary is gone, and `lastAssistantContent` is assigned once outside the streaming/non-streaming split so end_turn and tool_use both produce the correct final shape. Four regression tests cover: text-only end_turn, streamed text → tool → next iteration, interleaved text/tool ordering, and non-streaming iter 2+ text + tool_use. No schema or persistence changes — DB storage of intermediate tool iterations and tool_result blocks stays out of scope for the upcoming tool-trace-persistence PR. --- server/utils/conversation-engine.ts | 38 +++- .../conversation-engine-regression.test.ts | 210 +++++++++++++++++- 2 files changed, 237 insertions(+), 11 deletions(-) diff --git a/server/utils/conversation-engine.ts b/server/utils/conversation-engine.ts index 67415e16..c4b259d2 100644 --- a/server/utils/conversation-engine.ts +++ b/server/utils/conversation-engine.ts @@ -95,28 +95,51 @@ export async function* runConversationLoop( iteration++ const isFirstIteration = iteration === 1 const currentToolCalls: Array<{ id: string, name: string, input: unknown }> = [] + const assistantBlocks: AIContentBlock[] = [] let stopReason: string | undefined if (isFirstIteration) { + let currentText = '' + const flushText = () => { + if (!currentText) return + assistantBlocks.push({ type: 'text', text: currentText }) + currentText = '' + } + for await (const streamEvent of aiProvider.streamCompletion( { model: config.model, system: config.systemPrompt, messages: config.messages, tools: config.tools, maxTokens: 4096, abortSignal: config.abortSignal }, config.apiKey, )) { switch (streamEvent.type) { case 'text': + currentText += streamEvent.content ?? '' yield { type: 'text', content: streamEvent.content } break case 'tool_use_start': + flushText() yield { type: 'tool_use', id: streamEvent.toolId, name: streamEvent.toolName } break - case 'tool_use_end': - currentToolCalls.push({ + case 'tool_use_end': { + const toolCall = { id: streamEvent.toolId!, name: streamEvent.toolName!, input: (typeof streamEvent.toolInput === 'object' && streamEvent.toolInput !== null) ? streamEvent.toolInput : {}, + } + currentToolCalls.push({ + id: toolCall.id, + name: toolCall.name, + input: toolCall.input, + }) + assistantBlocks.push({ + type: 'tool_use', + id: toolCall.id, + name: toolCall.name, + input: toolCall.input, }) break + } case 'message_end': + flushText() totalInputTokens += streamEvent.usage?.inputTokens ?? 0 totalOutputTokens += streamEvent.usage?.outputTokens ?? 0 stopReason = streamEvent.stopReason @@ -148,20 +171,15 @@ export async function* runConversationLoop( input: (typeof block.input === 'object' && block.input !== null) ? block.input : {}, }) } + assistantBlocks.push(block) } - lastAssistantContent = response.content } + lastAssistantContent = assistantBlocks + if (stopReason !== 'tool_use' || currentToolCalls.length === 0) break // === TOOL EXECUTION with state guard + workflow-aware auto-merge === - const assistantBlocks: AIContentBlock[] = currentToolCalls.map(tc => ({ - type: 'tool_use' as const, - id: tc.id, - name: tc.name, - input: tc.input, - })) - const toolResultBlocks: AIContentBlock[] = [] for (const tc of currentToolCalls) { diff --git a/tests/unit/conversation-engine-regression.test.ts b/tests/unit/conversation-engine-regression.test.ts index 274eba48..d03f447e 100644 --- a/tests/unit/conversation-engine-regression.test.ts +++ b/tests/unit/conversation-engine-regression.test.ts @@ -1,12 +1,90 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' +import type { AIMessage, AIProvider } from '../../server/providers/ai' import type { GitProvider } from '../../server/providers/git' import type { AgentPermissions } from '../../server/utils/agent-permissions' -import type { ChatUIContext } from '../../server/utils/agent-types' +import type { ChatUIContext, ProjectPhase } from '../../server/utils/agent-types' async function loadConversationEngineModule() { return import('../../server/utils/conversation-engine') } +function emptyAffectedValue() { + return { models: [], locales: [], snapshotChanged: false, branchesChanged: false } +} + +function stubLoopGlobals(aiProvider: Partial) { + vi.stubGlobal('emptyAffected', vi.fn(emptyAffectedValue)) + vi.stubGlobal('mergeAffected', vi.fn((a, b) => ({ + models: [...new Set([...a.models, ...b.models])], + locales: [...new Set([...a.locales, ...b.locales])], + snapshotChanged: a.snapshotChanged || b.snapshotChanged, + branchesChanged: a.branchesChanged || b.branchesChanged, + }))) + vi.stubGlobal('checkStateTransition', vi.fn().mockReturnValue({ + allowed: false, + reason: 'blocked by test', + suggestion: 'continue', + })) + vi.stubGlobal('useAIProvider', vi.fn().mockReturnValue(aiProvider)) +} + +function createToolContext() { + return { + engine: {} as never, + git: {} as GitProvider, + userEmail: 'user@example.com', + userId: 'user-1', + contentRoot: 'content', + workflow: 'auto-merge', + permissions: { + workspaceRole: 'owner', + projectRole: null, + specificModels: false, + allowedModels: [], + allowedLocales: [], + availableTools: ['test_tool', 'second_tool'], + } as AgentPermissions, + plan: 'pro', + projectId: 'project-1', + workspaceId: 'workspace-1', + uiContext: { + activeModelId: null, + activeLocale: 'en', + activeEntryId: null, + panelState: 'overview', + activeBranch: null, + } as ChatUIContext, + phase: 'active' as ProjectPhase, + } +} + +async function collectConversationEvents(input: { + aiProvider: Partial + messages?: AIMessage[] + maxToolIterations?: number +}) { + stubLoopGlobals(input.aiProvider) + const { runConversationLoop } = await loadConversationEngineModule() + const messages = input.messages ?? [{ role: 'user', content: 'hello' } as AIMessage] + const events = [] + + for await (const evt of runConversationLoop( + { + model: 'claude-test', + apiKey: 'sk-test', + systemPrompt: 'system', + messages, + tools: [{ name: 'test_tool', description: 'test', inputSchema: { type: 'object' } }], + maxToolIterations: input.maxToolIterations, + }, + createToolContext(), + )) { + events.push(evt) + } + + return { events, messages } +} + describe('conversation engine regression', () => { beforeEach(() => { vi.resetModules() @@ -16,6 +94,136 @@ describe('conversation engine regression', () => { vi.unstubAllGlobals() }) + it('returns streamed text-only responses in done.lastContent', async () => { + const { events, messages } = await collectConversationEvents({ + aiProvider: { + streamCompletion: async function* () { + yield { type: 'text', content: 'Hello ' } + yield { type: 'text', content: 'world.' } + yield { + type: 'message_end', + stopReason: 'end_turn', + usage: { inputTokens: 7, outputTokens: 3 }, + } + }, + createCompletion: vi.fn(), + }, + }) + + const done = events[events.length - 1]! + expect(done).toMatchObject({ + type: 'done', + usage: { inputTokens: 7, outputTokens: 3 }, + lastContent: [{ type: 'text', text: 'Hello world.' }], + }) + expect(messages).toHaveLength(1) + }) + + it('preserves streamed assistant text before tool use in the next model message', async () => { + const { events, messages } = await collectConversationEvents({ + aiProvider: { + streamCompletion: async function* () { + yield { type: 'text', content: 'I will inspect the project.' } + yield { type: 'tool_use_start', toolId: 'tool-1', toolName: 'test_tool' } + yield { type: 'tool_use_end', toolId: 'tool-1', toolName: 'test_tool', toolInput: { model: 'posts' } } + yield { + type: 'message_end', + stopReason: 'tool_use', + usage: { inputTokens: 10, outputTokens: 5 }, + } + }, + createCompletion: vi.fn().mockResolvedValue({ + content: [{ type: 'text', text: 'Done.' }], + stopReason: 'end_turn', + usage: { inputTokens: 4, outputTokens: 2 }, + }), + }, + }) + + expect(messages[1]).toEqual({ + role: 'assistant', + content: [ + { type: 'text', text: 'I will inspect the project.' }, + { type: 'tool_use', id: 'tool-1', name: 'test_tool', input: { model: 'posts' } }, + ], + }) + expect(messages[2]).toMatchObject({ + role: 'user', + content: [{ type: 'tool_result', toolUseId: 'tool-1' }], + }) + expect(events[events.length - 1]).toMatchObject({ + type: 'done', + lastContent: [{ type: 'text', text: 'Done.' }], + }) + }) + + it('preserves streamed interleaved text and tool_use block order', async () => { + const { events, messages } = await collectConversationEvents({ + maxToolIterations: 1, + aiProvider: { + streamCompletion: async function* () { + yield { type: 'text', content: 'First step.' } + yield { type: 'tool_use_start', toolId: 'tool-1', toolName: 'test_tool' } + yield { type: 'tool_use_end', toolId: 'tool-1', toolName: 'test_tool', toolInput: { first: true } } + yield { type: 'text', content: 'Second step.' } + yield { type: 'tool_use_start', toolId: 'tool-2', toolName: 'second_tool' } + yield { type: 'tool_use_end', toolId: 'tool-2', toolName: 'second_tool', toolInput: { second: true } } + yield { + type: 'message_end', + stopReason: 'tool_use', + usage: { inputTokens: 12, outputTokens: 6 }, + } + }, + createCompletion: vi.fn(), + }, + }) + + const expectedBlocks = [ + { type: 'text', text: 'First step.' }, + { type: 'tool_use', id: 'tool-1', name: 'test_tool', input: { first: true } }, + { type: 'text', text: 'Second step.' }, + { type: 'tool_use', id: 'tool-2', name: 'second_tool', input: { second: true } }, + ] + expect(messages[1]).toEqual({ role: 'assistant', content: expectedBlocks }) + expect(events[events.length - 1]).toMatchObject({ + type: 'done', + lastContent: expectedBlocks, + }) + }) + + it('preserves non-streaming assistant text before tool use in later iterations', async () => { + const { messages } = await collectConversationEvents({ + maxToolIterations: 2, + aiProvider: { + streamCompletion: async function* () { + yield { type: 'tool_use_start', toolId: 'tool-1', toolName: 'test_tool' } + yield { type: 'tool_use_end', toolId: 'tool-1', toolName: 'test_tool', toolInput: { first: true } } + yield { + type: 'message_end', + stopReason: 'tool_use', + usage: { inputTokens: 5, outputTokens: 2 }, + } + }, + createCompletion: vi.fn().mockResolvedValue({ + content: [ + { type: 'text', text: 'I need one more check.' }, + { type: 'tool_use', id: 'tool-2', name: 'test_tool', input: { second: true } }, + ], + stopReason: 'tool_use', + usage: { inputTokens: 6, outputTokens: 3 }, + }), + }, + }) + + expect(messages[3]).toEqual({ + role: 'assistant', + content: [ + { type: 'text', text: 'I need one more check.' }, + { type: 'tool_use', id: 'tool-2', name: 'test_tool', input: { second: true } }, + ], + }) + }) + it('emits webhook events for content-mutating tools', async () => { const { emptyAffected } = await import('../../server/utils/agent-types') const git = {} as GitProvider From 4659c05b5b41498fedcccc03684a79a6b588a11c Mon Sep 17 00:00:00 2001 From: Contentrain Date: Fri, 15 May 2026 17:08:45 +0300 Subject: [PATCH 2/2] test(chat): update integration assertion to expect populated assistantText MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The integration test was asserting that `saveChatResult` is called with `('', [])` for assistantText and assistantContent — which is the exact buggy behavior the previous commit fixed. With streamed text now correctly preserved end-to-end, the chat-route test must expect the populated values it receives from the mock provider. --- tests/integration/chat-route.integration.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/chat-route.integration.test.ts b/tests/integration/chat-route.integration.test.ts index 702eb8dd..4ab4cacc 100644 --- a/tests/integration/chat-route.integration.test.ts +++ b/tests/integration/chat-route.integration.test.ts @@ -194,8 +194,8 @@ describe('chat route integration', () => { expect(saveChatResult).toHaveBeenCalledWith( 'conversation-new', 'hello', - '', - [], + 'Hello from the agent.', + [{ type: 'text', text: 'Hello from the agent.' }], expect.any(String), 12, 24,