Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 75 additions & 3 deletions web/src/realtime/hooks/contextFormatters.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import { describe, expect, it } from 'vitest'
import type { DecryptedMessage } from '@/types/api'
import { extractLastAssistantSpeakable, formatMessage, formatNewMessages, formatReadyEvent } from './contextFormatters'
import {
extractLastAssistantSpeakable,
formatMessage,
formatNewMessages,
formatPermissionRequest,
formatReadyEvent,
} from './contextFormatters'

function msg(partial: Pick<DecryptedMessage, 'id' | 'seq' | 'content'>): DecryptedMessage {
return {
Expand Down Expand Up @@ -122,6 +128,17 @@ describe('formatReadyEvent', () => {
const event = formatReadyEvent(sessionId, ' ')
expect(event).toContain('Use the latest agent message already present in context')
})

// An explicit label must appear in the ready event and the old hard-coded name must not.
it('uses the provided agent label', () => {
    const readyText = formatReadyEvent(sessionId, null, 'Codex')

    expect(readyText).toContain('Codex finished working')
    expect(readyText).not.toContain('Claude Code')
})

// With no label argument the generic 'coding agent' wording is used.
it('defaults to coding agent label', () => {
    const readyText = formatReadyEvent(sessionId)

    expect(readyText).toContain('coding agent finished working')
})
})

describe('formatMessage', () => {
Expand All @@ -141,7 +158,7 @@ describe('formatMessage', () => {
}
}))

expect(formatted).toContain('Claude Code:')
expect(formatted).toContain('coding agent:')
expect(formatted).toContain('<text>Indexed 5,018 items in the search database.</text>')
})

Expand Down Expand Up @@ -188,7 +205,54 @@ describe('formatMessage', () => {
}))

expect(formatted).toContain('Here is the result.')
expect(formatted).toContain('Claude Code is using Bash')
expect(formatted).toContain('coding agent is using Bash')
})

// Plain assistant text is prefixed with the caller-supplied label.
it('uses the provided label for assistant text', () => {
    const assistantMessage = msg({
        id: '1',
        seq: 1,
        content: { role: 'assistant', content: 'Refactor complete.' }
    })

    const output = formatMessage(assistantMessage, 'Cursor')

    expect(output).toContain('Cursor:')
    expect(output).toContain('Refactor complete.')
    expect(output).not.toContain('Claude Code')
})

// Omitting the label falls back to the generic 'coding agent' prefix.
it('defaults to coding agent when no label is given', () => {
    const assistantMessage = msg({
        id: '1',
        seq: 1,
        content: { role: 'assistant', content: 'Done.' }
    })

    const output = formatMessage(assistantMessage)

    expect(output).toContain('coding agent:')
    expect(output).not.toContain('Claude Code')
})

// Tool-use entries are announced with the caller-supplied label.
it('uses the provided label for tool-call lines', () => {
    const toolUseMessage = msg({
        id: '1',
        seq: 1,
        content: {
            role: 'assistant',
            content: [{ type: 'tool_use', name: 'Bash', input: { command: 'ls' } }]
        }
    })

    const output = formatMessage(toolUseMessage, 'Gemini')

    expect(output).toContain('Gemini is using Bash')
    expect(output).not.toContain('Claude Code')
})
})

// Permission-request text: explicit label versus the default label.
describe('formatPermissionRequest', () => {
    it('uses the provided label', () => {
        const requestText = formatPermissionRequest('sid', 'rid', 'Bash', {}, 'OpenCode')

        expect(requestText).toContain('OpenCode is requesting permission')
        expect(requestText).not.toContain('Claude Code')
    })

    it('defaults to coding agent', () => {
        const requestText = formatPermissionRequest('sid', 'rid', 'Bash', {})

        expect(requestText).toContain('coding agent is requesting permission')
    })
})

Expand All @@ -214,4 +278,12 @@ describe('formatNewMessages', () => {
expect(update).toContain('New messages in session: session-1')
expect(update).toContain('Local database file size is 2.43 GiB.')
})

// The label passed to formatNewMessages reaches each formatted message.
it('uses the provided label in formatted message output', () => {
    const incoming = [
        msg({ id: '1', seq: 1, content: { role: 'assistant', content: 'Build succeeded.' } })
    ]

    const update = formatNewMessages('session-1', incoming, 'Cursor')

    expect(update).toContain('Cursor:')
    expect(update).not.toContain('Claude Code')
})
})
41 changes: 21 additions & 20 deletions web/src/realtime/hooks/contextFormatters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ function unwrapOutputContent(content: unknown): { roleOverride: NormalizedRole |
return { roleOverride, content: messageContent }
}

/**
 * Wrap a piece of text in a `<text>` tag behind a speaker prefix.
 *
 * When `role` is 'assistant' the prefix is `agentLabel` (default 'coding agent').
 * For any other role, including null, the prefix is "User sent message".
 *
 * NOTE(review): this span is rendered from a diff. The first signature line and
 * the first `return` in the assistant branch are the pre-change version (hard-coded
 * "Claude Code"); the line directly after each is its replacement.
 */
function formatPlainText(role: NormalizedRole | null, text: string): string {
function formatPlainText(role: NormalizedRole | null, text: string, agentLabel = 'coding agent'): string {
if (role === 'assistant') {
return `Claude Code: \n<text>${text}</text>`
return `${agentLabel}: \n<text>${text}</text>`
}
return `User sent message: \n<text>${text}</text>`
}
Expand All @@ -80,9 +80,10 @@ export function formatPermissionRequest(
sessionId: string,
requestId: string,
toolName: string,
toolArgs: unknown
toolArgs: unknown,
agentLabel = 'coding agent'
): string {
return `Claude Code is requesting permission to use ${toolName} (session ${sessionId}):
return `${agentLabel} is requesting permission to use ${toolName} (session ${sessionId}):
<request_id>${requestId}</request_id>
<tool_name>${toolName}</tool_name>
<tool_args>${JSON.stringify(toolArgs)}</tool_args>`
Expand All @@ -91,7 +92,7 @@ export function formatPermissionRequest(
/**
* Format a single message for voice context
*/
export function formatMessage(message: DecryptedMessage): string | null {
export function formatMessage(message: DecryptedMessage, agentLabel = 'coding agent'): string | null {
const { role, content: wrappedContent } = unwrapRoleWrappedContent(message)
const { roleOverride, content } = unwrapOutputContent(wrappedContent)
const normalizedRole = roleOverride ?? role
Expand All @@ -103,7 +104,7 @@ export function formatMessage(message: DecryptedMessage): string | null {
const speakable = !isContentArray(content) ? extractSpeakableFromContent(content) : null
if (speakable) {
const roleForFormat = normalizedRole === 'user' ? 'user' : 'assistant'
return formatPlainText(roleForFormat, speakable)
return formatPlainText(roleForFormat, speakable, agentLabel)
}

if (!isContentArray(content)) {
Expand All @@ -122,13 +123,13 @@ export function formatMessage(message: DecryptedMessage): string | null {

for (const item of content) {
if (item.type === 'text' && item.text) {
lines.push(formatPlainText(isAssistant ? 'assistant' : 'user', item.text))
lines.push(formatPlainText(isAssistant ? 'assistant' : 'user', item.text, agentLabel))
} else if (item.type === 'tool_use' && !VOICE_CONFIG.DISABLE_TOOL_CALLS) {
const name = item.name || 'unknown'
if (VOICE_CONFIG.LIMITED_TOOL_CALLS) {
lines.push(`Claude Code is using ${name}`)
lines.push(`${agentLabel} is using ${name}`)
} else {
lines.push(`Claude Code is using ${name} with arguments: <arguments>${JSON.stringify(item.input)}</arguments>`)
lines.push(`${agentLabel} is using ${name} with arguments: <arguments>${JSON.stringify(item.input)}</arguments>`)
}
}
}
Expand Down Expand Up @@ -214,34 +215,34 @@ export function extractLastAssistantSpeakable(messages: DecryptedMessage[]): str
return null
}

/**
 * Format one new message as a context update for a session.
 *
 * Returns null when `formatMessage` yields nothing for the message. Otherwise returns
 * a "New message in session: <id>" header, a blank line, and the formatted message.
 * `agentLabel` (default 'coding agent') is forwarded to `formatMessage`.
 *
 * NOTE(review): this span is rendered from a diff. The first signature line and the
 * first `const formatted` line are the pre-change version; the pair after them is
 * the replacement.
 */
export function formatNewSingleMessage(sessionId: string, message: DecryptedMessage): string | null {
const formatted = formatMessage(message)
export function formatNewSingleMessage(sessionId: string, message: DecryptedMessage, agentLabel = 'coding agent'): string | null {
const formatted = formatMessage(message, agentLabel)
if (!formatted) {
return null
}
return 'New message in session: ' + sessionId + '\n\n' + formatted
}

/**
 * Format a batch of new messages as one context update for a session.
 *
 * A copy of `messages` is sorted by ascending `seq` (a missing `seq` counts as 0),
 * each message is formatted with `formatMessage`, and falsy results are dropped.
 * Returns null when nothing remains. Otherwise returns a "New messages in session: <id>"
 * header followed by the formatted messages separated by blank lines.
 *
 * NOTE(review): this span is rendered from a diff. The first signature line and
 * `.map(formatMessage)` are the pre-change version; the line after each is the replacement.
 */
export function formatNewMessages(sessionId: string, messages: DecryptedMessage[]): string | null {
export function formatNewMessages(sessionId: string, messages: DecryptedMessage[], agentLabel = 'coding agent'): string | null {
// Spread into a new array so the caller's array is not reordered by sort().
const formatted = [...messages]
.sort((a, b) => (a.seq ?? 0) - (b.seq ?? 0))
.map(formatMessage)
// Wrapped in an arrow: passing formatMessage directly would hand map's index
// argument to the new second parameter (agentLabel).
.map(m => formatMessage(m, agentLabel))
.filter(Boolean)
if (formatted.length === 0) {
return null
}
return 'New messages in session: ' + sessionId + '\n\n' + formatted.join('\n\n')
}

/**
 * Format a session's message history as a single string.
 *
 * When `VOICE_CONFIG.MAX_HISTORY_MESSAGES` is greater than 0, only that many trailing
 * messages are formatted; otherwise all of them are. Messages are formatted in the
 * order given (no sorting by `seq` here) and falsy results are dropped. The result
 * always starts with a "History of messages in session: <id>" header, even when no
 * message produced output.
 *
 * NOTE(review): this span is rendered from a diff. The first signature line and the
 * first `const formatted` line are the pre-change version; the line after each is
 * the replacement.
 */
export function formatHistory(sessionId: string, messages: DecryptedMessage[]): string {
export function formatHistory(sessionId: string, messages: DecryptedMessage[], agentLabel = 'coding agent'): string {
const messagesToFormat = VOICE_CONFIG.MAX_HISTORY_MESSAGES > 0
? messages.slice(-VOICE_CONFIG.MAX_HISTORY_MESSAGES)
: messages
const formatted = messagesToFormat.map(formatMessage).filter(Boolean)
// Arrow wrapper keeps map's index argument from being passed as agentLabel.
const formatted = messagesToFormat.map(m => formatMessage(m, agentLabel)).filter(Boolean)
return 'History of messages in session: ' + sessionId + '\n\n' + formatted.join('\n\n')
}

export function formatSessionFull(session: Session | null, messages: DecryptedMessage[]): string {
export function formatSessionFull(session: Session | null, messages: DecryptedMessage[], agentLabel = 'coding agent'): string {
if (!session) {
return 'Session not available'
}
Expand All @@ -262,7 +263,7 @@ export function formatSessionFull(session: Session | null, messages: DecryptedMe

lines.push('## Our interaction history so far')
lines.push('')
lines.push(formatHistory(session.id, messages))
lines.push(formatHistory(session.id, messages, agentLabel))

return lines.join('\n\n')
}
Expand All @@ -279,10 +280,10 @@ export function formatSessionFocus(sessionId: string, _metadata?: SessionMetadat
return `Session became focused: ${sessionId}`
}

/**
 * Build the text announcing that the agent finished working in a session.
 *
 * If `lastAssistantText` has non-whitespace content, the trimmed text is embedded in a
 * `<text>` tag with an instruction to summarize it immediately. Otherwise (missing,
 * null, or whitespace only) the text asks for a summary of the latest agent message
 * already present in context. `agentLabel` (default 'coding agent') names the agent.
 *
 * NOTE(review): this span is rendered from a diff. The first signature line and each
 * `return` that starts with "The coding agent" are the pre-change version; the line
 * after each is the replacement.
 * NOTE(review): the pre-change wording began "The coding agent finished…". With the
 * default label the replacement begins "coding agent finished…", dropping the article.
 * Confirm that this is intended.
 */
export function formatReadyEvent(sessionId: string, lastAssistantText?: string | null): string {
export function formatReadyEvent(sessionId: string, lastAssistantText?: string | null, agentLabel = 'coding agent'): string {
const trimmed = lastAssistantText?.trim()
if (trimmed) {
return `The coding agent finished working in session: ${sessionId}. Summarize this for the human immediately:\n<text>${trimmed}</text>`
return `${agentLabel} finished working in session: ${sessionId}. Summarize this for the human immediately:\n<text>${trimmed}</text>`
}
return `The coding agent finished working in session: ${sessionId}. Use the latest agent message already present in context and summarize it for the human immediately.`
return `${agentLabel} finished working in session: ${sessionId}. Use the latest agent message already present in context and summarize it for the human immediately.`
}
21 changes: 15 additions & 6 deletions web/src/realtime/hooks/voiceHooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,20 @@ import {
extractLastAssistantSpeakable
} from './contextFormatters'
import { VOICE_CONFIG } from '../voiceConfig'
import type { DecryptedMessage, Session } from '@/types/api'
import { getFlavorLabel, isKnownFlavor } from '@hapi/protocol'
import type { DecryptedMessage, Session, SessionMetadataSummary } from '@/types/api'

/**
 * Local, all-optional shape for session metadata.
 *
 * NOTE(review): this is distinct from the `SessionMetadataSummary` type imported from
 * '@/types/api'; where this local interface is used is not visible in this view.
 */
interface SessionMetadata {
// Optional summary object with an optional text field.
summary?: { text?: string }
// NOTE(review): presumably a filesystem path for the session; confirm against callers.
path?: string
// NOTE(review): presumably identifies the machine running the session; confirm.
machineId?: string
}

/**
 * Pick the label used to name the agent in voice context text.
 *
 * Reads `flavor` from the session's metadata. When `isKnownFlavor` accepts it, the
 * label comes from `getFlavorLabel`; otherwise the generic 'coding agent' is returned.
 *
 * @param session - The session to inspect; may be null.
 * @returns The flavor's label, or 'coding agent' as the fallback.
 */
function getAgentLabel(session: Session | null): string {
    const metadata = session?.metadata as SessionMetadataSummary | undefined
    const flavor = metadata?.flavor

    if (isKnownFlavor(flavor)) {
        return getFlavorLabel(flavor)
    }
    return 'coding agent'
}

// Track which sessions have been reported
// Module-level set of session ids; starts empty.
const shownSessions = new Set<string>()
// Starts as null. NOTE(review): presumably holds the id of the most recently focused
// session; its reads and writes are not visible in this view, so confirm.
let lastFocusSession: string | null = null
Expand Down Expand Up @@ -65,7 +71,7 @@ function reportSession(sessionId: string) {
if (!session) return

const messages = messagesGetter?.(sessionId) ?? []
const contextUpdate = formatSessionFull(session, messages)
const contextUpdate = formatSessionFull(session, messages, getAgentLabel(session))
reportContextualUpdate(contextUpdate)
}

Expand Down Expand Up @@ -110,8 +116,9 @@ export const voiceHooks = {
onPermissionRequested(sessionId: string, requestId: string, toolName: string, toolArgs: unknown) {
// Config switch: do nothing when permission-request reporting is disabled.
if (VOICE_CONFIG.DISABLE_PERMISSION_REQUESTS) return

// Look up the session (null when there is no getter or no result) to derive the agent label.
const session = sessionGetter?.(sessionId) ?? null
// Report the session itself before sending the permission-request text.
reportSession(sessionId)
// NOTE(review): rendered from a diff. The next line is the pre-change call without a
// label; the line after it is the replacement that passes getAgentLabel(session).
reportTextUpdate(formatPermissionRequest(sessionId, requestId, toolName, toolArgs))
reportTextUpdate(formatPermissionRequest(sessionId, requestId, toolName, toolArgs, getAgentLabel(session)))
},

/**
Expand All @@ -120,8 +127,9 @@ export const voiceHooks = {
onMessages(sessionId: string, messages: DecryptedMessage[]) {
// Config switch: do nothing when message reporting is disabled.
if (VOICE_CONFIG.DISABLE_MESSAGES) return

// Look up the session (null when there is no getter or no result) to derive the agent label.
const session = sessionGetter?.(sessionId) ?? null
// Report the session itself before sending the new-messages update.
reportSession(sessionId)
// formatNewMessages returns null when no message produced output.
// NOTE(review): presumably reportContextualUpdate tolerates null; confirm.
// NOTE(review): rendered from a diff. The next line is the pre-change call without a
// label; the line after it is the replacement that passes getAgentLabel(session).
reportContextualUpdate(formatNewMessages(sessionId, messages))
reportContextualUpdate(formatNewMessages(sessionId, messages, getAgentLabel(session)))
},

/**
Expand All @@ -136,7 +144,7 @@ export const voiceHooks = {
const session = sessionGetter?.(sessionId) ?? null
const messages = messagesGetter?.(sessionId) ?? []

let prompt = 'THIS IS AN ACTIVE SESSION: \n\n' + formatSessionFull(session, messages)
const prompt = 'THIS IS AN ACTIVE SESSION: \n\n' + formatSessionFull(session, messages, getAgentLabel(session))
shownSessions.add(sessionId)

return prompt
Expand All @@ -148,10 +156,11 @@ export const voiceHooks = {
onReady(sessionId: string) {
// Config switch: do nothing when ready-event reporting is disabled.
if (VOICE_CONFIG.DISABLE_READY_EVENTS) return

// Look up the session (null when there is no getter or no result) to derive the agent label.
const session = sessionGetter?.(sessionId) ?? null
// Report the session itself before sending the ready event.
reportSession(sessionId)
// Fall back to an empty list when there is no getter or no messages.
const messages = messagesGetter?.(sessionId) ?? []
// May be null; formatReadyEvent then asks for a summary of what is already in context.
const lastAssistantText = extractLastAssistantSpeakable(messages)
// NOTE(review): rendered from a diff. The next line is the pre-change call without a
// label; the line after it is the replacement that passes getAgentLabel(session).
reportTextUpdate(formatReadyEvent(sessionId, lastAssistantText))
reportTextUpdate(formatReadyEvent(sessionId, lastAssistantText, getAgentLabel(session)))
},

/**
Expand Down