diff --git a/src/api/generated/data-contracts.ts b/src/api/generated/data-contracts.ts index 985eb1b..5ebd0b4 100644 --- a/src/api/generated/data-contracts.ts +++ b/src/api/generated/data-contracts.ts @@ -838,14 +838,6 @@ export interface DeepgramTtsSettings { * @exclusiveMin true */ sampleRate?: number; - /** - * Bit rate for audio output (e.g., 32000, 64000, 128000). Applies to certain formats like mp3, opus, aac - * @min 0 - * @exclusiveMin true - */ - bitRate?: number; - /** Audio container format. Use "none" for raw audio, "wav" for WAV container, "ogg" for Ogg container */ - container?: "none" | "wav" | "ogg"; /** Markers to identify sections of text that should not be spoken */ noSpeechMarkers?: { start: string; @@ -855,6 +847,13 @@ export interface DeepgramTtsSettings { removeExclamationMarks?: boolean; /** Whether to use sentence splitter for text processing, defaults to true */ useSentenceSplitter?: boolean; + /** + * Speaking rate multiplier (0.25 to 4.0, default: 1.0) + * @min 0.25 + * @max 4 + */ + speed?: number; + [key: string]: any; } export interface CartesiaTtsSettings { @@ -8004,11 +8003,65 @@ export interface ProjectExchangeV1 { userProfileVariableDescriptors?: FieldDescriptor[]; /** Local document ID of the classifier used to evaluate guardrails; remapped on import */ defaultGuardrailClassifierId?: string | null; + /** Sample copy configuration including the default classifier used to evaluate prompt triggers */ + sampleCopyConfig?: SampleCopyConfigExchangeV1; + /** Local document ID of the stage to start new conversations at; remapped on import */ + startingStageId?: string | null; /** * Timeout in seconds for active conversations with no activity * @min 0 */ conversationTimeoutSeconds?: number | null; + /** Audio recording configuration for conversation debugging */ + recordingConfig?: RecordingConfigExchangeV1; + /** Project-level LLM token cost management configuration with provider hints */ + costManagementConfig?: 
CostManagementConfigExchangeV1; +} + +/** Sample copy configuration including the default classifier used to evaluate prompt triggers */ +export interface SampleCopyConfigExchangeV1 { + /** Local document ID of the classifier used to evaluate sample copy prompt triggers; remapped on import */ + defaultClassifierId?: string; +} + +/** Audio recording configuration for conversation debugging */ +export interface RecordingConfigExchangeV1 { + /** Whether audio recording is enabled for this project */ + enabled: boolean; + /** + * Whether to record user voice input. Defaults to true. + * @default true + */ + recordInput?: boolean; + /** + * Whether to record AI voice output. Defaults to true. + * @default true + */ + recordOutput?: boolean; + /** + * Audio format for saved recordings. Defaults to pcm_16000. + * @default "pcm_16000" + */ + format?: + | "mp3" + | "opus" + | "aac" + | "flac" + | "wav" + | "pcm_8000" + | "pcm_16000" + | "pcm_22050" + | "pcm_24000" + | "pcm_44100" + | "pcm_48000" + | "mulaw" + | "alaw"; +} + +/** Project-level LLM token cost management configuration with provider hints */ +export interface CostManagementConfigExchangeV1 { + /** Token cap definitions keyed by provider hint and model name */ + limits: Record<string, Record<string, ProviderModelLimits>>; } /** Agent entity in the exchange format */ diff --git a/src/api/openapi.json b/src/api/openapi.json index c550744..dbd5b42 100644 --- a/src/api/openapi.json +++ b/src/api/openapi.json @@ -1244,21 +1244,6 @@ "exclusiveMinimum": true, "description": "Sample rate for audio output in Hz (e.g., 8000, 16000, 24000, 48000). Availability depends on audio format" }, - "bitRate": { - "type": "integer", - "minimum": 0, - "exclusiveMinimum": true, - "description": "Bit rate for audio output (e.g., 32000, 64000, 128000). Applies to certain formats like mp3, opus, aac" - }, - "container": { - "type": "string", - "enum": [ - "none", - "wav", - "ogg" - ], - "description": "Audio container format. 
Use \"none\" for raw audio, \"wav\" for WAV container, \"ogg\" for Ogg container" - }, "noSpeechMarkers": { "type": "array", "items": { @@ -1285,11 +1270,20 @@ "useSentenceSplitter": { "type": "boolean", "description": "Whether to use sentence splitter for text processing, defaults to true" + }, + "speed": { + "type": "number", + "minimum": 0.25, + "maximum": 4, + "description": "Speaking rate multiplier (0.25 to 4.0, default: 1.0)" } }, "required": [ "provider" - ] + ], + "additionalProperties": { + "nullable": true + } }, "CartesiaTtsSettings": { "type": "object", @@ -18015,11 +18009,25 @@ "nullable": true, "description": "Local document ID of the classifier used to evaluate guardrails; remapped on import" }, + "sampleCopyConfig": { + "$ref": "#/components/schemas/SampleCopyConfigExchangeV1" + }, + "startingStageId": { + "type": "string", + "nullable": true, + "description": "Local document ID of the stage to start new conversations at; remapped on import" + }, "conversationTimeoutSeconds": { "type": "integer", "nullable": true, "minimum": 0, "description": "Timeout in seconds for active conversations with no activity" + }, + "recordingConfig": { + "$ref": "#/components/schemas/RecordingConfigExchangeV1" + }, + "costManagementConfig": { + "$ref": "#/components/schemas/CostManagementConfigExchangeV1" } }, "required": [ @@ -18028,6 +18036,78 @@ ], "description": "Project entity in the exchange format" }, + "SampleCopyConfigExchangeV1": { + "type": "object", + "properties": { + "defaultClassifierId": { + "type": "string", + "description": "Local document ID of the classifier used to evaluate sample copy prompt triggers; remapped on import" + } + }, + "description": "Sample copy configuration including the default classifier used to evaluate prompt triggers" + }, + "RecordingConfigExchangeV1": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Whether audio recording is enabled for this project" + }, + "recordInput": { + 
"type": "boolean", + "default": true, + "description": "Whether to record user voice input. Defaults to true." + }, + "recordOutput": { + "type": "boolean", + "default": true, + "description": "Whether to record AI voice output. Defaults to true." + }, + "format": { + "type": "string", + "enum": [ + "mp3", + "opus", + "aac", + "flac", + "wav", + "pcm_8000", + "pcm_16000", + "pcm_22050", + "pcm_24000", + "pcm_44100", + "pcm_48000", + "mulaw", + "alaw" + ], + "default": "pcm_16000", + "description": "Audio format for saved recordings. Defaults to pcm_16000." + } + }, + "required": [ + "enabled" + ], + "description": "Audio recording configuration for conversation debugging" + }, + "CostManagementConfigExchangeV1": { + "type": "object", + "properties": { + "limits": { + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ProviderModelLimits" + } + }, + "description": "Token cap definitions keyed by provider hint and model name" + } + }, + "required": [ + "limits" + ], + "description": "Project-level LLM token cost management configuration with provider hints" + }, "AgentExchangeV1": { "type": "object", "properties": { diff --git a/src/components/TtsProviderSettingsPanel.vue b/src/components/TtsProviderSettingsPanel.vue index 625e5b7..ddad4fa 100644 --- a/src/components/TtsProviderSettingsPanel.vue +++ b/src/components/TtsProviderSettingsPanel.vue @@ -98,6 +98,19 @@ function removeNoSpeechMarker(index: number) {

Voice Settings (Deepgram)

+ + + + + - - - - - - - - - -
diff --git a/src/views/PlaygroundView.vue b/src/views/PlaygroundView.vue index 834465e..0de66d8 100644 --- a/src/views/PlaygroundView.vue +++ b/src/views/PlaygroundView.vue @@ -1271,12 +1271,14 @@ async function connectWebSocket() { } event.voiceOutputId = msg.outputTurnId - const player = useAudioPlayback() - player.setOnEnded(() => { - activeVoiceOutputs.value.delete(msg.outputTurnId) - const client = wsClient.value as ReturnType | null - client?.client.value?.sendAudioPlaybackEnded(msg.outputTurnId) - }) + const player = useAudioPlayback() + player.setOnEnded(() => { + // Keep the player alive to receive subsequent audio chunks + // (e.g., main part after filler). Cleanup happens on the next + // start_ai_generation_output for a different turn. + const client = wsClient.value as ReturnType | null + client?.client.value?.sendAudioPlaybackEnded(msg.outputTurnId) + }) activeVoiceOutputs.value.set(msg.outputTurnId, { player: player as any, transcript: null diff --git a/src/views/administration/benchmark/BenchmarkProviderConfigEditView.vue b/src/views/administration/benchmark/BenchmarkProviderConfigEditView.vue index f44393a..d2927be 100644 --- a/src/views/administration/benchmark/BenchmarkProviderConfigEditView.vue +++ b/src/views/administration/benchmark/BenchmarkProviderConfigEditView.vue @@ -127,7 +127,7 @@ function initTtsProviderSettings(apiType: string): Record { case 'openai': return { provider: 'openai', model: '', voiceId: '', speed: 1.0, instructions: '', noSpeechMarkers: [], removeExclamationMarks: false, useSentenceSplitter: true } case 'deepgram': - return { provider: 'deepgram', model: undefined, voiceId: '', audioFormat: 'pcm_16000', sampleRate: 24000, container: 'none', noSpeechMarkers: [], removeExclamationMarks: false, useSentenceSplitter: true } + return { provider: 'deepgram', model: undefined, voiceId: '', audioFormat: 'pcm_16000', sampleRate: 16000, speed: 1.0, noSpeechMarkers: [], removeExclamationMarks: false, useSentenceSplitter: true } case 
'cartesia': return { provider: 'cartesia', model: '', voiceId: '', language: 'en', audioFormat: 'pcm_24000', speed: 'normal', emotion: [], maxBufferDelayMs: 3000, useSentenceSplitter: false, noSpeechMarkers: [], removeExclamationMarks: false } case 'azure': diff --git a/src/views/design/AgentEditView.vue b/src/views/design/AgentEditView.vue index b1b5967..8dbe39a 100644 --- a/src/views/design/AgentEditView.vue +++ b/src/views/design/AgentEditView.vue @@ -223,7 +223,7 @@ function handleTtsProviderChange() { voiceId: '', audioFormat: 'pcm_16000', sampleRate: 24000, - container: 'none', + speed: 1.0, noSpeechMarkers: [], removeExclamationMarks: false, useSentenceSplitter: true