Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 61 additions & 8 deletions src/api/generated/data-contracts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -838,14 +838,6 @@ export interface DeepgramTtsSettings {
* @exclusiveMin true
*/
sampleRate?: number;
/**
* Bit rate for audio output (e.g., 32000, 64000, 128000). Applies to certain formats like mp3, opus, aac
* @min 0
* @exclusiveMin true
*/
bitRate?: number;
/** Audio container format. Use "none" for raw audio, "wav" for WAV container, "ogg" for Ogg container */
container?: "none" | "wav" | "ogg";
/** Markers to identify sections of text that should not be spoken */
noSpeechMarkers?: {
start: string;
Expand All @@ -855,6 +847,13 @@ export interface DeepgramTtsSettings {
removeExclamationMarks?: boolean;
/** Whether to use sentence splitter for text processing, defaults to true */
useSentenceSplitter?: boolean;
/**
* Speaking rate multiplier (0.25 to 4.0, default: 1.0)
* @min 0.25
* @max 4
*/
speed?: number;
[key: string]: any;
}

export interface CartesiaTtsSettings {
Expand Down Expand Up @@ -8004,11 +8003,65 @@ export interface ProjectExchangeV1 {
userProfileVariableDescriptors?: FieldDescriptor[];
/** Local document ID of the classifier used to evaluate guardrails; remapped on import */
defaultGuardrailClassifierId?: string | null;
/** Sample copy configuration including the default classifier used to evaluate prompt triggers */
sampleCopyConfig?: SampleCopyConfigExchangeV1;
/** Local document ID of the stage to start new conversations at; remapped on import */
startingStageId?: string | null;
/**
* Timeout in seconds for active conversations with no activity
* @min 0
*/
conversationTimeoutSeconds?: number | null;
/** Audio recording configuration for conversation debugging */
recordingConfig?: RecordingConfigExchangeV1;
/** Project-level LLM token cost management configuration with provider hints */
costManagementConfig?: CostManagementConfigExchangeV1;
}

/**
 * Sample copy configuration including the default classifier used to evaluate
 * prompt triggers.
 */
export interface SampleCopyConfigExchangeV1 {
  /**
   * Local document ID of the classifier used to evaluate sample copy prompt
   * triggers; remapped on import.
   */
  defaultClassifierId?: string;
}

/**
 * Audio recording configuration for conversation debugging.
 *
 * Only `enabled` is required; every other field is optional and documents its
 * default below.
 */
export interface RecordingConfigExchangeV1 {
  /** Whether audio recording is enabled for this project */
  enabled: boolean;

  /**
   * Whether to record user voice input. Defaults to true.
   * @default true
   */
  recordInput?: boolean;

  /**
   * Whether to record AI voice output. Defaults to true.
   * @default true
   */
  recordOutput?: boolean;

  /**
   * Audio format for saved recordings. Defaults to pcm_16000.
   * @default "pcm_16000"
   */
  format?:
    | "mp3" | "opus" | "aac" | "flac" | "wav"
    | "pcm_8000" | "pcm_16000" | "pcm_22050"
    | "pcm_24000" | "pcm_44100" | "pcm_48000"
    | "mulaw" | "alaw";
}

/**
 * Project-level LLM token cost management configuration with provider hints.
 */
export interface CostManagementConfigExchangeV1 {
  /**
   * Token cap definitions keyed by provider hint and model name:
   * the outer key is the provider hint, the inner key is the model name.
   */
  limits: Record<string, Record<string, ProviderModelLimits>>;
}

/** Agent entity in the exchange format */
Expand Down
112 changes: 96 additions & 16 deletions src/api/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -1244,21 +1244,6 @@
"exclusiveMinimum": true,
"description": "Sample rate for audio output in Hz (e.g., 8000, 16000, 24000, 48000). Availability depends on audio format"
},
"bitRate": {
"type": "integer",
"minimum": 0,
"exclusiveMinimum": true,
"description": "Bit rate for audio output (e.g., 32000, 64000, 128000). Applies to certain formats like mp3, opus, aac"
},
"container": {
"type": "string",
"enum": [
"none",
"wav",
"ogg"
],
"description": "Audio container format. Use \"none\" for raw audio, \"wav\" for WAV container, \"ogg\" for Ogg container"
},
"noSpeechMarkers": {
"type": "array",
"items": {
Expand All @@ -1285,11 +1270,20 @@
"useSentenceSplitter": {
"type": "boolean",
"description": "Whether to use sentence splitter for text processing, defaults to true"
},
"speed": {
"type": "number",
"minimum": 0.25,
"maximum": 4,
"description": "Speaking rate multiplier (0.25 to 4.0, default: 1.0)"
}
},
"required": [
"provider"
]
],
"additionalProperties": {
"nullable": true
}
},
"CartesiaTtsSettings": {
"type": "object",
Expand Down Expand Up @@ -18015,11 +18009,25 @@
"nullable": true,
"description": "Local document ID of the classifier used to evaluate guardrails; remapped on import"
},
"sampleCopyConfig": {
"$ref": "#/components/schemas/SampleCopyConfigExchangeV1"
},
"startingStageId": {
"type": "string",
"nullable": true,
"description": "Local document ID of the stage to start new conversations at; remapped on import"
},
"conversationTimeoutSeconds": {
"type": "integer",
"nullable": true,
"minimum": 0,
"description": "Timeout in seconds for active conversations with no activity"
},
"recordingConfig": {
"$ref": "#/components/schemas/RecordingConfigExchangeV1"
},
"costManagementConfig": {
"$ref": "#/components/schemas/CostManagementConfigExchangeV1"
}
},
"required": [
Expand All @@ -18028,6 +18036,78 @@
],
"description": "Project entity in the exchange format"
},
"SampleCopyConfigExchangeV1": {
"type": "object",
"properties": {
"defaultClassifierId": {
"type": "string",
"description": "Local document ID of the classifier used to evaluate sample copy prompt triggers; remapped on import"
}
},
"description": "Sample copy configuration including the default classifier used to evaluate prompt triggers"
},
"RecordingConfigExchangeV1": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean",
"description": "Whether audio recording is enabled for this project"
},
"recordInput": {
"type": "boolean",
"default": true,
"description": "Whether to record user voice input. Defaults to true."
},
"recordOutput": {
"type": "boolean",
"default": true,
"description": "Whether to record AI voice output. Defaults to true."
},
"format": {
"type": "string",
"enum": [
"mp3",
"opus",
"aac",
"flac",
"wav",
"pcm_8000",
"pcm_16000",
"pcm_22050",
"pcm_24000",
"pcm_44100",
"pcm_48000",
"mulaw",
"alaw"
],
"default": "pcm_16000",
"description": "Audio format for saved recordings. Defaults to pcm_16000."
}
},
"required": [
"enabled"
],
"description": "Audio recording configuration for conversation debugging"
},
"CostManagementConfigExchangeV1": {
"type": "object",
"properties": {
"limits": {
"type": "object",
"additionalProperties": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ProviderModelLimits"
}
},
"description": "Token cap definitions keyed by provider hint and model name"
}
},
"required": [
"limits"
],
"description": "Project-level LLM token cost management configuration with provider hints"
},
"AgentExchangeV1": {
"type": "object",
"properties": {
Expand Down
43 changes: 13 additions & 30 deletions src/components/TtsProviderSettingsPanel.vue
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,19 @@ function removeNoSpeechMarker(index: number) {
<div v-if="isDeepgram" class="mt-8 pt-6 border-t border-gray-200 dark:border-gray-700">
<h3 class="text-lg font-semibold text-gray-900 dark:text-white mb-4">Voice Settings (Deepgram)</h3>

<!-- Speed: range slider bound to the Deepgram `speed` setting. The label shows the
     current value with two decimals and falls back to 1.0 while the setting is unset.
     The label uses the same typed cast as the v-model so `speed` stays type-checked. -->
<FormField :label="`Speed: ${((model as DeepgramTtsSettings).speed ?? 1.0).toFixed(2)}`" class="w-full" help="Speech speed (0.25-4.0), defaults to 1.0">
  <input
    v-model.number="(model as DeepgramTtsSettings).speed"
    type="range"
    min="0.25"
    max="4.0"
    step="0.01"
    class="block min-w-64 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer"
    :disabled="isLoading"
  />
</FormField>

<!-- Sample Rate -->
<FormField label="Sample Rate (Hz)" class="w-full" help="Audio sample rate in Hz. Higher values provide better quality but larger file sizes. Common values: 8000, 16000, 24000, 48000.">
<select
Expand All @@ -112,36 +125,6 @@ function removeNoSpeechMarker(index: number) {
<option :value="48000">48000 Hz</option>
</select>
</FormField>

<!-- Bit Rate -->
<FormField label="Bit Rate" hint="(optional)" class="w-full" help="Bit rate for compressed formats (mp3, opus, aac). Higher values provide better quality.">
<select
v-model.number="(model as DeepgramTtsSettings).bitRate"
class="form-select-auto min-w-64"
:disabled="isLoading"
>
<option :value="undefined">Default</option>
<option :value="32000">32 kbps</option>
<option :value="64000">64 kbps</option>
<option :value="96000">96 kbps</option>
<option :value="128000">128 kbps</option>
<option :value="192000">192 kbps</option>
<option :value="256000">256 kbps</option>
</select>
</FormField>

<!-- Container -->
<FormField label="Container Format" class="w-full" help='Audio container format. Use "none" for raw audio, "wav" for WAV container, "ogg" for Ogg container'>
<select
v-model="(model as DeepgramTtsSettings).container"
class="form-select-auto min-w-64"
:disabled="isLoading"
>
<option value="none">None (raw audio)</option>
<option value="wav">WAV</option>
<option value="ogg">Ogg</option>
</select>
</FormField>
</div>

<!-- Voice Settings Section (Cartesia) -->
Expand Down
14 changes: 8 additions & 6 deletions src/views/PlaygroundView.vue
Original file line number Diff line number Diff line change
Expand Up @@ -1271,12 +1271,14 @@ async function connectWebSocket() {
}

event.voiceOutputId = msg.outputTurnId
const player = useAudioPlayback()
player.setOnEnded(() => {
activeVoiceOutputs.value.delete(msg.outputTurnId)
const client = wsClient.value as ReturnType<typeof useWebSocketClient> | null
client?.client.value?.sendAudioPlaybackEnded(msg.outputTurnId)
})
const player = useAudioPlayback()
player.setOnEnded(() => {
// Keep the player alive to receive subsequent audio chunks
// (e.g., main part after filler). Cleanup happens on the next
// start_ai_generation_output for a different turn.
const client = wsClient.value as ReturnType<typeof useWebSocketClient> | null
client?.client.value?.sendAudioPlaybackEnded(msg.outputTurnId)
})
activeVoiceOutputs.value.set(msg.outputTurnId, {
player: player as any,
transcript: null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ function initTtsProviderSettings(apiType: string): Record<string, any> {
case 'openai':
return { provider: 'openai', model: '', voiceId: '', speed: 1.0, instructions: '', noSpeechMarkers: [], removeExclamationMarks: false, useSentenceSplitter: true }
case 'deepgram':
return { provider: 'deepgram', model: undefined, voiceId: '', audioFormat: 'pcm_16000', sampleRate: 24000, container: 'none', noSpeechMarkers: [], removeExclamationMarks: false, useSentenceSplitter: true }
return { provider: 'deepgram', model: undefined, voiceId: '', audioFormat: 'pcm_16000', sampleRate: 16000, speed: 1.0, noSpeechMarkers: [], removeExclamationMarks: false, useSentenceSplitter: true }
case 'cartesia':
return { provider: 'cartesia', model: '', voiceId: '', language: 'en', audioFormat: 'pcm_24000', speed: 'normal', emotion: [], maxBufferDelayMs: 3000, useSentenceSplitter: false, noSpeechMarkers: [], removeExclamationMarks: false }
case 'azure':
Expand Down
2 changes: 1 addition & 1 deletion src/views/design/AgentEditView.vue
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ function handleTtsProviderChange() {
voiceId: '',
audioFormat: 'pcm_16000',
sampleRate: 24000,
container: 'none',
speed: 1.0,
noSpeechMarkers: [],
removeExclamationMarks: false,
useSentenceSplitter: true
Expand Down
Loading