utter-one · bayger · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/src/api/generated/data-contracts.ts b/src/api/generated/data-contracts.ts
@@ -838,14 +838,6 @@ export interface DeepgramTtsSettings {
    * @exclusiveMin true
    */
   sampleRate?: number;
-  /**
-   * Bit rate for audio output (e.g., 32000, 64000, 128000). Applies to certain formats like mp3, opus, aac
-   * @min 0
-   * @exclusiveMin true
-   */
-  bitRate?: number;
-  /** Audio container format. Use "none" for raw audio, "wav" for WAV container, "ogg" for Ogg container */
-  container?: "none" | "wav" | "ogg";
   /** Markers to identify sections of text that should not be spoken */
   noSpeechMarkers?: {
     start: string;
@@ -855,6 +847,13 @@ export interface DeepgramTtsSettings {
   removeExclamationMarks?: boolean;
   /** Whether to use sentence splitter for text processing, defaults to true */
   useSentenceSplitter?: boolean;
+  /**
+   * Speaking rate multiplier (0.25 to 4.0, default: 1.0)
+   * @min 0.25
+   * @max 4
+   */
+  speed?: number;
+  [key: string]: any;
 }
 
 export interface CartesiaTtsSettings {
@@ -8004,11 +8003,65 @@ export interface ProjectExchangeV1 {
   userProfileVariableDescriptors?: FieldDescriptor[];
   /** Local document ID of the classifier used to evaluate guardrails; remapped on import */
   defaultGuardrailClassifierId?: string | null;
+  /** Sample copy configuration including the default classifier used to evaluate prompt triggers */
+  sampleCopyConfig?: SampleCopyConfigExchangeV1;
+  /** Local document ID of the stage to start new conversations at; remapped on import */
+  startingStageId?: string | null;
   /**
    * Timeout in seconds for active conversations with no activity
    * @min 0
    */
   conversationTimeoutSeconds?: number | null;
+  /** Audio recording configuration for conversation debugging */
+  recordingConfig?: RecordingConfigExchangeV1;
+  /** Project-level LLM token cost management configuration with provider hints */
+  costManagementConfig?: CostManagementConfigExchangeV1;
+}
+
+/** Sample copy configuration including the default classifier used to evaluate prompt triggers */
+export interface SampleCopyConfigExchangeV1 {
+  /** Local document ID of the classifier used to evaluate sample copy prompt triggers; remapped on import. NOTE(review): typed as `string` only, whereas the ID fields on ProjectExchangeV1 are `string | null` - confirm null is not a valid value here. */
+  defaultClassifierId?: string;
+}
+
+/** Audio recording configuration for conversation debugging; `enabled` is the only required field */
+export interface RecordingConfigExchangeV1 {
+  /** Whether audio recording is enabled for this project */
+  enabled: boolean;
+  /**
+   * Whether to record user voice input. Defaults to true.
+   * @default true
+   */
+  recordInput?: boolean;
+  /**
+   * Whether to record AI voice output. Defaults to true.
+   * @default true
+   */
+  recordOutput?: boolean;
+  /**
+   * Audio format for saved recordings. Defaults to pcm_16000.
+   * @default "pcm_16000"
+   */
+  format?:
+    | "mp3"
+    | "opus"
+    | "aac"
+    | "flac"
+    | "wav"
+    | "pcm_8000"
+    | "pcm_16000"
+    | "pcm_22050"
+    | "pcm_24000"
+    | "pcm_44100"
+    | "pcm_48000"
+    | "mulaw"
+    | "alaw";
+}
+
+/** Project-level LLM token cost management configuration with provider hints */
+export interface CostManagementConfigExchangeV1 {
+  /** Token cap definitions as a two-level map keyed by provider hint and model name (presumably outer key = provider hint, inner key = model name - confirm key order) */
+  limits: Record<string, Record<string, ProviderModelLimits>>;
+}
 
 /** Agent entity in the exchange format */

diff --git a/src/api/openapi.json b/src/api/openapi.json
@@ -1244,21 +1244,6 @@
             "exclusiveMinimum": true,
             "description": "Sample rate for audio output in Hz (e.g., 8000, 16000, 24000, 48000). Availability depends on audio format"
           },
-          "bitRate": {
-            "type": "integer",
-            "minimum": 0,
-            "exclusiveMinimum": true,
-            "description": "Bit rate for audio output (e.g., 32000, 64000, 128000). Applies to certain formats like mp3, opus, aac"
-          },
-          "container": {
-            "type": "string",
-            "enum": [
-              "none",
-              "wav",
-              "ogg"
-            ],
-            "description": "Audio container format. Use \"none\" for raw audio, \"wav\" for WAV container, \"ogg\" for Ogg container"
-          },
           "noSpeechMarkers": {
             "type": "array",
             "items": {
@@ -1285,11 +1270,20 @@
           "useSentenceSplitter": {
             "type": "boolean",
             "description": "Whether to use sentence splitter for text processing, defaults to true"
+          },
+          "speed": {
+            "type": "number",
+            "minimum": 0.25,
+            "maximum": 4,
+            "description": "Speaking rate multiplier (0.25 to 4.0, default: 1.0)"
           }
         },
         "required": [
           "provider"
-        ]
+        ],
+        "additionalProperties": {
+          "nullable": true
+        }
       },
       "CartesiaTtsSettings": {
         "type": "object",
@@ -18015,11 +18009,25 @@
             "nullable": true,
             "description": "Local document ID of the classifier used to evaluate guardrails; remapped on import"
           },
+          "sampleCopyConfig": {
+            "$ref": "#/components/schemas/SampleCopyConfigExchangeV1"
+          },
+          "startingStageId": {
+            "type": "string",
+            "nullable": true,
+            "description": "Local document ID of the stage to start new conversations at; remapped on import"
+          },
           "conversationTimeoutSeconds": {
             "type": "integer",
             "nullable": true,
             "minimum": 0,
             "description": "Timeout in seconds for active conversations with no activity"
+          },
+          "recordingConfig": {
+            "$ref": "#/components/schemas/RecordingConfigExchangeV1"
+          },
+          "costManagementConfig": {
+            "$ref": "#/components/schemas/CostManagementConfigExchangeV1"
           }
         },
         "required": [
@@ -18028,6 +18036,78 @@
         ],
         "description": "Project entity in the exchange format"
       },
+      "SampleCopyConfigExchangeV1": {
+        "type": "object",
+        "properties": {
+          "defaultClassifierId": {
+            "type": "string",
+            "description": "Local document ID of the classifier used to evaluate sample copy prompt triggers; remapped on import"
+          }
+        },
+        "description": "Sample copy configuration including the default classifier used to evaluate prompt triggers"
+      },
+      "RecordingConfigExchangeV1": {
+        "type": "object",
+        "properties": {
+          "enabled": {
+            "type": "boolean",
+            "description": "Whether audio recording is enabled for this project"
+          },
+          "recordInput": {
+            "type": "boolean",
+            "default": true,
+            "description": "Whether to record user voice input. Defaults to true."
+          },
+          "recordOutput": {
+            "type": "boolean",
+            "default": true,
+            "description": "Whether to record AI voice output. Defaults to true."
+          },
+          "format": {
+            "type": "string",
+            "enum": [
+              "mp3",
+              "opus",
+              "aac",
+              "flac",
+              "wav",
+              "pcm_8000",
+              "pcm_16000",
+              "pcm_22050",
+              "pcm_24000",
+              "pcm_44100",
+              "pcm_48000",
+              "mulaw",
+              "alaw"
+            ],
+            "default": "pcm_16000",
+            "description": "Audio format for saved recordings. Defaults to pcm_16000."
+          }
+        },
+        "required": [
+          "enabled"
+        ],
+        "description": "Audio recording configuration for conversation debugging"
+      },
+      "CostManagementConfigExchangeV1": {
+        "type": "object",
+        "properties": {
+          "limits": {
+            "type": "object",
+            "additionalProperties": {
+              "type": "object",
+              "additionalProperties": {
+                "$ref": "#/components/schemas/ProviderModelLimits"
+              }
+            },
+            "description": "Token cap definitions keyed by provider hint and model name"
+          }
+        },
+        "required": [
+          "limits"
+        ],
+        "description": "Project-level LLM token cost management configuration with provider hints"
+      },
       "AgentExchangeV1": {
         "type": "object",
         "properties": {

diff --git a/src/components/TtsProviderSettingsPanel.vue b/src/components/TtsProviderSettingsPanel.vue
@@ -98,6 +98,19 @@ function removeNoSpeechMarker(index: number) {
           <div v-if="isDeepgram" class="mt-8 pt-6 border-t border-gray-200 dark:border-gray-700">
             <h3 class="text-lg font-semibold text-gray-900 dark:text-white mb-4">Voice Settings (Deepgram)</h3>
 
+            <!-- Speed: range slider bound to the Deepgram `speed` setting; the label shows the current value, or 1.00 while unset -->
+            <FormField :label="`Speed: ${((model as DeepgramTtsSettings).speed ?? 1.0).toFixed(2)}`" class="w-full" help="Speech speed (0.25-4.0), defaults to 1.0">
+              <input
+                v-model.number="(model as DeepgramTtsSettings).speed"
+                type="range"
+                min="0.25"
+                max="4.0"
+                step="0.01"
+                class="block min-w-64 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer"
+                :disabled="isLoading"
+              />
+            </FormField>
+
             <!-- Sample Rate -->
             <FormField label="Sample Rate (Hz)" class="w-full" help="Audio sample rate in Hz. Higher values provide better quality but larger file sizes. Common values: 8000, 16000, 24000, 48000.">
               <select
@@ -112,36 +125,6 @@ function removeNoSpeechMarker(index: number) {
                 <option :value="48000">48000 Hz</option>
               </select>
             </FormField>
-
-            <!-- Bit Rate -->
-            <FormField label="Bit Rate" hint="(optional)" class="w-full" help="Bit rate for compressed formats (mp3, opus, aac). Higher values provide better quality.">
-              <select
-                v-model.number="(model as DeepgramTtsSettings).bitRate"
-                class="form-select-auto min-w-64"
-                :disabled="isLoading"
-              >
-                <option :value="undefined">Default</option>
-                <option :value="32000">32 kbps</option>
-                <option :value="64000">64 kbps</option>
-                <option :value="96000">96 kbps</option>
-                <option :value="128000">128 kbps</option>
-                <option :value="192000">192 kbps</option>
-                <option :value="256000">256 kbps</option>
-              </select>
-            </FormField>
-
-            <!-- Container -->
-            <FormField label="Container Format" class="w-full" help='Audio container format. Use "none" for raw audio, "wav" for WAV container, "ogg" for Ogg container'>
-              <select
-                v-model="(model as DeepgramTtsSettings).container"
-                class="form-select-auto min-w-64"
-                :disabled="isLoading"
-              >
-                <option value="none">None (raw audio)</option>
-                <option value="wav">WAV</option>
-                <option value="ogg">Ogg</option>
-              </select>
-            </FormField>
           </div>
 
           <!-- Voice Settings Section (Cartesia) -->

diff --git a/src/views/PlaygroundView.vue b/src/views/PlaygroundView.vue
@@ -1271,12 +1271,14 @@ async function connectWebSocket() {
           }
 
          event.voiceOutputId = msg.outputTurnId
-           const player = useAudioPlayback()
-           player.setOnEnded(() => {
-              activeVoiceOutputs.value.delete(msg.outputTurnId)
-              const client = wsClient.value as ReturnType<typeof useWebSocketClient> | null
-              client?.client.value?.sendAudioPlaybackEnded(msg.outputTurnId)
-            })
+          const player = useAudioPlayback()
+            player.setOnEnded(() => {
+               // Keep the player alive to receive subsequent audio chunks
+               // (e.g., main part after filler). Cleanup happens on the next
+               // start_ai_generation_output for a different turn.
+               const client = wsClient.value as ReturnType<typeof useWebSocketClient> | null
+               client?.client.value?.sendAudioPlaybackEnded(msg.outputTurnId)
+             })
            activeVoiceOutputs.value.set(msg.outputTurnId, {
              player: player as any,
              transcript: null

diff --git a/src/views/administration/benchmark/BenchmarkProviderConfigEditView.vue b/src/views/administration/benchmark/BenchmarkProviderConfigEditView.vue
@@ -127,7 +127,7 @@ function initTtsProviderSettings(apiType: string): Record<string, any> {
     case 'openai':
       return { provider: 'openai', model: '', voiceId: '', speed: 1.0, instructions: '', noSpeechMarkers: [], removeExclamationMarks: false, useSentenceSplitter: true }
     case 'deepgram':
-      return { provider: 'deepgram', model: undefined, voiceId: '', audioFormat: 'pcm_16000', sampleRate: 24000, container: 'none', noSpeechMarkers: [], removeExclamationMarks: false, useSentenceSplitter: true }
+      return { provider: 'deepgram', model: undefined, voiceId: '', audioFormat: 'pcm_16000', sampleRate: 16000, speed: 1.0, noSpeechMarkers: [], removeExclamationMarks: false, useSentenceSplitter: true }
     case 'cartesia':
       return { provider: 'cartesia', model: '', voiceId: '', language: 'en', audioFormat: 'pcm_24000', speed: 'normal', emotion: [], maxBufferDelayMs: 3000, useSentenceSplitter: false, noSpeechMarkers: [], removeExclamationMarks: false }
     case 'azure':

diff --git a/src/views/design/AgentEditView.vue b/src/views/design/AgentEditView.vue
@@ -223,7 +223,7 @@ function handleTtsProviderChange() {
         voiceId: '',
         audioFormat: 'pcm_16000',
         sampleRate: 24000,
-        container: 'none',
+        speed: 1.0,
         noSpeechMarkers: [],
         removeExclamationMarks: false,
         useSentenceSplitter: true