vercel · gr2m · Jul 2, 2026 · Jul 2, 2026
diff --git a/.changeset/assemblyai-provider-update.md b/.changeset/assemblyai-provider-update.md
@@ -0,0 +1,26 @@
+---
+'@ai-sdk/assemblyai': patch
+---
+
+feat(assemblyai): support universal-3-5-pro and expand the transcription provider
+
+- Add current speech models `universal-3-5-pro`, `universal-3-pro`, and
+  `universal-2`, routed via AssemblyAI's `speech_models` parameter (the
+  deprecated singular `speech_model` is used only for the legacy `best` model).
+  Using `universal-3-pro`/`universal-2` emits an informational warning
+  suggesting `universal-3-5-pro`.
+- Deprecate the legacy `best` model (still works, warns) and remove `nano`,
+  which AssemblyAI no longer accepts.
+- Surface speaker diarization and audio-intelligence results: `doGenerate` now
+  returns the full raw response on `response.body` and populates
+  `providerMetadata.assemblyai` with `utterances`, `entities`,
+  `sentimentAnalysisResults`, `contentSafetyLabels`, `iabCategoriesResult`, and
+  `autoHighlightsResult`.
+- Add provider options for newer request parameters: `prompt`, `keytermsPrompt`,
+  `temperature`, `removeAudioTags`, `domain`, `speakerOptions`,
+  `languageDetectionOptions`, `redactPiiAudioOptions`,
+  `redactPiiReturnUnredacted`, and `redactStaticEntities`. Deprecate
+  `wordBoost`/`boostParam` in favor of `keytermsPrompt` (AssemblyAI rejects
+  `word_boost` on the newer models).
+- Fix transcription segment timings, which were reported in milliseconds instead
+  of seconds.
diff --git a/content/docs/03-ai-sdk-core/36-transcription.mdx b/content/docs/03-ai-sdk-core/36-transcription.mdx
@@ -219,8 +219,8 @@ try {
 | [Deepgram](/providers/ai-sdk-providers/deepgram#transcription-models)           | `nova-2` (+ variants)    |
 | [Deepgram](/providers/ai-sdk-providers/deepgram#transcription-models)           | `nova-3` (+ variants)    |
 | [Gladia](/providers/ai-sdk-providers/gladia#transcription-models)               | `default`                |
-| [AssemblyAI](/providers/ai-sdk-providers/assemblyai#transcription-models)       | `best`                   |
-| [AssemblyAI](/providers/ai-sdk-providers/assemblyai#transcription-models)       | `nano`                   |
+| [AssemblyAI](/providers/ai-sdk-providers/assemblyai#transcription-models)       | `universal-3-5-pro`      |
+| [AssemblyAI](/providers/ai-sdk-providers/assemblyai#transcription-models)       | `universal-3-pro`        |
 | [Fal](/providers/ai-sdk-providers/fal#transcription-models)                     | `whisper`                |
 | [Fal](/providers/ai-sdk-providers/fal#transcription-models)                     | `wizper`                 |
 | [Google Vertex](/providers/ai-sdk-providers/google-vertex#transcription-models) | `chirp_2`                |

diff --git a/content/providers/01-ai-sdk-providers/100-assemblyai.mdx b/content/providers/01-ai-sdk-providers/100-assemblyai.mdx
@@ -5,7 +5,7 @@ description: Learn how to use the AssemblyAI provider for the AI SDK.
 
 # AssemblyAI Provider
 
-The [AssemblyAI](https://assemblyai.com/) provider contains language model support for the AssemblyAI transcription API.
+The [AssemblyAI](https://assemblyai.com/) provider contains transcription model support for the AssemblyAI transcription API.
 
 ## Setup
 
@@ -69,12 +69,23 @@ You can use the following optional settings to customize the AssemblyAI provider
 You can create models that call the [AssemblyAI transcription API](https://www.assemblyai.com/docs/getting-started/transcribe-an-audio-file/typescript)
 using the `.transcription()` factory method.
 
-The first argument is the model id e.g. `best`.
+The first argument is the model id, e.g. `universal-3-5-pro`.
 
 ```ts
-const model = assemblyai.transcription('best');
+const model = assemblyai.transcription('universal-3-5-pro');
 ```
 
+The `best` model is a **deprecated** legacy model, sent using AssemblyAI's
+deprecated `speech_model` request parameter. It still works, but using it emits a
+deprecation warning — prefer `universal-3-5-pro`. The older `nano` model has been
+removed by AssemblyAI and is no longer available. All newer models are sent using
+the [`speech_models`](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model)
+request parameter, which the provider selects automatically based on the model id.
+
+`universal-3-pro` and `universal-2` are fully supported and continue to work, but
+using them emits an informational warning (not a deprecation) suggesting
+`universal-3-5-pro`, AssemblyAI's latest flagship model.
+
 You can also pass additional provider-specific options using the `providerOptions` argument. For example, supplying the `contentSafety` option will enable content safety filtering.
 
 ```ts highlight="7"
@@ -84,7 +95,7 @@ import { type AssemblyAITranscriptionModelOptions } from '@ai-sdk/assemblyai';
 import { readFile } from 'fs/promises';
 
 const result = await transcribe({
-  model: assemblyai.transcription('best'),
+  model: assemblyai.transcription('universal-3-5-pro'),
   audio: await readFile('audio.mp3'),
   providerOptions: {
     assemblyai: {
@@ -118,8 +129,9 @@ The following provider options are available:
 
 - **boostParam** _enum_
 
-  Boost parameter for the transcription.
+  How strongly to boost the words listed in `wordBoost`.
   Allowed values: `'low'`, `'default'`, `'high'`.
+  **Deprecated** — only applies to the deprecated `wordBoost`; use `keytermsPrompt` instead.
   Optional.
 
 - **contentSafety** _boolean_
@@ -143,6 +155,12 @@ The following provider options are available:
   Whether to include disfluencies (um, uh, etc.) in the transcription.
   Optional.
 
+- **domain** _string_
+
+  Enable a domain-specific model for specialized terminology.
+  Currently supports `'medical-v1'` (Medical Mode).
+  Optional.
+
 - **entityDetection** _boolean_
 
   Whether to detect entities in the transcription.
@@ -163,6 +181,13 @@ The following provider options are available:
   Whether to include IAB categories in the transcription.
   Optional.
 
+- **keytermsPrompt** _array of strings_
+
+  Domain-specific keyterms to boost recognition for (max 6 words per phrase).
+  Replaces `wordBoost` for newer models — supported by `universal-3-pro`,
+  `universal-3-5-pro`, and `slam-1` (and `universal-2` when enabled).
+  Optional.
+
 - **languageCode** _string_
 
   Language code for the audio.
@@ -179,11 +204,24 @@ The following provider options are available:
   Whether to enable language detection.
   Optional.
 
+- **languageDetectionOptions** _object_
+
+  Options for automatic language detection: `expectedLanguages` (array of
+  strings), `fallbackLanguage` (string), `codeSwitching` (boolean),
+  `codeSwitchingConfidenceThreshold` (number, 0-1).
+  Optional.
+
 - **multichannel** _boolean_
 
   Whether to process multiple audio channels separately.
   Optional.
 
+- **prompt** _string_
+
+  Natural-language context (up to 1,500 words) to steer the model.
+  Only supported by `universal-3-pro`, `universal-3-5-pro`, and `slam-1`.
+  Optional.
+
 - **punctuate** _boolean_
 
   Whether to add punctuation to the transcription.
@@ -199,6 +237,12 @@ The following provider options are available:
   Whether to redact PII in the audio file.
   Optional.
 
+- **redactPiiAudioOptions** _object_
+
+  Options for PII-redacted audio: `returnRedactedNoSpeechAudio` (boolean),
+  `overrideAudioRedactionMethod` (`'silence'`). Requires `redactPiiAudio`.
+  Optional.
+
 - **redactPiiAudioQuality** _enum_
 
   Quality of the redacted audio file.
@@ -211,12 +255,32 @@ The following provider options are available:
   Supports numerous types like `'person_name'`, `'phone_number'`, etc.
   Optional.
 
+- **redactPiiReturnUnredacted** _boolean_
+
+  Whether to return the original unredacted transcript alongside the redacted one.
+  Requires `redactPii`.
+  Optional.
+
 - **redactPiiSub** _enum_
 
   Substitution method for redacted PII.
   Allowed values: `'entity_name'`, `'hash'`.
   Optional.
 
+- **redactStaticEntities** _object_
+
+  Map of user-defined labels to exact terms to redact, e.g.
+  `{ INTERNAL_TOOL: ['Bearclaw'] }`. Applied on top of standard PII redaction.
+  Requires `redactPii`.
+  Optional.
+
+- **removeAudioTags** _enum_
+
+  Remove inline annotations from rich transcripts.
+  Allowed values: `'all'` (all annotations), `'speaker'` (speaker cues only).
+  Only supported by Universal-3 Pro models.
+  Optional.
+
 - **sentimentAnalysis** _boolean_
 
   Whether to perform sentiment analysis on the transcription.
@@ -227,6 +291,12 @@ The following provider options are available:
   Whether to label different speakers in the transcription.
   Optional.
 
+- **speakerOptions** _object_
+
+  Options for speaker diarization: `minSpeakersExpected` (number),
+  `maxSpeakersExpected` (number).
+  Optional.
+
 - **speakersExpected** _number_
 
   Expected number of speakers in the audio.
@@ -254,6 +324,12 @@ The following provider options are available:
   Allowed values: `'bullets'`, `'bullets_verbose'`, `'gist'`, `'headline'`, `'paragraph'`.
   Optional.
 
+- **temperature** _number_
+
+  Sampling temperature (0-1) controlling randomness.
+  Only supported by Universal-3 Pro models.
+  Optional.
+
 - **webhookAuthHeaderName** _string_
 
   Name of the authentication header for webhook requests.
@@ -272,11 +348,60 @@ The following provider options are available:
 - **wordBoost** _array of strings_
 
   List of words to boost in the transcription.
+  **Deprecated** — rejected by `universal-3-pro`, `universal-3-5-pro`, and `slam-1`
+  (works only on `universal-2`/`best`); use `keytermsPrompt` instead.
   Optional.
 
+### Speaker diarization and audio-intelligence results
+
+The AI SDK's `transcribe` result exposes `text`, `segments`, `language`, and
+`durationInSeconds`. AssemblyAI's richer results — speaker diarization and
+audio-intelligence features — don't fit that shape, so they are surfaced in two
+places:
+
+- **`providerMetadata.assemblyai`** — structured results for the features you
+  enabled: `utterances` (speaker-diarized segments, when `speakerLabels` is set),
+  `entities`, `sentimentAnalysisResults`, `contentSafetyLabels`,
+  `iabCategoriesResult`, and `autoHighlightsResult`.
+- **`response.body`** — the complete, raw AssemblyAI transcript response, so any
+  field not surfaced above (e.g. `chapters`, word-level `speaker` labels) is
+  still available.
+
+Note: timings inside `providerMetadata` and `response.body` (e.g.
+`utterances[].start`) are in **milliseconds**, matching the AssemblyAI API —
+whereas the top-level `segments` use **seconds**.
+
+```ts
+import { experimental_transcribe as transcribe } from 'ai';
+import { assemblyai } from '@ai-sdk/assemblyai';
+import { readFile } from 'fs/promises';
+
+const result = await transcribe({
+  model: assemblyai.transcription('universal-3-5-pro'),
+  audio: await readFile('audio.mp3'),
+  providerOptions: {
+    assemblyai: {
+      speakerLabels: true,
+      entityDetection: true,
+    },
+  },
+});
+
+const { utterances, entities } = result.providerMetadata?.assemblyai ?? {};
+// utterances: [{ speaker: 'A', text: '…', start, end, … }, …]  (start/end in ms)
+```
+
+The following AssemblyAI features are **deprecated** by the API and not surfaced
+in `providerMetadata` (their output remains on the raw `response.body` if
+enabled): Summarization, Auto Chapters, and Custom Topics. Note also that some
+features are language-gated (e.g. sentiment analysis is English-centric); see
+AssemblyAI's documentation for per-language availability.
+
 ### Model Capabilities
 
-| Model  | Transcription       | Duration            | Segments            | Language            |
-| ------ | ------------------- | ------------------- | ------------------- | ------------------- |
-| `best` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
-| `nano` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| Model               | Transcription       | Duration            | Segments            | Language            |
+| ------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
+| `universal-3-5-pro` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `universal-3-pro`   | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `universal-2`       | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `best`              | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
diff --git a/examples/ai-functions/src/transcribe/assemblyai/basic.ts b/examples/ai-functions/src/transcribe/assemblyai/basic.ts
@@ -5,7 +5,7 @@ import { run } from '../../lib/run';
 
 run(async () => {
   const result = await transcribe({
-    model: assemblyai.transcription('best'),
+    model: assemblyai.transcription('universal-3-5-pro'),
     audio: await readFile('data/galileo.mp3'),
   });
 

diff --git a/examples/ai-functions/src/transcribe/assemblyai/string.ts b/examples/ai-functions/src/transcribe/assemblyai/string.ts
@@ -5,7 +5,7 @@ import { run } from '../../lib/run';
 
 run(async () => {
   const result = await transcribe({
-    model: assemblyai.transcription('best'),
+    model: assemblyai.transcription('universal-3-5-pro'),
     audio: Buffer.from(await readFile('./data/galileo.mp3')).toString('base64'),
   });
 

diff --git a/examples/ai-functions/src/transcribe/assemblyai/url.ts b/examples/ai-functions/src/transcribe/assemblyai/url.ts
@@ -4,7 +4,7 @@ import { run } from '../../lib/run';
 
 run(async () => {
   const result = await transcribe({
-    model: assemblyai.transcription('best'),
+    model: assemblyai.transcription('universal-3-5-pro'),
     audio: new URL(
       'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-functions/data/galileo.mp3',
     ),

diff --git a/packages/assemblyai/README.md b/packages/assemblyai/README.md
@@ -36,7 +36,7 @@ import { assemblyai } from '@ai-sdk/assemblyai';
 import { experimental_transcribe as transcribe } from 'ai';
 
 const { text } = await transcribe({
-  model: assemblyai.transcription('best'),
+  model: assemblyai.transcription('universal-3-5-pro'),
   audio: new URL(
     'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-functions/data/galileo.mp3',
   ),