From 0115f21dc0471c6ba5b9e3835550f871a7cdc624 Mon Sep 17 00:00:00 2001
From: Gregor Martynus <39992+gr2m@users.noreply.github.com>
Date: Wed, 1 Jul 2026 22:42:18 -0700
Subject: [PATCH] Backport: feat(assemblyai): support universal-3-5-pro and
 expand the transcription provider (#16548)

Backport of #16548 to release-v6.0.

Adapted to the v6 provider spec: the transcription model targets
TranscriptionModelV3 (not V4), so the deprecation/nudge warnings are
emitted as `type: 'other'` warnings (SharedV3Warning has no `deprecated`
variant), and the workflow serialize/deserialize hooks from the original
PR are omitted (not available in v6). The provider options schema remains
inline in the model file, matching v6's layout.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .changeset/assemblyai-provider-update.md      |  26 ++
 .../docs/03-ai-sdk-core/36-transcription.mdx  |   4 +-
 .../01-ai-sdk-providers/100-assemblyai.mdx    | 143 ++++++-
 .../src/transcribe/assemblyai/basic.ts        |   2 +-
 .../src/transcribe/assemblyai/string.ts       |   2 +-
 .../src/transcribe/assemblyai/url.ts          |   2 +-
 packages/assemblyai/README.md                 |   2 +-
 .../assemblyai/src/assemblyai-api-types.ts    |  85 ++++-
 .../assemblyai/src/assemblyai-provider.ts     |   2 +-
 .../assemblyai-transcription-model.test.ts    | 359 +++++++++++++++++-
 .../src/assemblyai-transcription-model.ts     | 345 ++++++++++++++++-
 .../src/assemblyai-transcription-settings.ts  |  16 +-
 12 files changed, 948 insertions(+), 40 deletions(-)
 create mode 100644 .changeset/assemblyai-provider-update.md

diff --git a/.changeset/assemblyai-provider-update.md b/.changeset/assemblyai-provider-update.md
new file mode 100644
index 000000000000..9019c00d1c88
--- /dev/null
+++ b/.changeset/assemblyai-provider-update.md
@@ -0,0 +1,26 @@
+---
+'@ai-sdk/assemblyai': patch
+---
+
+feat(assemblyai): support universal-3-5-pro and expand the transcription provider
+
+- Add current speech models `universal-3-5-pro`, `universal-3-pro`, and
+  `universal-2`, routed via AssemblyAI's `speech_models` parameter (the
+  deprecated singular `speech_model` is used only for the legacy `best` model).
+  Using `universal-3-pro`/`universal-2` emits an informational warning
+  suggesting `universal-3-5-pro`.
+- Deprecate the legacy `best` model (still works, warns) and remove `nano`,
+  which AssemblyAI no longer accepts.
+- Surface speaker diarization and audio-intelligence results: `doGenerate` now
+  returns the full raw response on `response.body` and populates
+  `providerMetadata.assemblyai` with `utterances`, `entities`,
+  `sentimentAnalysisResults`, `contentSafetyLabels`, `iabCategoriesResult`, and
+  `autoHighlightsResult`.
+- Add provider options for newer request parameters: `prompt`, `keytermsPrompt`,
+  `temperature`, `removeAudioTags`, `domain`, `speakerOptions`,
+  `languageDetectionOptions`, `redactPiiAudioOptions`,
+  `redactPiiReturnUnredacted`, and `redactStaticEntities`. Deprecate
+  `wordBoost`/`boostParam` in favor of `keytermsPrompt` (AssemblyAI rejects
+  `word_boost` on the newer models).
+- Fix transcription segment timings, which were reported in milliseconds instead
+  of seconds.
diff --git a/content/docs/03-ai-sdk-core/36-transcription.mdx b/content/docs/03-ai-sdk-core/36-transcription.mdx
index 143b04bdb002..839b89d53d75 100644
--- a/content/docs/03-ai-sdk-core/36-transcription.mdx
+++ b/content/docs/03-ai-sdk-core/36-transcription.mdx
@@ -219,8 +219,8 @@ try {
 | [Deepgram](/providers/ai-sdk-providers/deepgram#transcription-models)           | `nova-2` (+ variants)    |
 | [Deepgram](/providers/ai-sdk-providers/deepgram#transcription-models)           | `nova-3` (+ variants)    |
 | [Gladia](/providers/ai-sdk-providers/gladia#transcription-models)               | `default`                |
-| [AssemblyAI](/providers/ai-sdk-providers/assemblyai#transcription-models)       | `best`                   |
-| [AssemblyAI](/providers/ai-sdk-providers/assemblyai#transcription-models)       | `nano`                   |
+| [AssemblyAI](/providers/ai-sdk-providers/assemblyai#transcription-models)       | `universal-3-5-pro`      |
+| [AssemblyAI](/providers/ai-sdk-providers/assemblyai#transcription-models)       | `universal-3-pro`        |
 | [Fal](/providers/ai-sdk-providers/fal#transcription-models)                     | `whisper`                |
 | [Fal](/providers/ai-sdk-providers/fal#transcription-models)                     | `wizper`                 |
 | [Google Vertex](/providers/ai-sdk-providers/google-vertex#transcription-models) | `chirp_2`                |
diff --git a/content/providers/01-ai-sdk-providers/100-assemblyai.mdx b/content/providers/01-ai-sdk-providers/100-assemblyai.mdx
index 27e3c651604d..d90869a97c4a 100644
--- a/content/providers/01-ai-sdk-providers/100-assemblyai.mdx
+++ b/content/providers/01-ai-sdk-providers/100-assemblyai.mdx
@@ -5,7 +5,7 @@ description: Learn how to use the AssemblyAI provider for the AI SDK.
 
 # AssemblyAI Provider
 
-The [AssemblyAI](https://assemblyai.com/) provider contains language model support for the AssemblyAI transcription API.
+The [AssemblyAI](https://assemblyai.com/) provider contains transcription model support for the AssemblyAI transcription API.
 
 ## Setup
 
@@ -69,12 +69,23 @@ You can use the following optional settings to customize the AssemblyAI provider
 You can create models that call the [AssemblyAI transcription API](https://www.assemblyai.com/docs/getting-started/transcribe-an-audio-file/typescript)
 using the `.transcription()` factory method.
 
-The first argument is the model id e.g. `best`.
+The first argument is the model id, e.g. `universal-3-5-pro`.
 
 ```ts
-const model = assemblyai.transcription('best');
+const model = assemblyai.transcription('universal-3-5-pro');
 ```
 
+The `best` model is a **deprecated** legacy model, sent using AssemblyAI's
+deprecated `speech_model` request parameter. It still works, but using it emits a
+deprecation warning — prefer `universal-3-5-pro`. The older `nano` model has been
+removed by AssemblyAI and is no longer available. All newer models are sent using
+the [`speech_models`](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model)
+request parameter, which the provider selects automatically based on the model id.
+
+`universal-3-pro` and `universal-2` are fully supported and continue to work, but
+using them emits an informational warning (not a deprecation) suggesting
+`universal-3-5-pro`, AssemblyAI's latest flagship model.
+
 You can also pass additional provider-specific options using the `providerOptions` argument. For example, supplying the `contentSafety` option will enable content safety filtering.
 
 ```ts highlight="7"
@@ -84,7 +95,7 @@ import { type AssemblyAITranscriptionModelOptions } from '@ai-sdk/assemblyai';
 import { readFile } from 'fs/promises';
 
 const result = await transcribe({
-  model: assemblyai.transcription('best'),
+  model: assemblyai.transcription('universal-3-5-pro'),
   audio: await readFile('audio.mp3'),
   providerOptions: {
     assemblyai: {
@@ -118,8 +129,9 @@ The following provider options are available:
 
 - **boostParam** _enum_
 
-  Boost parameter for the transcription.
+  Boost parameter for `wordBoost`.
   Allowed values: `'low'`, `'default'`, `'high'`.
+  **Deprecated** — only applies to the deprecated `wordBoost`; use `keytermsPrompt` instead.
   Optional.
 
 - **contentSafety** _boolean_
@@ -143,6 +155,12 @@ The following provider options are available:
   Whether to include disfluencies (um, uh, etc.) in the transcription.
   Optional.
 
+- **domain** _string_
+
+  Enable a domain-specific model for specialized terminology.
+  Currently supports `'medical-v1'` (Medical Mode).
+  Optional.
+
 - **entityDetection** _boolean_
 
   Whether to detect entities in the transcription.
@@ -163,6 +181,13 @@ The following provider options are available:
   Whether to include IAB categories in the transcription.
   Optional.
 
+- **keytermsPrompt** _array of strings_
+
+  Domain-specific keyterms to boost recognition for (max 6 words per phrase).
+  Replaces `wordBoost` for newer models — supported by `universal-3-pro`,
+  `universal-3-5-pro`, and `slam-1` (and `universal-2` when enabled for your account).
+  Optional.
+
 - **languageCode** _string_
 
   Language code for the audio.
@@ -179,11 +204,24 @@ The following provider options are available:
   Whether to enable language detection.
   Optional.
 
+- **languageDetectionOptions** _object_
+
+  Options for automatic language detection: `expectedLanguages` (array of
+  strings), `fallbackLanguage` (string), `codeSwitching` (boolean),
+  `codeSwitchingConfidenceThreshold` (number, 0-1).
+  Optional.
+
 - **multichannel** _boolean_
 
   Whether to process multiple audio channels separately.
   Optional.
 
+- **prompt** _string_
+
+  Natural-language context (up to 1,500 words) to steer the model.
+  Only supported by `universal-3-pro`, `universal-3-5-pro`, and `slam-1`.
+  Optional.
+
 - **punctuate** _boolean_
 
   Whether to add punctuation to the transcription.
@@ -199,6 +237,12 @@ The following provider options are available:
   Whether to redact PII in the audio file.
   Optional.
 
+- **redactPiiAudioOptions** _object_
+
+  Options for PII-redacted audio: `returnRedactedNoSpeechAudio` (boolean),
+  `overrideAudioRedactionMethod` (`'silence'`). Requires `redactPiiAudio`.
+  Optional.
+
 - **redactPiiAudioQuality** _enum_
 
   Quality of the redacted audio file.
@@ -211,12 +255,32 @@ The following provider options are available:
   Supports numerous types like `'person_name'`, `'phone_number'`, etc.
   Optional.
 
+- **redactPiiReturnUnredacted** _boolean_
+
+  Return the original unredacted transcript alongside the redacted one.
+  Requires `redactPii`.
+  Optional.
+
 - **redactPiiSub** _enum_
 
   Substitution method for redacted PII.
   Allowed values: `'entity_name'`, `'hash'`.
   Optional.
 
+- **redactStaticEntities** _object_
+
+  Map of user-defined labels to exact terms to redact, e.g.
+  `{ INTERNAL_TOOL: ['Bearclaw'] }`. Applied on top of standard PII redaction.
+  Requires `redactPii`.
+  Optional.
+
+- **removeAudioTags** _enum_
+
+  Remove inline annotations from rich transcripts.
+  Allowed values: `'all'` (all annotations), `'speaker'` (speaker cues only).
+  Applies to Universal-3 Pro models.
+  Optional.
+
 - **sentimentAnalysis** _boolean_
 
   Whether to perform sentiment analysis on the transcription.
@@ -227,6 +291,12 @@ The following provider options are available:
   Whether to label different speakers in the transcription.
   Optional.
 
+- **speakerOptions** _object_
+
+  Options for speaker diarization: `minSpeakersExpected` (number),
+  `maxSpeakersExpected` (number).
+  Optional.
+
 - **speakersExpected** _number_
 
   Expected number of speakers in the audio.
@@ -254,6 +324,12 @@ The following provider options are available:
   Allowed values: `'bullets'`, `'bullets_verbose'`, `'gist'`, `'headline'`, `'paragraph'`.
   Optional.
 
+- **temperature** _number_
+
+  Sampling temperature (0-1) controlling randomness.
+  Applies to Universal-3 Pro models.
+  Optional.
+
 - **webhookAuthHeaderName** _string_
 
   Name of the authentication header for webhook requests.
@@ -272,11 +348,60 @@ The following provider options are available:
 - **wordBoost** _array of strings_
 
   List of words to boost in the transcription.
+  **Deprecated** — rejected by `universal-3-pro`, `universal-3-5-pro`, and `slam-1`
+  (works only on `universal-2`/`best`); use `keytermsPrompt` instead.
   Optional.
 
+### Speaker diarization and audio-intelligence results
+
+The AI SDK's `transcribe` result exposes `text`, `segments`, `language`, and
+`durationInSeconds`. AssemblyAI's richer results — speaker diarization and
+audio-intelligence features — don't fit that shape, so they are surfaced in two
+places:
+
+- **`providerMetadata.assemblyai`** — structured results for the features you
+  enabled: `utterances` (speaker-diarized segments, when `speakerLabels` is set),
+  `entities`, `sentimentAnalysisResults`, `contentSafetyLabels`,
+  `iabCategoriesResult`, and `autoHighlightsResult`.
+- **`response.body`** — the complete, raw AssemblyAI transcript response, so any
+  field not surfaced above (e.g. `chapters`, word-level `speaker` labels) is
+  still available.
+
+Note: timings inside `providerMetadata` and `response.body` (e.g.
+`utterances[].start`) are in **milliseconds**, matching the AssemblyAI API —
+whereas the top-level `segments` use **seconds**.
+
+```ts
+import { experimental_transcribe as transcribe } from 'ai';
+import { assemblyai } from '@ai-sdk/assemblyai';
+import { readFile } from 'fs/promises';
+
+const result = await transcribe({
+  model: assemblyai.transcription('universal-3-5-pro'),
+  audio: await readFile('audio.mp3'),
+  providerOptions: {
+    assemblyai: {
+      speakerLabels: true,
+      entityDetection: true,
+    },
+  },
+});
+
+const { utterances, entities } = result.providerMetadata?.assemblyai ?? {};
+// utterances: [{ speaker: 'A', text: '…', start, end, … }, …]  (start/end in ms)
+```
+
+The following AssemblyAI features are **deprecated** by the API and not surfaced
+in `providerMetadata` (their output remains on the raw `response.body` if
+enabled): Summarization, Auto Chapters, and Custom Topics. Note also that some
+features are language-gated (e.g. sentiment analysis is English-centric); see
+AssemblyAI's documentation for per-language availability.
+
 ### Model Capabilities
 
-| Model  | Transcription       | Duration            | Segments            | Language            |
-| ------ | ------------------- | ------------------- | ------------------- | ------------------- |
-| `best` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
-| `nano` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| Model               | Transcription       | Duration            | Segments            | Language            |
+| ------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
+| `universal-3-5-pro` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `universal-3-pro`   | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `universal-2`       | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `best`              | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
diff --git a/examples/ai-functions/src/transcribe/assemblyai/basic.ts b/examples/ai-functions/src/transcribe/assemblyai/basic.ts
index 0c7ec77faf70..2f4abafce37e 100644
--- a/examples/ai-functions/src/transcribe/assemblyai/basic.ts
+++ b/examples/ai-functions/src/transcribe/assemblyai/basic.ts
@@ -5,7 +5,7 @@ import { run } from '../../lib/run';
 
 run(async () => {
   const result = await transcribe({
-    model: assemblyai.transcription('best'),
+    model: assemblyai.transcription('universal-3-5-pro'),
     audio: await readFile('data/galileo.mp3'),
   });
 
diff --git a/examples/ai-functions/src/transcribe/assemblyai/string.ts b/examples/ai-functions/src/transcribe/assemblyai/string.ts
index 664091306b78..a77334febd36 100644
--- a/examples/ai-functions/src/transcribe/assemblyai/string.ts
+++ b/examples/ai-functions/src/transcribe/assemblyai/string.ts
@@ -5,7 +5,7 @@ import { run } from '../../lib/run';
 
 run(async () => {
   const result = await transcribe({
-    model: assemblyai.transcription('best'),
+    model: assemblyai.transcription('universal-3-5-pro'),
     audio: Buffer.from(await readFile('./data/galileo.mp3')).toString('base64'),
   });
 
diff --git a/examples/ai-functions/src/transcribe/assemblyai/url.ts b/examples/ai-functions/src/transcribe/assemblyai/url.ts
index 6eab50c11acf..d675470c9869 100644
--- a/examples/ai-functions/src/transcribe/assemblyai/url.ts
+++ b/examples/ai-functions/src/transcribe/assemblyai/url.ts
@@ -4,7 +4,7 @@ import { run } from '../../lib/run';
 
 run(async () => {
   const result = await transcribe({
-    model: assemblyai.transcription('best'),
+    model: assemblyai.transcription('universal-3-5-pro'),
     audio: new URL(
       'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-functions/data/galileo.mp3',
     ),
diff --git a/packages/assemblyai/README.md b/packages/assemblyai/README.md
index 81df1dbb2313..9c387e5394e1 100644
--- a/packages/assemblyai/README.md
+++ b/packages/assemblyai/README.md
@@ -36,7 +36,7 @@ import { assemblyai } from '@ai-sdk/assemblyai';
 import { experimental_transcribe as transcribe } from 'ai';
 
 const { text } = await transcribe({
-  model: assemblyai.transcription('best'),
+  model: assemblyai.transcription('universal-3-5-pro'),
   audio: new URL(
     'https://github.com/vercel/ai/raw/refs/heads/main/examples/ai-functions/data/galileo.mp3',
   ),
diff --git a/packages/assemblyai/src/assemblyai-api-types.ts b/packages/assemblyai/src/assemblyai-api-types.ts
index 22fd08b0f30c..17d690374cc8 100644
--- a/packages/assemblyai/src/assemblyai-api-types.ts
+++ b/packages/assemblyai/src/assemblyai-api-types.ts
@@ -28,6 +28,7 @@ export type AssemblyAITranscriptionAPITypes = {
 
   /**
    * How much to boost specified words
+   * @deprecated Only used with the deprecated `word_boost`. Use `keyterms_prompt`.
    */
   boost_param?: 'low' | 'default' | 'high';
 
@@ -207,6 +208,16 @@ export type AssemblyAITranscriptionAPITypes = {
    */
   language_detection?: boolean;
 
+  /**
+   * Options for automatic language detection.
+   */
+  language_detection_options?: {
+    expected_languages?: string[];
+    fallback_language?: string;
+    code_switching?: boolean;
+    code_switching_confidence_threshold?: number;
+  };
+
   /**
    * Enable Multichannel transcription, can be true or false.
    * @default false
@@ -231,6 +242,14 @@ export type AssemblyAITranscriptionAPITypes = {
    */
   redact_pii_audio?: boolean;
 
+  /**
+   * Options for PII-redacted audio files. Requires redact_pii_audio.
+   */
+  redact_pii_audio_options?: {
+    return_redacted_no_speech_audio?: boolean;
+    override_audio_redaction_method?: 'silence';
+  };
+
   /**
    * Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav.
    */
@@ -286,11 +305,23 @@ export type AssemblyAITranscriptionAPITypes = {
     | 'zodiac_sign'
   >;
 
+  /**
+   * Return the original unredacted transcript alongside the redacted one.
+   * Requires redact_pii.
+   */
+  redact_pii_return_unredacted?: boolean;
+
   /**
    * The replacement logic for detected PII, can be "entity_name" or "hash".
    */
   redact_pii_sub?: 'entity_name' | 'hash';
 
+  /**
+   * Map of user-defined labels to exact terms to redact, applied on top of
+   * standard PII redaction. Requires redact_pii.
+   */
+  redact_static_entities?: Record<string, string[]>;
+
   /**
    * Enable Sentiment Analysis, can be true or false
    * @default false
@@ -303,6 +334,14 @@ export type AssemblyAITranscriptionAPITypes = {
    */
   speaker_labels?: boolean;
 
+  /**
+   * Options for speaker diarization, e.g. a range of possible speakers.
+   */
+  speaker_options?: {
+    min_speakers_expected?: number;
+    max_speakers_expected?: number;
+  };
+
   /**
    * Tells the speaker label model how many speakers it should attempt to identify, up to 10.
    */
@@ -310,8 +349,21 @@ export type AssemblyAITranscriptionAPITypes = {
 
   /**
    * The speech model to use for the transcription.
+   *
+   * @deprecated This parameter has been replaced with `speech_models`. It only
+   * supports the legacy `best` model. Use `speech_models` for `universal-2`,
+   * `universal-3-pro`, `universal-3-5-pro`, etc.
+   * @see https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model
    */
-  speech_model?: 'best' | 'nano';
+  speech_model?: 'best';
+
+  /**
+   * List of speech models in priority order, allowing the system to
+   * automatically route the audio to the best available option. When omitted,
+   * the API defaults to `['universal-3-pro', 'universal-2']`.
+   * @see https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model
+   */
+  speech_models?: string[];
 
   /**
    * Reject audio files that contain less than this fraction of speech. Valid values are in the range [0, 1] inclusive.
@@ -357,6 +409,37 @@ export type AssemblyAITranscriptionAPITypes = {
 
   /**
    * The list of custom vocabulary to boost transcription probability for
+   * @deprecated Rejected by `universal-3-pro` / `universal-3-5-pro` and
+   * `slam-1` (works only on `universal-2`/`best`). Use `keyterms_prompt`.
    */
   word_boost?: string[];
+
+  /**
+   * Domain-specific keyterms to boost (max 6 words per phrase). Replaces
+   * `word_boost` for `universal-3-pro` / `universal-3-5-pro` and `slam-1`.
+   */
+  keyterms_prompt?: string[];
+
+  /**
+   * Natural-language context (up to 1,500 words) to steer the model.
+   * Only supported by `universal-3-pro` / `universal-3-5-pro` and `slam-1`.
+   */
+  prompt?: string;
+
+  /**
+   * Sampling temperature (0-1) controlling randomness. Universal-3 Pro models.
+   */
+  temperature?: number;
+
+  /**
+   * Remove inline annotations from rich transcripts: `'all'` removes all
+   * annotations, `'speaker'` removes only speaker cues. Universal-3 Pro models.
+   */
+  remove_audio_tags?: 'all' | 'speaker';
+
+  /**
+   * Enable a domain-specific model to improve accuracy for specialized
+   * terminology, e.g. `'medical-v1'` for Medical Mode.
+   */
+  domain?: string;
 };
diff --git a/packages/assemblyai/src/assemblyai-provider.ts b/packages/assemblyai/src/assemblyai-provider.ts
index 9522b55ecc50..f938e1aa7821 100644
--- a/packages/assemblyai/src/assemblyai-provider.ts
+++ b/packages/assemblyai/src/assemblyai-provider.ts
@@ -14,7 +14,7 @@ import { VERSION } from './version';
 
 export interface AssemblyAIProvider extends ProviderV3 {
   (
-    modelId: 'best',
+    modelId: AssemblyAITranscriptionModelId,
     settings?: {},
   ): {
     transcription: AssemblyAITranscriptionModel;
diff --git a/packages/assemblyai/src/assemblyai-transcription-model.test.ts b/packages/assemblyai/src/assemblyai-transcription-model.test.ts
index 2147a6c05b66..048e32b9df0f 100644
--- a/packages/assemblyai/src/assemblyai-transcription-model.test.ts
+++ b/packages/assemblyai/src/assemblyai-transcription-model.test.ts
@@ -233,6 +233,16 @@ describe('doGenerate', () => {
         summary_model: 'informative',
         summary: '- Hello, world!',
         sentiment_analysis: true,
+        sentiment_analysis_results: [
+          {
+            text: 'Hello, world!',
+            start: 250,
+            end: 26950,
+            sentiment: 'POSITIVE',
+            confidence: 0.9,
+            speaker: 'A',
+          },
+        ],
         entity_detection: true,
         entities: [
           {
@@ -256,18 +266,361 @@ describe('doGenerate', () => {
     };
   }
 
-  it('should pass the model', async () => {
+  it('should pass the legacy model via the speech_model parameter', async () => {
     prepareJsonResponse();
 
-    await model.doGenerate({
+    const result = await model.doGenerate({
       audio: audioData,
       mediaType: 'audio/wav',
     });
 
-    expect(await server.calls[1].requestBodyJson).toMatchObject({
+    const requestBody = await server.calls[1].requestBodyJson;
+    expect(requestBody).toMatchObject({
       audio_url: 'https://storage.assemblyai.com/mock-upload-url',
       speech_model: 'best',
     });
+    expect(requestBody.speech_models).toBeUndefined();
+
+    const [deprecation] = result.warnings.filter(
+      warning => warning.type === 'other',
+    );
+    expect(deprecation?.message).toContain('universal-3-5-pro');
+    expect(deprecation?.message).toContain(
+      'https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model',
+    );
+  });
+
+  it('should pass newer models via the speech_models parameter', async () => {
+    prepareJsonResponse();
+
+    const result = await provider
+      .transcription('universal-3-5-pro')
+      .doGenerate({
+        audio: audioData,
+        mediaType: 'audio/wav',
+      });
+
+    const requestBody = await server.calls[1].requestBodyJson;
+    expect(requestBody).toMatchObject({
+      audio_url: 'https://storage.assemblyai.com/mock-upload-url',
+      speech_models: ['universal-3-5-pro'],
+    });
+    expect(requestBody.speech_model).toBeUndefined();
+
+    // No deprecation and no nudge for the latest flagship model.
+    expect(result.warnings).toEqual([]);
+  });
+
+  it('should route universal-3-pro via speech_models and nudge to universal-3-5-pro', async () => {
+    prepareJsonResponse();
+
+    const result = await provider.transcription('universal-3-pro').doGenerate({
+      audio: audioData,
+      mediaType: 'audio/wav',
+    });
+
+    const requestBody = await server.calls[1].requestBodyJson;
+    expect(requestBody.speech_models).toEqual(['universal-3-pro']);
+    expect(requestBody.speech_model).toBeUndefined();
+
+    // universal-3-pro is the model universal-3-5-pro replaces, so the message
+    // names it explicitly.
+    const [nudge] = result.warnings.filter(w => w.type === 'other');
+    expect(nudge?.message).toContain('universal-3-5-pro');
+    expect(nudge?.message).toContain("replace 'universal-3-pro'");
+  });
+
+  it('should nudge universal-2 users toward universal-3-5-pro', async () => {
+    prepareJsonResponse();
+
+    const result = await provider.transcription('universal-2').doGenerate({
+      audio: audioData,
+      mediaType: 'audio/wav',
+    });
+
+    // The universal-2 nudge must not use the "replace 'universal-3-pro'" wording.
+    const [nudge] = result.warnings.filter(w => w.type === 'other');
+    expect(nudge?.message).toContain('universal-3-5-pro');
+    expect(nudge?.message).not.toContain("replace 'universal-3-pro'");
+  });
+
+  it('should not special-case the removed nano model', async () => {
+    prepareJsonResponse();
+
+    const result = await provider.transcription('nano').doGenerate({
+      audio: audioData,
+      mediaType: 'audio/wav',
+    });
+
+    // `nano` is no longer a legacy `speech_model` alias: it falls through to
+    // `speech_models` (where the live API rejects it) and emits no warning.
+    const requestBody = await server.calls[1].requestBodyJson;
+    expect(requestBody.speech_models).toEqual(['nano']);
+    expect(requestBody.speech_model).toBeUndefined();
+    expect(result.warnings).toEqual([]);
+  });
+
+  it('should still send provider options alongside speech_models', async () => {
+    prepareJsonResponse();
+
+    await provider.transcription('universal-3-5-pro').doGenerate({
+      audio: audioData,
+      mediaType: 'audio/wav',
+      providerOptions: {
+        assemblyai: {
+          languageDetection: true,
+          punctuate: false,
+        },
+      },
+    });
+
+    const requestBody = await server.calls[1].requestBodyJson;
+    expect(requestBody).toMatchObject({
+      speech_models: ['universal-3-5-pro'],
+      language_detection: true,
+      punctuate: false,
+    });
+  });
+
+  it('should surface diarization + audio-intelligence via providerMetadata', async () => {
+    prepareJsonResponse();
+
+    const result = await provider
+      .transcription('universal-3-5-pro')
+      .doGenerate({
+        audio: audioData,
+        mediaType: 'audio/wav',
+      });
+
+    const metadata = result.providerMetadata?.assemblyai as
+      | Record<string, any>
+      | undefined;
+    expect(metadata).toBeDefined();
+
+    // Speaker diarization
+    expect(metadata?.utterances?.[0]).toMatchObject({
+      speaker: 'A',
+      text: 'Hello, world!',
+    });
+
+    // Audio-intelligence results
+    expect(metadata?.entities?.[0]).toMatchObject({
+      entity_type: 'location',
+      text: 'Canada',
+    });
+    expect(metadata?.sentimentAnalysisResults?.[0]).toMatchObject({
+      sentiment: 'POSITIVE',
+      text: 'Hello, world!',
+    });
+    expect(metadata?.contentSafetyLabels).toBeDefined();
+    expect(metadata?.iabCategoriesResult).toBeDefined();
+    expect(metadata?.autoHighlightsResult).toBeDefined();
+  });
+
+  it('should preserve the full raw response on response.body', async () => {
+    prepareJsonResponse();
+
+    const result = await provider
+      .transcription('universal-3-5-pro')
+      .doGenerate({
+        audio: audioData,
+        mediaType: 'audio/wav',
+      });
+
+    const body = result.response.body as Record<string, any>;
+    // Word-level speaker label survives on the raw body.
+    expect(body.words[0].speaker).toBe('speaker');
+    // Fields not modeled in our schema (e.g. chapters, summary) are no longer
+    // stripped — proves response.body is the raw response, not the parsed one.
+    expect(body.chapters).toBeDefined();
+    expect(body.summary).toBe('- Hello, world!');
+  });
+
+  it('should pass the Universal-3-Pro input params', async () => {
+    prepareJsonResponse();
+
+    await provider.transcription('universal-3-5-pro').doGenerate({
+      audio: audioData,
+      mediaType: 'audio/wav',
+      providerOptions: {
+        assemblyai: {
+          prompt: 'This is a conversation about the AI SDK.',
+          keytermsPrompt: ['Vercel', 'AI SDK'],
+          temperature: 0.2,
+          removeAudioTags: 'speaker',
+          domain: 'medical-v1',
+        },
+      },
+    });
+
+    const requestBody = await server.calls[1].requestBodyJson;
+    expect(requestBody).toMatchObject({
+      speech_models: ['universal-3-5-pro'],
+      prompt: 'This is a conversation about the AI SDK.',
+      keyterms_prompt: ['Vercel', 'AI SDK'],
+      temperature: 0.2,
+      remove_audio_tags: 'speaker',
+      domain: 'medical-v1',
+    });
+  });
+
+  it('should pass the GA nested input params', async () => {
+    prepareJsonResponse();
+
+    await provider.transcription('universal-3-5-pro').doGenerate({
+      audio: audioData,
+      mediaType: 'audio/wav',
+      providerOptions: {
+        assemblyai: {
+          redactPii: true,
+          speakerOptions: { minSpeakersExpected: 1, maxSpeakersExpected: 3 },
+          languageDetectionOptions: {
+            expectedLanguages: ['en', 'es'],
+            fallbackLanguage: 'en',
+            codeSwitching: true,
+            codeSwitchingConfidenceThreshold: 0.5,
+          },
+          redactPiiAudioOptions: {
+            returnRedactedNoSpeechAudio: true,
+            overrideAudioRedactionMethod: 'silence',
+          },
+          redactPiiReturnUnredacted: true,
+          redactStaticEntities: { INTERNAL_TOOL: ['Bearclaw'] },
+        },
+      },
+    });
+
+    const requestBody = await server.calls[1].requestBodyJson;
+    expect(requestBody).toMatchObject({
+      speaker_options: { min_speakers_expected: 1, max_speakers_expected: 3 },
+      language_detection_options: {
+        expected_languages: ['en', 'es'],
+        fallback_language: 'en',
+        code_switching: true,
+        code_switching_confidence_threshold: 0.5,
+      },
+      redact_pii_audio_options: {
+        return_redacted_no_speech_audio: true,
+        override_audio_redaction_method: 'silence',
+      },
+      redact_pii_return_unredacted: true,
+      redact_static_entities: { INTERNAL_TOOL: ['Bearclaw'] },
+    });
+  });
+
+  it('should warn when deprecated wordBoost/boostParam options are used', async () => {
+    prepareJsonResponse();
+
+    const result = await provider
+      .transcription('universal-3-5-pro')
+      .doGenerate({
+        audio: audioData,
+        mediaType: 'audio/wav',
+        providerOptions: {
+          assemblyai: { wordBoost: ['Vercel'], boostParam: 'high' },
+        },
+      });
+
+    const [boostWarning] = result.warnings.filter(w => w.type === 'other');
+    expect(boostWarning?.message).toContain('wordBoost');
+    expect(boostWarning?.message).toContain('boostParam');
+    expect(boostWarning?.message).toContain('keytermsPrompt');
+  });
+
+  it('should attribute the deprecation warning to boostParam when only boostParam is set', async () => {
+    prepareJsonResponse();
+
+    const result = await provider
+      .transcription('universal-3-5-pro')
+      .doGenerate({
+        audio: audioData,
+        mediaType: 'audio/wav',
+        providerOptions: { assemblyai: { boostParam: 'high' } },
+      });
+
+    const [boostWarning] = result.warnings.filter(w => w.type === 'other');
+    expect(boostWarning?.message).toContain('boostParam');
+    expect(boostWarning?.message).not.toContain('wordBoost');
+    expect(boostWarning?.message).toContain('keytermsPrompt');
+  });
+
+  it('should warn when redactPii-dependent options are set without redactPii', async () => {
+    prepareJsonResponse();
+
+    const result = await provider
+      .transcription('universal-3-5-pro')
+      .doGenerate({
+        audio: audioData,
+        mediaType: 'audio/wav',
+        providerOptions: {
+          assemblyai: { redactStaticEntities: { TOOL: ['Vercel'] } },
+        },
+      });
+
+    expect(
+      result.warnings.some(
+        w => w.type === 'other' && w.message.includes('redactPii'),
+      ),
+    ).toBe(true);
+  });
+
+  it('should warn when redactPiiAudioOptions is set without redactPiiAudio', async () => {
+    prepareJsonResponse();
+
+    const result = await provider
+      .transcription('universal-3-5-pro')
+      .doGenerate({
+        audio: audioData,
+        mediaType: 'audio/wav',
+        providerOptions: {
+          assemblyai: {
+            redactPii: true,
+            redactPiiAudioOptions: { overrideAudioRedactionMethod: 'silence' },
+          },
+        },
+      });
+
+    expect(
+      result.warnings.some(
+        w => w.type === 'other' && w.message.includes('redactPiiAudio'),
+      ),
+    ).toBe(true);
+  });
+
+  it('should warn when languageCode and languageDetection are combined', async () => {
+    prepareJsonResponse();
+
+    const result = await provider
+      .transcription('universal-3-5-pro')
+      .doGenerate({
+        audio: audioData,
+        mediaType: 'audio/wav',
+        providerOptions: {
+          assemblyai: { languageCode: 'en', languageDetection: true },
+        },
+      });
+
+    expect(
+      result.warnings.some(
+        w => w.type === 'other' && w.message.includes('languageDetection'),
+      ),
+    ).toBe(true);
+  });
+
+  it('should report segment timings in seconds (ms converted)', async () => {
+    prepareJsonResponse();
+
+    const result = await model.doGenerate({
+      audio: audioData,
+      mediaType: 'audio/wav',
+    });
+
+    // Fixture word[0] is start: 250ms, end: 650ms → 0.25s / 0.65s.
+    expect(result.segments[0]).toEqual({
+      text: 'Hello,',
+      startSecond: 0.25,
+      endSecond: 0.65,
+    });
   });
 
   it('should pass headers', async () => {
diff --git a/packages/assemblyai/src/assemblyai-transcription-model.ts b/packages/assemblyai/src/assemblyai-transcription-model.ts
index 3d39255e32a4..1326186384fe 100644
--- a/packages/assemblyai/src/assemblyai-transcription-model.ts
+++ b/packages/assemblyai/src/assemblyai-transcription-model.ts
@@ -1,4 +1,8 @@
-import type { TranscriptionModelV3, SharedV3Warning } from '@ai-sdk/provider';
+import type {
+  TranscriptionModelV3,
+  SharedV3Warning,
+  SharedV3ProviderMetadata,
+} from '@ai-sdk/provider';
 import {
   combineHeaders,
   createJsonResponseHandler,
@@ -32,8 +36,12 @@ const assemblyaiTranscriptionModelOptionsSchema = z.object({
    */
   autoHighlights: z.boolean().nullish(),
   /**
-   * Boost parameter for the transcription.
+   * How strongly to boost the words listed in `wordBoost`.
    * Allowed values: 'low', 'default', 'high'.
+   *
+   * @deprecated Only applies to the deprecated `wordBoost` option. Use
+   * `keytermsPrompt` instead, which works with the recommended `universal-*`
+   * models.
    */
   boostParam: z.string().nullish(),
   /**
@@ -59,6 +67,11 @@ const assemblyaiTranscriptionModelOptionsSchema = z.object({
    * Whether to include filler words (um, uh, etc.) in the transcription.
    */
   disfluencies: z.boolean().nullish(),
+  /**
+   * Enable a domain-specific model to improve accuracy for specialized
+   * terminology. Currently supports `'medical-v1'` (Medical Mode).
+   */
+  domain: z.string().nullish(),
   /**
    * Whether to enable entity detection.
    */
@@ -75,6 +88,13 @@ const assemblyaiTranscriptionModelOptionsSchema = z.object({
    * Whether to enable IAB categories detection.
    */
   iabCategories: z.boolean().nullish(),
+  /**
+   * Domain-specific keyterms to boost recognition for (max 6 words per phrase).
+   * Replaces `wordBoost` for newer models: supported by `universal-3-pro` /
+   * `universal-3-5-pro` and `slam-1` (and `universal-2` when metaphone is
+   * enabled for the account).
+   */
+  keytermsPrompt: z.array(z.string()).nullish(),
   /**
    * Language code for the transcription.
    */
@@ -87,10 +107,30 @@ const assemblyaiTranscriptionModelOptionsSchema = z.object({
    * Whether to enable language detection.
    */
   languageDetection: z.boolean().nullish(),
+  /**
+   * Options for automatic language detection.
+   */
+  languageDetectionOptions: z
+    .object({
+      /** List of languages expected in the audio file. */
+      expectedLanguages: z.array(z.string()).nullish(),
+      /** Fallback language if the detected language is not expected. */
+      fallbackLanguage: z.string().nullish(),
+      /** Whether code switching should be detected. */
+      codeSwitching: z.boolean().nullish(),
+      /** Confidence threshold for code switching detection (0-1). */
+      codeSwitchingConfidenceThreshold: z.number().min(0).max(1).nullish(),
+    })
+    .nullish(),
   /**
    * Whether to process audio as multichannel.
    */
   multichannel: z.boolean().nullish(),
+  /**
+   * Provide natural-language context (up to 1,500 words) to steer the model.
+   * Only supported by `universal-3-pro` / `universal-3-5-pro` and `slam-1`.
+   */
+  prompt: z.string().nullish(),
   /**
    * Whether to add punctuation to the transcription.
    */
@@ -103,6 +143,17 @@ const assemblyaiTranscriptionModelOptionsSchema = z.object({
    * Whether to redact PII in the audio file.
    */
   redactPiiAudio: z.boolean().nullish(),
+  /**
+   * Options for PII-redacted audio files. Requires `redactPiiAudio`.
+   */
+  redactPiiAudioOptions: z
+    .object({
+      /** Return redacted audio even for files without detected speech. */
+      returnRedactedNoSpeechAudio: z.boolean().nullish(),
+      /** Redaction method; set to `'silence'` to replace PII with silence. */
+      overrideAudioRedactionMethod: z.enum(['silence']).nullish(),
+    })
+    .nullish(),
   /**
    * Audio format for PII redaction.
    */
@@ -111,10 +162,26 @@ const assemblyaiTranscriptionModelOptionsSchema = z.object({
    * List of PII types to redact.
    */
   redactPiiPolicies: z.array(z.string()).nullish(),
+  /**
+   * Return the original unredacted transcript alongside the redacted one.
+   * Requires `redactPii`.
+   */
+  redactPiiReturnUnredacted: z.boolean().nullish(),
   /**
    * Substitution method for redacted PII.
    */
   redactPiiSub: z.string().nullish(),
+  /**
+   * Map of user-defined labels to exact terms to redact, e.g.
+   * `{ INTERNAL_TOOL: ['Bearclaw'] }`. Applied on top of standard PII redaction
+   * using `redactPiiSub`. Requires `redactPii`.
+   */
+  redactStaticEntities: z.record(z.string(), z.array(z.string())).nullish(),
+  /**
+   * Remove inline annotations from rich transcripts (Universal-3 Pro models).
+   * `'all'` removes every annotation; `'speaker'` removes only speaker cues.
+   */
+  removeAudioTags: z.enum(['all', 'speaker']).nullish(),
   /**
    * Whether to enable sentiment analysis.
    */
@@ -123,6 +190,17 @@ const assemblyaiTranscriptionModelOptionsSchema = z.object({
    * Whether to identify different speakers in the audio.
    */
   speakerLabels: z.boolean().nullish(),
+  /**
+   * Options for speaker diarization, e.g. a range of possible speakers.
+   */
+  speakerOptions: z
+    .object({
+      /** Minimum number of speakers expected in the audio file. */
+      minSpeakersExpected: z.number().int().nullish(),
+      /** Maximum number of speakers expected in the audio file. */
+      maxSpeakersExpected: z.number().int().nullish(),
+    })
+    .nullish(),
   /**
    * Number of speakers expected in the audio.
    */
@@ -143,6 +221,10 @@ const assemblyaiTranscriptionModelOptionsSchema = z.object({
    * Type of summary to generate.
    */
   summaryType: z.string().nullish(),
+  /**
+   * Sampling temperature (0-1) controlling randomness (Universal-3 Pro models).
+   */
+  temperature: z.number().min(0).max(1).nullish(),
   /**
    * Name of the authentication header for webhook requests.
    */
@@ -157,6 +239,10 @@ const assemblyaiTranscriptionModelOptionsSchema = z.object({
   webhookUrl: z.string().nullish(),
   /**
    * List of words to boost recognition for.
+   *
+   * @deprecated `wordBoost` is rejected by `universal-3-pro` /
+   * `universal-3-5-pro` and `slam-1` (it only works on `universal-2`/`best`).
+   * Use `keytermsPrompt` instead.
    */
   wordBoost: z.array(z.string()).nullish(),
 });
@@ -200,9 +286,41 @@ export class AssemblyAITranscriptionModel implements TranscriptionModelV3 {
       schema: assemblyaiTranscriptionModelOptionsSchema,
     });
 
-    const body: Omit<AssemblyAITranscriptionAPITypes, 'audio_url'> = {
-      speech_model: this.modelId,
-    };
+    const body: Omit<AssemblyAITranscriptionAPITypes, 'audio_url'> = {};
+
+    // The legacy `best` model is selected via the deprecated singular
+    // `speech_model` parameter. All other models (e.g. `universal-2`,
+    // `universal-3-pro`, `universal-3-5-pro`) are only accessible via the
+    // `speech_models` array and are rejected by `speech_model`.
+    // See https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model
+    if (this.modelId === 'best') {
+      body.speech_model = this.modelId as 'best';
+      warnings.push({
+        type: 'other',
+        message:
+          "The 'best' model is a legacy AssemblyAI model. Use 'universal-3-5-pro' instead. See documentation: https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model",
+      });
+    } else {
+      body.speech_models = [this.modelId];
+
+      // Forward-looking nudge: universal-3-5-pro is AssemblyAI's latest
+      // flagship and is set to replace universal-3-pro; universal-2 callers
+      // get the same pointer. Not a deprecation — informational warning only.
+      if (
+        this.modelId === 'universal-3-pro' ||
+        this.modelId === 'universal-2'
+      ) {
+        const docsUrl =
+          'https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model';
+        warnings.push({
+          type: 'other',
+          message:
+            this.modelId === 'universal-3-pro'
+              ? `'universal-3-5-pro' is AssemblyAI's latest flagship model and is set to replace 'universal-3-pro'. See ${docsUrl}`
+              : `'universal-3-5-pro' is AssemblyAI's latest flagship model. See ${docsUrl}`,
+        });
+      }
+    }
 
     // Add provider-specific options
     if (assemblyaiOptions) {
@@ -252,6 +370,103 @@ export class AssemblyAITranscriptionModel implements TranscriptionModelV3 {
         assemblyaiOptions.webhookAuthHeaderValue ?? undefined;
       body.webhook_url = assemblyaiOptions.webhookUrl ?? undefined;
       body.word_boost = assemblyaiOptions.wordBoost ?? undefined;
+      body.keyterms_prompt = assemblyaiOptions.keytermsPrompt ?? undefined;
+      body.prompt = assemblyaiOptions.prompt ?? undefined;
+      body.temperature = assemblyaiOptions.temperature ?? undefined;
+      body.remove_audio_tags = assemblyaiOptions.removeAudioTags ?? undefined;
+      body.domain = assemblyaiOptions.domain ?? undefined;
+      body.redact_pii_return_unredacted =
+        assemblyaiOptions.redactPiiReturnUnredacted ?? undefined;
+      body.redact_static_entities =
+        assemblyaiOptions.redactStaticEntities ?? undefined;
+
+      if (assemblyaiOptions.speakerOptions) {
+        body.speaker_options = {
+          min_speakers_expected:
+            assemblyaiOptions.speakerOptions.minSpeakersExpected ?? undefined,
+          max_speakers_expected:
+            assemblyaiOptions.speakerOptions.maxSpeakersExpected ?? undefined,
+        };
+      }
+
+      if (assemblyaiOptions.languageDetectionOptions) {
+        body.language_detection_options = {
+          expected_languages:
+            assemblyaiOptions.languageDetectionOptions.expectedLanguages ??
+            undefined,
+          fallback_language:
+            assemblyaiOptions.languageDetectionOptions.fallbackLanguage ??
+            undefined,
+          code_switching:
+            assemblyaiOptions.languageDetectionOptions.codeSwitching ??
+            undefined,
+          code_switching_confidence_threshold:
+            assemblyaiOptions.languageDetectionOptions
+              .codeSwitchingConfidenceThreshold ?? undefined,
+        };
+      }
+
+      if (assemblyaiOptions.redactPiiAudioOptions) {
+        body.redact_pii_audio_options = {
+          return_redacted_no_speech_audio:
+            assemblyaiOptions.redactPiiAudioOptions
+              .returnRedactedNoSpeechAudio ?? undefined,
+          override_audio_redaction_method:
+            assemblyaiOptions.redactPiiAudioOptions
+              .overrideAudioRedactionMethod ?? undefined,
+        };
+      }
+
+      const deprecatedBoostOptions: string[] = [];
+      if (assemblyaiOptions.wordBoost != null) {
+        deprecatedBoostOptions.push('wordBoost');
+      }
+      if (assemblyaiOptions.boostParam != null) {
+        deprecatedBoostOptions.push('boostParam');
+      }
+      if (deprecatedBoostOptions.length > 0) {
+        warnings.push({
+          type: 'other',
+          message: `${deprecatedBoostOptions.join(', ')} ${
+            deprecatedBoostOptions.length > 1 ? 'are' : 'is'
+          } deprecated and rejected by 'universal-3-pro' / 'universal-3-5-pro' and 'slam-1'. Use 'keytermsPrompt' instead.`,
+        });
+      }
+
+      // The following options only take effect alongside a prerequisite
+      // option; without it AssemblyAI either rejects the request (400) or
+      // silently ignores the option. Warn rather than mutate user input.
+      if (
+        (assemblyaiOptions.redactPiiReturnUnredacted != null ||
+          assemblyaiOptions.redactStaticEntities != null) &&
+        !assemblyaiOptions.redactPii
+      ) {
+        warnings.push({
+          type: 'other',
+          message:
+            "'redactPiiReturnUnredacted' and 'redactStaticEntities' require 'redactPii' to be enabled; AssemblyAI rejects the request otherwise.",
+        });
+      }
+      if (
+        assemblyaiOptions.redactPiiAudioOptions != null &&
+        !assemblyaiOptions.redactPiiAudio
+      ) {
+        warnings.push({
+          type: 'other',
+          message:
+            "'redactPiiAudioOptions' only applies when 'redactPiiAudio' is enabled; it is otherwise ignored.",
+        });
+      }
+      if (
+        assemblyaiOptions.languageCode != null &&
+        assemblyaiOptions.languageDetection
+      ) {
+        warnings.push({
+          type: 'other',
+          message:
+            "'languageDetection' cannot be combined with an explicit 'languageCode'; AssemblyAI rejects requests that set both.",
+        });
+      }
     }
 
     return {
@@ -271,17 +486,22 @@ export class AssemblyAITranscriptionModel implements TranscriptionModelV3 {
     abortSignal?: AbortSignal,
   ): Promise<{
     transcript: z.infer<typeof assemblyaiTranscriptionResponseSchema>;
+    rawTranscript: unknown;
     responseHeaders: Record<string, string>;
   }> {
     const pollingInterval =
       this.config.pollingInterval ?? this.POLLING_INTERVAL_MS;
 
+    // Honor a caller-provided fetch (proxy, auth injection, tests) for the
+    // polling GETs, matching the upload/submit calls that use config.fetch.
+    const fetchImpl = this.config.fetch ?? globalThis.fetch;
+
     while (true) {
       if (abortSignal?.aborted) {
         throw new Error('Transcription request was aborted');
       }
 
-      const response = await fetch(
+      const response = await fetchImpl(
         this.config.url({
           path: `/v2/transcript/${transcriptId}`,
           modelId: this.modelId,
@@ -307,13 +527,14 @@ export class AssemblyAITranscriptionModel implements TranscriptionModelV3 {
         });
       }
 
-      const transcript = assemblyaiTranscriptionResponseSchema.parse(
-        await response.json(),
-      );
+      const rawTranscript = await response.json();
+      const transcript =
+        assemblyaiTranscriptionResponseSchema.parse(rawTranscript);
 
       if (transcript.status === 'completed') {
         return {
           transcript,
+          rawTranscript,
           responseHeaders: extractResponseHeaders(response),
         };
       }
@@ -374,29 +595,70 @@ export class AssemblyAITranscriptionModel implements TranscriptionModelV3 {
       fetch: this.config.fetch,
     });
 
-    const { transcript, responseHeaders } = await this.waitForCompletion(
-      submitResponse.id,
-      options.headers,
-      options.abortSignal,
-    );
+    const { transcript, rawTranscript, responseHeaders } =
+      await this.waitForCompletion(
+        submitResponse.id,
+        options.headers,
+        options.abortSignal,
+      );
+
+    // Surface diarization and audio-intelligence results that the AI SDK's
+    // `segments` shape can't represent, keyed under `assemblyai`. Presence is
+    // gated on the parsed transcript, but values are taken from the raw
+    // response so no fields are stripped by the schema.
+    //
+    // NOTE: timings inside these objects (e.g. `utterances[].start`) are in
+    // milliseconds, matching the AssemblyAI API — unlike the top-level
+    // `segments`, whose `startSecond`/`endSecond` are in seconds.
+    const raw = (rawTranscript ?? {}) as Record<string, unknown>;
+    const assemblyaiMetadata: Record<string, unknown> = {};
+    if (transcript.utterances != null) {
+      assemblyaiMetadata.utterances = raw.utterances;
+    }
+    if (transcript.sentiment_analysis_results != null) {
+      assemblyaiMetadata.sentimentAnalysisResults =
+        raw.sentiment_analysis_results;
+    }
+    if (transcript.entities != null) {
+      assemblyaiMetadata.entities = raw.entities;
+    }
+    if (transcript.content_safety_labels != null) {
+      assemblyaiMetadata.contentSafetyLabels = raw.content_safety_labels;
+    }
+    if (transcript.iab_categories_result != null) {
+      assemblyaiMetadata.iabCategoriesResult = raw.iab_categories_result;
+    }
+    if (transcript.auto_highlights_result != null) {
+      assemblyaiMetadata.autoHighlightsResult = raw.auto_highlights_result;
+    }
+
+    const lastWordEndMs = transcript.words?.at(-1)?.end;
 
     return {
       text: transcript.text ?? '',
+      // AssemblyAI returns word timings in milliseconds; the AI SDK reports
+      // segment timings in seconds.
       segments:
         transcript.words?.map(word => ({
           text: word.text,
-          startSecond: word.start,
-          endSecond: word.end,
+          startSecond: word.start / 1000,
+          endSecond: word.end / 1000,
         })) ?? [],
       language: transcript.language_code ?? undefined,
       durationInSeconds:
-        transcript.audio_duration ?? transcript.words?.at(-1)?.end ?? undefined,
+        transcript.audio_duration ??
+        (lastWordEndMs != null ? lastWordEndMs / 1000 : undefined),
       warnings,
+      ...(Object.keys(assemblyaiMetadata).length > 0 && {
+        providerMetadata: {
+          assemblyai: assemblyaiMetadata,
+        } as SharedV3ProviderMetadata,
+      }),
       response: {
         timestamp: currentDate,
         modelId: this.modelId,
         headers: responseHeaders, // Headers from final GET request
-        body: transcript, // Raw response from final GET request
+        body: rawTranscript, // Full raw response from final GET request
       },
     };
   }
@@ -411,20 +673,65 @@ const assemblyaiSubmitResponseSchema = z.object({
   status: z.enum(['queued', 'processing', 'completed', 'error']),
 });
 
+const assemblyaiWordSchema = z.object({
+  start: z.number(),
+  end: z.number(),
+  text: z.string(),
+  confidence: z.number().nullish(),
+  // Speaker label (e.g. 'A', 'B') when speaker diarization is enabled, else null.
+  speaker: z.string().nullish(),
+  channel: z.string().nullish(),
+});
+
 const assemblyaiTranscriptionResponseSchema = z.object({
   id: z.string(),
   status: z.enum(['queued', 'processing', 'completed', 'error']),
   text: z.string().nullish(),
   language_code: z.string().nullish(),
-  words: z
+  speech_model_used: z.string().nullish(),
+  words: z.array(assemblyaiWordSchema).nullish(),
+  // Speaker-diarized utterances (present when `speaker_labels` is enabled).
+  utterances: z
     .array(
       z.object({
         start: z.number(),
         end: z.number(),
         text: z.string(),
+        confidence: z.number().nullish(),
+        speaker: z.string().nullish(),
+        channel: z.string().nullish(),
+        words: z.array(assemblyaiWordSchema).nullish(),
+      }),
+    )
+    .nullish(),
+  // Audio-intelligence results, present only when the matching feature is
+  // enabled. Kept intentionally permissive (the full structures are also
+  // available on the raw `response.body`).
+  sentiment_analysis_results: z
+    .array(
+      z.object({
+        text: z.string(),
+        start: z.number().nullish(),
+        end: z.number().nullish(),
+        sentiment: z.string(),
+        confidence: z.number().nullish(),
+        speaker: z.string().nullish(),
+      }),
+    )
+    .nullish(),
+  entities: z
+    .array(
+      z.object({
+        entity_type: z.string(),
+        text: z.string(),
+        start: z.number().nullish(),
+        end: z.number().nullish(),
       }),
     )
     .nullish(),
+  content_safety_labels: z.record(z.string(), z.any()).nullish(),
+  iab_categories_result: z.record(z.string(), z.any()).nullish(),
+  auto_highlights_result: z.record(z.string(), z.any()).nullish(),
   audio_duration: z.number().nullish(),
   error: z.string().nullish(),
 });
diff --git a/packages/assemblyai/src/assemblyai-transcription-settings.ts b/packages/assemblyai/src/assemblyai-transcription-settings.ts
index f83e8d2e297a..ef3ba0c1ba1a 100644
--- a/packages/assemblyai/src/assemblyai-transcription-settings.ts
+++ b/packages/assemblyai/src/assemblyai-transcription-settings.ts
@@ -1 +1,15 @@
-export type AssemblyAITranscriptionModelId = 'best' | 'nano';
+/**
+ * Legacy AssemblyAI speech model, sent via the deprecated singular
+ * `speech_model` request parameter.
+ *
+ * @deprecated Use `universal-3-5-pro` instead.
+ * @see https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model
+ */
+export type AssemblyAIDeprecatedTranscriptionModelId = 'best';
+
+export type AssemblyAITranscriptionModelId =
+  | 'universal-2'
+  | 'universal-3-pro'
+  | 'universal-3-5-pro'
+  | AssemblyAIDeprecatedTranscriptionModelId
+  | (string & {});