From a292b8c4ddd6996784a0407ee36e8d31fe934ea9 Mon Sep 17 00:00:00 2001
From: hm21 <alex.frei@waio.ch>
Date: Thu, 11 Jun 2026 09:37:13 +0200
Subject: [PATCH] feat(audio): add pitch-preserving playback speed support for
 audio extraction across platforms

---
 CHANGELOG.md                                  |   3 +
 .../src/features/audio/ExtractAudio.kt        | 206 +++++++++++++++++-
 .../src/features/audio/PcmSpeedProcessor.kt   |  76 +++++++
 .../src/features/audio/WavFileWriter.kt       |  89 +++++++-
 .../audio/models/AudioExtractConfig.kt        |   4 +
 .../shared/features/audio/ExtractAudio.swift  | 106 +++++++--
 .../audio/models/AudioExtractConfig.swift     |  11 +-
 .../integration_test/audio_extract_test.dart  |  64 ++++++
 .../audio/audio_extract_example_page.dart     |  32 +++
 .../audio/audio_extract_configs_model.dart    |  21 +-
 pubspec.yaml                                  |   2 +-
 11 files changed, 575 insertions(+), 39 deletions(-)
 create mode 100644 android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/PcmSpeedProcessor.kt

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab9c5ecf..17124535 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 1.18.0
+- **FEAT**(android, iOS, macOS): Add `speed` to `AudioExtractConfigs` for pitch-preserving playback-speed changes during audio extraction (e.g. `2.0` for double speed, `0.5` for half). WAV applies the time-stretch directly on the decoded PCM (Android `SonicAudioProcessor`; AVFoundation `AVAssetReaderAudioMixOutput` with the spectral time-pitch algorithm), while compressed formats are re-encoded — Android via a Media3 `Transformer`, Darwin via `AVAssetExportPresetAppleM4A` with a scaled time range. Note: a non-`1.0` speed always re-encodes compressed output to AAC: on Android the result is AAC in an MP4/M4A container regardless of the requested format, and on Apple platforms `caf` output falls back to M4A content.
+
 ## 1.17.6
 - **FIX**(iOS, macOS): Fix unstable/glitchy audio on reversed clips inside multi-clip timelines. `AudioReverser` now preserves the source audio format (sample rate and channel count) instead of forcing 44.1 kHz stereo, so a reversed clip's audio matches its untouched neighbours and AVFoundation no longer has to reconcile mixed formats within a single composition audio track during looped playback. The reversed segment is also written as an AVFoundation-authored CAF/PCM file (replacing the hand-authored WAV) and inserted at its decoded duration without padding, avoiding encoder priming artifacts and audio-timing drift in the exported video.
 
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt
index d3df4adc..ac1d4b90 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt
@@ -5,14 +5,28 @@ import android.media.MediaCodec
 import android.media.MediaExtractor
 import android.media.MediaFormat
 import android.media.MediaMuxer
+import android.net.Uri
 import android.os.Handler
 import android.os.Looper
+import androidx.media3.common.MediaItem
+import androidx.media3.common.MimeTypes
+import androidx.media3.common.audio.AudioProcessor
+import androidx.media3.common.audio.SonicAudioProcessor
+import androidx.media3.common.util.UnstableApi
+import androidx.media3.transformer.Composition
+import androidx.media3.transformer.EditedMediaItem
+import androidx.media3.transformer.Effects
+import androidx.media3.transformer.ExportException
+import androidx.media3.transformer.ExportResult
+import androidx.media3.transformer.ProgressHolder
+import androidx.media3.transformer.Transformer
 import ch.waio.pro_video_editor.src.features.audio.models.AudioExtractConfig
 import ch.waio.pro_video_editor.src.features.audio.models.AudioExtractJobHandle
 import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log
 import java.io.File
 import java.nio.ByteBuffer
 import java.util.concurrent.atomic.AtomicBoolean
+import java.util.concurrent.atomic.AtomicReference
 
 /**
  * Exception thrown when no audio track is found in the video file.
@@ -31,7 +45,13 @@ class NoAudioTrackException(message: String) : Exception(message)
  *
  * For WAV format, uses custom WAV file writer with PCM encoding.
  * For other formats, uses Android MediaExtractor and MediaMuxer.
+ *
+ * When a playback speed other than 1.0 is requested, compressed formats are
+ * re-encoded through a Media3 [Transformer] (with [SonicAudioProcessor] for a
+ * pitch-preserving time-stretch), while WAV applies the speed change directly
+ * on the decoded PCM in [WavFileWriter].
  */
+@UnstableApi
 class ExtractAudio(private val context: Context) {
 
     companion object {
@@ -58,12 +78,18 @@ class ExtractAudio(private val context: Context) {
         onComplete: (ByteArray?) -> Unit,
         onError: (Throwable) -> Unit
     ): AudioExtractJobHandle {
-        return if (config.format.lowercase() == "wav") {
-            // WAV format requires special handling
-            extractToWav(config, onProgress, onComplete, onError)
-        } else {
-            // Use MediaMuxer for other formats
-            extractWithMuxer(config, onProgress, onComplete, onError)
+        val applySpeed = config.speed > 0f && config.speed != 1.0f
+        return when {
+            // WAV applies the speed change on the decoded PCM (see WavFileWriter).
+            config.format.lowercase() == "wav" ->
+                extractToWav(config, onProgress, onComplete, onError)
+            // Compressed formats can't be re-timed by copying samples, so a
+            // speed change requires re-encoding through Media3 Transformer.
+            applySpeed ->
+                extractWithSpeed(config, onProgress, onComplete, onError)
+            // Fast path: copy the compressed audio samples unchanged.
+            else ->
+                extractWithMuxer(config, onProgress, onComplete, onError)
         }
     }
 
@@ -154,8 +180,9 @@ class ExtractAudio(private val context: Context) {
                     throw IllegalArgumentException("endUs must be greater than startUs")
                 }
 
-                // Create WAV writer (handles both compressed and PCM audio)
-                wavWriter = WavFileWriter(outputFile)
+                // Create WAV writer (handles both compressed and PCM audio,
+                // and applies a pitch-preserving speed change when requested)
+                wavWriter = WavFileWriter(outputFile, config.speed)
 
                 mainHandler.post { onProgress(0.0) }
                 
@@ -430,6 +457,169 @@ class ExtractAudio(private val context: Context) {
         }
     }
 
+    /**
+     * Extracts audio while applying a pitch-preserving playback speed change.
+     *
+     * Copying compressed samples cannot re-time them, so the audio is re-encoded
+     * to AAC by a Media3 [Transformer] with a [SonicAudioProcessor] set to
+     * `config.speed`. The audio-track probe runs on a worker thread and the
+     * export starts on the main looper; the returned handle wraps cancellation.
+     */
+    private fun extractWithSpeed(
+        config: AudioExtractConfig,
+        onProgress: (Double) -> Unit,
+        onComplete: (ByteArray?) -> Unit,
+        onError: (Throwable) -> Unit
+    ): AudioExtractJobHandle {
+        val mainHandler = Handler(Looper.getMainLooper())
+        val shouldStopPolling = AtomicBoolean(false)
+        val canceled = AtomicBoolean(false)
+        val transformerRef = AtomicReference<Transformer?>(null)
+
+        val outputFile = if (config.outputPath != null) {
+            File(config.outputPath)
+        } else {
+            File(
+                context.cacheDir,
+                "audio_output_${System.currentTimeMillis()}.${config.getExtension()}"
+            )
+        }
+
+        fun cleanupOnFailure() {
+            if (config.outputPath == null && outputFile.exists()) {
+                outputFile.delete()
+            }
+        }
+
+        // Run the audio-track pre-check off the main thread, then start the
+        // Transformer (which must run on a thread with a Looper) on the main
+        // thread, mirroring the render pipeline.
+        Thread {
+            var probe: MediaExtractor? = null
+            val hasAudio: Boolean
+            try {
+                probe = MediaExtractor()
+                probe.setDataSource(config.inputPath)
+                hasAudio = findAudioTrack(probe) >= 0
+            } catch (e: Exception) {
+                mainHandler.post { onError(e) }
+                return@Thread
+            } finally {
+                try {
+                    probe?.release()
+                } catch (e: Exception) {
+                    Log.w(TAG, "Error releasing probe extractor: ${e.message}")
+                }
+            }
+
+            if (!hasAudio) {
+                mainHandler.post {
+                    onError(NoAudioTrackException("No audio track found in video file"))
+                }
+                return@Thread
+            }
+
+            mainHandler.post {
+                if (canceled.get()) return@post
+                try {
+                    val mediaItemBuilder = MediaItem.Builder()
+                        .setUri(Uri.fromFile(File(config.inputPath)))
+
+                    if (config.startUs != null || config.endUs != null) {
+                        val clipping = MediaItem.ClippingConfiguration.Builder().apply {
+                            config.startUs?.let { setStartPositionUs(it) }
+                            config.endUs?.let { setEndPositionUs(it) }
+                        }.build()
+                        mediaItemBuilder.setClippingConfiguration(clipping)
+                    }
+
+                    val sonic = SonicAudioProcessor().apply { setSpeed(config.speed) }
+                    val editedMediaItem = EditedMediaItem.Builder(mediaItemBuilder.build())
+                        .setRemoveVideo(true)
+                        .setEffects(Effects(listOf<AudioProcessor>(sonic), emptyList()))
+                        .build()
+
+                    val transformer = Transformer.Builder(context)
+                        .setAudioMimeType(MimeTypes.AUDIO_AAC)
+                        .addListener(object : Transformer.Listener {
+                            override fun onCompleted(
+                                composition: Composition,
+                                result: ExportResult
+                            ) {
+                                shouldStopPolling.set(true)
+                                onProgress(1.0)
+                                try {
+                                    if (config.outputPath != null) {
+                                        onComplete(null)
+                                    } else {
+                                        onComplete(outputFile.readBytes())
+                                    }
+                                } catch (e: Exception) {
+                                    onError(e)
+                                } finally {
+                                    if (config.outputPath == null) outputFile.delete()
+                                }
+                            }
+
+                            override fun onError(
+                                composition: Composition,
+                                result: ExportResult,
+                                exception: ExportException
+                            ) {
+                                shouldStopPolling.set(true)
+                                cleanupOnFailure()
+                                onError(exception)
+                            }
+                        })
+                        .build()
+
+                    transformerRef.set(transformer)
+                    if (canceled.get()) {
+                        transformer.cancel()
+                        cleanupOnFailure()
+                        return@post
+                    }
+
+                    // Transformer fails if the destination already exists.
+                    if (outputFile.exists()) outputFile.delete()
+
+                    onProgress(0.0)
+                    transformer.start(editedMediaItem, outputFile.absolutePath)
+
+                    // Poll progress every 200 ms until the export finishes or is canceled.
+                    val progressHolder = ProgressHolder()
+                    mainHandler.post(object : Runnable {
+                        override fun run() {
+                            if (shouldStopPolling.get()) return
+                            val progressState = transformer.getProgress(progressHolder)
+                            if (progressHolder.progress >= 0) {
+                                onProgress(progressHolder.progress / 100.0)
+                            }
+                            if (!shouldStopPolling.get() &&
+                                progressState != Transformer.PROGRESS_STATE_NOT_STARTED
+                            ) {
+                                mainHandler.postDelayed(this, 200)
+                            }
+                        }
+                    })
+                } catch (e: Exception) {
+                    Log.e(TAG, "Error extracting audio with speed: ${e.message}", e)
+                    cleanupOnFailure()
+                    onError(e)
+                }
+            }
+        }.start()
+
+        return AudioExtractJobHandle {
+            canceled.set(true)
+            shouldStopPolling.set(true)
+            mainHandler.post {
+                transformerRef.get()?.cancel()
+                cleanupOnFailure()
+            }
+        }
+    }
+
     /**
      * Finds the first audio track in the media file.
      *
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/PcmSpeedProcessor.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/PcmSpeedProcessor.kt
new file mode 100644
index 00000000..217c04f3
--- /dev/null
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/PcmSpeedProcessor.kt
@@ -0,0 +1,76 @@
+package ch.waio.pro_video_editor.src.features.audio
+
+import androidx.media3.common.C
+import androidx.media3.common.audio.AudioProcessor
+import androidx.media3.common.audio.SonicAudioProcessor
+import androidx.media3.common.util.UnstableApi
+import java.io.ByteArrayOutputStream
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
+
+/**
+ * Applies a pitch-preserving playback speed change to a stream of 16-bit PCM
+ * audio using Media3's [SonicAudioProcessor].
+ *
+ * The processor is fed interleaved little-endian 16-bit PCM via [process] and
+ * returns the time-stretched output. [drain] must be called once at the end of
+ * the stream to flush any samples buffered internally by Sonic.
+ *
+ * This mirrors the speed handling used in the video render pipeline
+ * (see `applyPlaybackSpeed`), keeping audio extraction consistent: a value of
+ * `2.0` plays twice as fast while keeping the original pitch.
+ */
+@UnstableApi
+class PcmSpeedProcessor(
+    speed: Float,
+    sampleRate: Int,
+    channelCount: Int
+) {
+    private val sonic = SonicAudioProcessor()
+
+    init {
+        // Only the speed is set; pitch keeps its default so just the duration changes.
+        sonic.setSpeed(speed)
+        sonic.configure(
+            AudioProcessor.AudioFormat(sampleRate, channelCount, C.ENCODING_PCM_16BIT)
+        )
+        // NOTE(review): flush(StreamMetadata) is used because the no-arg flush() reportedly throws here.
+        sonic.flush(AudioProcessor.StreamMetadata.DEFAULT)
+    }
+
+    /** Queues [pcm] and returns the output available now (possibly empty). */
+    fun process(pcm: ByteArray): ByteArray {
+        val input = ByteBuffer.wrap(pcm).order(ByteOrder.LITTLE_ENDIAN)
+        sonic.queueInput(input)
+        return collectOutput()
+    }
+
+    /**
+     * Ends the stream, returns the output still buffered, then resets Sonic.
+     */
+    fun drain(): ByteArray {
+        sonic.queueEndOfStream()
+        val tail = ByteArrayOutputStream()
+        tail.write(collectOutput())
+        // Keep pulling until Sonic reports the end or yields nothing more.
+        while (!sonic.isEnded) {
+            val more = collectOutput().takeIf { it.isNotEmpty() } ?: break
+            tail.write(more)
+        }
+        sonic.reset()
+        return tail.toByteArray()
+    }
+
+    /** Copies every pending output buffer into a single array. */
+    private fun collectOutput(): ByteArray {
+        val collected = ByteArrayOutputStream()
+        while (true) {
+            val pending = sonic.output
+            if (!pending.hasRemaining()) break
+            val bytes = ByteArray(pending.remaining())
+            pending.get(bytes)
+            collected.write(bytes)
+        }
+        return collected.toByteArray()
+    }
+}
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt
index 417dd2ec..0af52a11 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt
@@ -4,6 +4,7 @@ import android.media.AudioFormat
 import android.media.MediaCodec
 import android.media.MediaExtractor
 import android.media.MediaFormat
+import androidx.media3.common.util.UnstableApi
 import java.io.File
 import java.io.FileOutputStream
 import java.io.RandomAccessFile
@@ -17,9 +18,16 @@ import java.nio.ByteOrder
  * - Writing a proper RIFF/WAVE header
  * - Decoding compressed audio to PCM samples
  * - Writing PCM data in the correct format
+ * - Optionally applying a pitch-preserving playback speed change
  * - Updating the file size fields after writing
+ *
+ * @param outputFile Destination WAV file.
+ * @param speed Playback speed multiplier (1.0 = original). Values other than
+ *   1.0 time-stretch the audio while keeping the original pitch. Only applied
+ *   to 16-bit PCM output.
  */
-class WavFileWriter(private val outputFile: File) {
+@UnstableApi
+class WavFileWriter(private val outputFile: File, private val speed: Float = 1.0f) {
 
     companion object {
         // Magic numbers for WAV format
@@ -39,6 +47,50 @@ class WavFileWriter(private val outputFile: File) {
     private var isFloatPcm: Boolean = false
     private var totalDataSize: Long = 0
 
+    /** Active when a pitch-preserving speed change is being applied. */
+    private var speedProcessor: PcmSpeedProcessor? = null
+
+    /**
+     * Creates the speed processor once the PCM format is known. Only 16-bit PCM
+     * is re-timed; a non-finite, non-positive or 1.0 speed leaves it unset so
+     * the PCM is written unchanged.
+     */
+    private fun maybeInitSpeedProcessor() {
+        val applySpeed = speed.isFinite() && speed > 0f && speed != 1.0f
+        if (applySpeed && bitsPerSample == 16 && speedProcessor == null) {
+            speedProcessor = PcmSpeedProcessor(speed, sampleRate, numChannels)
+        }
+    }
+
+    /**
+     * Appends one chunk of 16-bit PCM to [outputStream].
+     *
+     * Without a speed processor the chunk is written as-is. Otherwise it is
+     * passed through the processor and only the bytes it hands back are
+     * written (nothing at all while it is still buffering). Every byte that
+     * is written is counted in [totalDataSize], so the size reflects the
+     * re-timed output rather than the input.
+     */
+    private fun writePcm(outputStream: FileOutputStream, pcm: ByteArray) {
+        val stretcher = speedProcessor
+        // Pass-through when no speed change is active.
+        val bytes = if (stretcher == null) pcm else stretcher.process(pcm)
+        // The processor may return nothing while it buffers internally.
+        if (stretcher != null && bytes.isEmpty()) return
+        outputStream.write(bytes)
+        totalDataSize += bytes.size
+    }
+
+    /** Drains the speed processor (if any) and writes its remaining output. */
+    private fun flushSpeedProcessor(outputStream: FileOutputStream) {
+        val remaining = speedProcessor?.drain()
+        // Nothing to do without a processor or when it had nothing buffered.
+        if (remaining == null || remaining.isEmpty()) return
+        outputStream.write(remaining)
+        // Count the tail like any other written PCM.
+        totalDataSize += remaining.size
+    }
+
     /**
      * Extracts audio from a video file and writes it as a WAV file.
      *
@@ -162,6 +214,7 @@ class WavFileWriter(private val outputFile: File) {
             var outputEos = false
 
             onProgress(0.0)
+            maybeInitSpeedProcessor()
 
             while (!outputEos && !shouldStop()) {
                 if (!inputEos) {
@@ -280,6 +333,19 @@ class WavFileWriter(private val outputFile: File) {
                             
                                 raf.write(headerBytes)
                             }
+
+                            // Re-create the speed processor for the new PCM
+                            // format, flushing whatever it had buffered first.
+                            val processor = speedProcessor
+                            if (processor != null) {
+                                val tail = processor.drain()
+                                if (tail.isNotEmpty()) {
+                                    outputStream.write(tail)
+                                    totalDataSize += tail.size
+                                }
+                                speedProcessor = null
+                                maybeInitSpeedProcessor()
+                            }
                         }
                     }
                     outputBufferId >= 0 -> {
@@ -306,15 +372,13 @@ class WavFileWriter(private val outputFile: File) {
                                     val intValue = (floatValue.coerceIn(-1.0f, 1.0f) * 32767.0f).toInt().toShort()
                                     int16Buffer.putShort(intValue)
                                 }
-                                
-                                outputStream.write(int16Buffer.array())
-                                totalDataSize += int16Buffer.array().size
+
+                                writePcm(outputStream, int16Buffer.array())
                             } else {
                                 // Write PCM data directly (already in correct format)
                                 val pcmData = ByteArray(bufferInfo.size)
                                 decoderOutputBuffer.get(pcmData)
-                                outputStream.write(pcmData)
-                                totalDataSize += pcmData.size
+                                writePcm(outputStream, pcmData)
                             }
                         }
 
@@ -323,6 +387,9 @@ class WavFileWriter(private val outputFile: File) {
                 }
             }
 
+            // Flush any samples buffered by the speed processor.
+            flushSpeedProcessor(outputStream)
+
             outputStream.flush()
             outputStream.close()
             outputStream = null
@@ -402,6 +469,7 @@ class WavFileWriter(private val outputFile: File) {
             var currentTimeUs = startUs
 
             onProgress(0.0)
+            maybeInitSpeedProcessor()
 
             while (!shouldStop()) {
                 buffer.clear()
@@ -434,11 +502,9 @@ class WavFileWriter(private val outputFile: File) {
                         val intValue = (floatValue.coerceIn(-1.0f, 1.0f) * 32767.0f).toInt().toShort()
                         int16Buffer.putShort(intValue)
                     }
-                    outputStream.write(int16Buffer.array())
-                    totalDataSize += int16Buffer.array().size
+                    writePcm(outputStream, int16Buffer.array())
                 } else {
-                    outputStream.write(pcmData)
-                    totalDataSize += sampleSize
+                    writePcm(outputStream, pcmData)
                 }
 
                 currentTimeUs = presentationTimeUs
@@ -450,6 +516,9 @@ class WavFileWriter(private val outputFile: File) {
                 extractor.advance()
             }
 
+            // Flush any samples buffered by the speed processor.
+            flushSpeedProcessor(outputStream)
+
             outputStream.flush()
             outputStream.close()
             outputStream = null
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/models/AudioExtractConfig.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/models/AudioExtractConfig.kt
index ad7915c5..6dc68f76 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/models/AudioExtractConfig.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/models/AudioExtractConfig.kt
@@ -10,6 +10,7 @@ import io.flutter.plugin.common.MethodCall
  * @property format Output audio format (mp3, aac, wav, m4a, ogg)
  * @property startUs Optional start time in microseconds for trimming
  * @property endUs Optional end time in microseconds for trimming
+ * @property speed Playback speed multiplier (1.0 = original speed, pitch preserved)
  * @property outputPath Optional output file path (null = return bytes)
  */
 data class AudioExtractConfig(
@@ -18,6 +19,7 @@ data class AudioExtractConfig(
     val format: String,
     val startUs: Long?,
     val endUs: Long?,
+    val speed: Float,
     val outputPath: String?
 ) {
     companion object {
@@ -37,6 +39,7 @@ data class AudioExtractConfig(
             val format = call.argument<String>("format") ?: "mp3"
             val startUs = call.argument<Number>("startTime")?.toLong()
             val endUs = call.argument<Number>("endTime")?.toLong()
+            val speed = call.argument<Number>("speed")?.toFloat()?.takeIf { it > 0f } ?: 1.0f
             val outputPath = call.argument<String>("outputPath")
 
             return AudioExtractConfig(
@@ -45,6 +48,7 @@ data class AudioExtractConfig(
                 format = format,
                 startUs = startUs,
                 endUs = endUs,
+                speed = speed,
                 outputPath = outputPath
             )
         }
diff --git a/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/ExtractAudio.swift b/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/ExtractAudio.swift
index 188a7b16..6c8183e9 100644
--- a/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/ExtractAudio.swift
+++ b/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/ExtractAudio.swift
@@ -89,6 +89,8 @@ class ExtractAudio {
         let sourceURL = URL(fileURLWithPath: config.inputPath)
         let asset = AVURLAsset(url: sourceURL)
 
+        let applySpeed = config.speed > 0 && config.speed != 1.0
+
         /// https://developer.apple.com/documentation/dispatch/dispatchsemaphore
         let semaphore = DispatchSemaphore(value: 0)
         var loadError: Error?
@@ -123,7 +125,10 @@ class ExtractAudio {
         try? FileManager.default.removeItem(at: outputURL)
 
         let fileExtension = outputURL.pathExtension.lowercased()
-        let outputFileType: AVFileType = (fileExtension == "caf") ? .caf : .m4a
+        // A speed change forces a re-encode through AVAssetExportPresetAppleM4A,
+        // which only supports the .m4a (AAC) output file type.
+        let outputFileType: AVFileType =
+          applySpeed ? .m4a : ((fileExtension == "caf") ? .caf : .m4a)
 
         let audioTracks = asset.tracks(withMediaType: .audio)
         guard let audioTrack = audioTracks.first else {
@@ -162,10 +167,22 @@ class ExtractAudio {
 
         try compositionAudioTrack.insertTimeRange(sourceTimeRange, of: audioTrack, at: .zero)
 
+        // Apply a pitch-preserving speed change by scaling the inserted range.
+        if applySpeed {
+          let scaled = CMTimeMultiplyByFloat64(
+            sourceTimeRange.duration, multiplier: 1.0 / config.speed)
+          compositionAudioTrack.scaleTimeRange(
+            CMTimeRange(start: .zero, duration: sourceTimeRange.duration), toDuration: scaled)
+        }
+
+        // Passthrough can't re-time samples, so a speed change requires a
+        // re-encoding preset.
+        let presetName =
+          applySpeed ? AVAssetExportPresetAppleM4A : AVAssetExportPresetPassthrough
         guard
           let session = AVAssetExportSession(
             asset: composition,
-            presetName: AVAssetExportPresetPassthrough
+            presetName: presetName
           )
         else {
           throw NSError(
@@ -176,6 +193,10 @@ class ExtractAudio {
 
         session.outputURL = outputURL
         session.outputFileType = outputFileType
+        if applySpeed {
+          // Preserve the original pitch while time-stretching.
+          session.audioTimePitchAlgorithm = .spectral
+        }
 
         // Sync context access back to main thread to cleanly initialize timers
         DispatchQueue.main.async {
@@ -379,9 +400,7 @@ class ExtractAudio {
           timeRange = audioTrack.timeRange
         }
 
-        let reader = try AVAssetReader(asset: asset)
-        assetReader = reader
-        reader.timeRange = timeRange
+        let applySpeed = config.speed > 0 && config.speed != 1.0
 
         let readerOutputSettings: [String: Any] = [
           AVFormatIDKey: kAudioFormatLinearPCM,
@@ -391,16 +410,73 @@ class ExtractAudio {
           AVLinearPCMIsNonInterleaved: false,
         ]
 
-        let readerOutput = AVAssetReaderTrackOutput(
-          track: audioTrack, outputSettings: readerOutputSettings)
-        readerOutput.alwaysCopiesSampleData = false
+        let reader: AVAssetReader
+        let readerOutput: AVAssetReaderOutput
+        // Duration/start used only for progress reporting.
+        let effectiveDuration: CMTime
+        let progressStartSeconds: Double
+
+        if applySpeed {
+          // Build a composition holding only the requested range, then scale it
+          // to apply the speed change. Reading it through an audio-mix output
+          // lets us request a pitch-preserving time-stretch.
+          let composition = AVMutableComposition()
+          guard
+            let compositionAudioTrack = composition.addMutableTrack(
+              withMediaType: .audio,
+              preferredTrackID: kCMPersistentTrackID_Invalid
+            )
+          else {
+            throw NSError(
+              domain: "ExtractAudio", code: -14,
+              userInfo: [NSLocalizedDescriptionKey: "Failed to create composition audio track"])
+          }
 
-        guard reader.canAdd(readerOutput) else {
-          throw NSError(
-            domain: "ExtractAudio", code: -7,
-            userInfo: [NSLocalizedDescriptionKey: "Cannot add reader output"])
+          try compositionAudioTrack.insertTimeRange(timeRange, of: audioTrack, at: .zero)
+          let scaled = CMTimeMultiplyByFloat64(timeRange.duration, multiplier: 1.0 / config.speed)
+          compositionAudioTrack.scaleTimeRange(
+            CMTimeRange(start: .zero, duration: timeRange.duration), toDuration: scaled)
+
+          let compositionReader = try AVAssetReader(asset: composition)
+          let mixOutput = AVAssetReaderAudioMixOutput(
+            audioTracks: composition.tracks(withMediaType: .audio),
+            audioSettings: readerOutputSettings)
+          mixOutput.audioTimePitchAlgorithm = .spectral
+          mixOutput.alwaysCopiesSampleData = false
+
+          guard compositionReader.canAdd(mixOutput) else {
+            throw NSError(
+              domain: "ExtractAudio", code: -7,
+              userInfo: [NSLocalizedDescriptionKey: "Cannot add reader output"])
+          }
+          compositionReader.add(mixOutput)
+
+          reader = compositionReader
+          readerOutput = mixOutput
+          effectiveDuration = scaled
+          // Composition samples start at zero.
+          progressStartSeconds = 0.0
+        } else {
+          let trackReader = try AVAssetReader(asset: asset)
+          trackReader.timeRange = timeRange
+
+          let trackOutput = AVAssetReaderTrackOutput(
+            track: audioTrack, outputSettings: readerOutputSettings)
+          trackOutput.alwaysCopiesSampleData = false
+
+          guard trackReader.canAdd(trackOutput) else {
+            throw NSError(
+              domain: "ExtractAudio", code: -7,
+              userInfo: [NSLocalizedDescriptionKey: "Cannot add reader output"])
+          }
+          trackReader.add(trackOutput)
+
+          reader = trackReader
+          readerOutput = trackOutput
+          effectiveDuration = timeRange.duration
+          progressStartSeconds = CMTimeGetSeconds(timeRange.start)
         }
-        reader.add(readerOutput)
+        assetReader = reader
 
         let fm = FileManager.default
         guard fm.createFile(atPath: outputURL.path, contents: nil) else {
@@ -430,7 +506,7 @@ class ExtractAudio {
 
         DispatchQueue.main.async { onProgress(0.0) }
 
-        let totalDuration = CMTimeGetSeconds(timeRange.duration)
+        let totalDuration = CMTimeGetSeconds(effectiveDuration)
         var totalPcmBytes: Int64 = 0
 
         while let sampleBuffer = readerOutput.copyNextSampleBuffer() {
@@ -456,7 +532,7 @@ class ExtractAudio {
           }
 
           let currentTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
-          let elapsed = CMTimeGetSeconds(currentTime) - CMTimeGetSeconds(timeRange.start)
+          let elapsed = CMTimeGetSeconds(currentTime) - progressStartSeconds
           let progress = totalDuration > 0 ? min(max(elapsed / totalDuration, 0.0), 0.99) : 0.0
           DispatchQueue.main.async { onProgress(progress) }
         }
diff --git a/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/models/AudioExtractConfig.swift b/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/models/AudioExtractConfig.swift
index 278e22da..854b6f28 100644
--- a/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/models/AudioExtractConfig.swift
+++ b/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/models/AudioExtractConfig.swift
@@ -19,10 +19,13 @@ struct AudioExtractConfig {
     
     /// Optional start time in microseconds for trimming
     let startUs: Int64?
-    
+
     /// Optional end time in microseconds for trimming
     let endUs: Int64?
-    
+
+    /// Playback speed multiplier (1.0 = original speed, pitch preserved)
+    let speed: Double
+
     /// Optional output file path (nil = return bytes)
     let outputPath: String?
     
@@ -41,8 +44,9 @@ struct AudioExtractConfig {
         
         let startUs = args["startTime"] as? Int64
         let endUs = args["endTime"] as? Int64
+        let speed = (args["speed"] as? NSNumber)?.doubleValue ?? 1.0
         let outputPath = args["outputPath"] as? String
-        
+
         return AudioExtractConfig(
             id: id,
             inputPath: inputPath,
@@ -50,6 +54,7 @@ struct AudioExtractConfig {
             format: format,
             startUs: startUs,
             endUs: endUs,
+            speed: speed > 0 ? speed : 1.0,
             outputPath: outputPath
         )
     }
diff --git a/example/integration_test/audio_extract_test.dart b/example/integration_test/audio_extract_test.dart
index 727089d7..3235a48b 100644
--- a/example/integration_test/audio_extract_test.dart
+++ b/example/integration_test/audio_extract_test.dart
@@ -140,6 +140,80 @@ void main() {
       },
       skip: skipPlatform || !isFormatSupported(format),
     );
+
+    // Extracts the same clip at 1x and 2x and checks the 2x output is a
+    // valid file (and, for WAV, markedly smaller than the 1x baseline).
+    testWidgets(
+      'extractAudio with $format at 2x speed produces valid, shorter output',
+      (tester) async {
+        if (!isFormatSupported(format)) return;
+
+        final directory = await getTemporaryDirectory();
+        final ts = DateTime.now().millisecondsSinceEpoch;
+        final normalPath =
+            '${directory.path}/test_audio_1x_$ts.${format.extension}';
+        final fastPath =
+            '${directory.path}/test_audio_2x_$ts.${format.extension}';
+        final normalFile = File(normalPath);
+        final fastFile = File(fastPath);
+
+        try {
+          await pve.extractAudioToFile(
+            normalPath,
+            AudioExtractConfigs(video: testVideo, format: format),
+          );
+          await pve.extractAudioToFile(
+            fastPath,
+            AudioExtractConfigs(video: testVideo, format: format, speed: 2.0),
+          );
+
+          expect(
+            await normalFile.exists(),
+            isTrue,
+            reason: 'Baseline (1x) audio file should exist',
+          );
+          expect(
+            await fastFile.exists(),
+            isTrue,
+            reason: 'Sped-up audio file should exist',
+          );
+
+          // Extension-based MIME detection (see notes on the base test).
+          final mimeType = lookupMimeType(fastPath);
+          final expectedMimeTypes = switch (format) {
+            AudioFormat.aac => [format.mimeType, 'audio/mp4'],
+            AudioFormat.wav => [format.mimeType, 'audio/wav'],
+            _ => [format.mimeType],
+          };
+          expect(expectedMimeTypes, contains(mimeType));
+
+          final fastSize = await fastFile.length();
+          expect(
+            fastSize,
+            greaterThan(1000),
+            reason: 'Sped-up audio should have content (>1KB)',
+          );
+
+          // WAV is uncompressed, so 2x speed roughly halves the PCM data. A
+          // generous bound keeps this robust across sample rates / bit depths.
+          if (format == AudioFormat.wav) {
+            final normalSize = await normalFile.length();
+            expect(
+              fastSize,
+              lessThan(normalSize * 0.75),
+              reason:
+                  '2x WAV should be markedly smaller than the 1x extraction',
+            );
+          }
+        } finally {
+          // Always remove the temp files, even when an expectation above
+          // fails, so a failing run does not leave artifacts behind.
+          if (await normalFile.exists()) await normalFile.delete();
+          if (await fastFile.exists()) await fastFile.delete();
+        }
+      },
+      skip: skipPlatform || !isFormatSupported(format),
+    );
   }
 
   testWidgets('extractAudio emits progress updates', (tester) async {
diff --git a/example/lib/features/audio/audio_extract_example_page.dart b/example/lib/features/audio/audio_extract_example_page.dart
index 15620a16..9356a306 100644
--- a/example/lib/features/audio/audio_extract_example_page.dart
+++ b/example/lib/features/audio/audio_extract_example_page.dart
@@ -28,6 +28,7 @@ class _AudioExtractExamplePageState extends State<AudioExtractExamplePage> {
   String? _extractedAudioPath;
   bool _isExtracting = false;
   AudioFormat _selectedFormat = AudioFormat.mp3;
+  double _selectedSpeed = 1.0;
   final String _taskId = 'AudioExtractionTaskId';
 
   final AudioPlayer _audioPlayer = AudioPlayer();
@@ -110,6 +111,7 @@ class _AudioExtractExamplePageState extends State<AudioExtractExamplePage> {
       final config = AudioExtractConfigs(
         video: EditorVideo.asset(kVideoEditorExampleH264Path),
         format: _selectedFormat,
+        speed: _selectedSpeed,
         // Optional: Add trimming
         // startTime: Duration(seconds: 5),
         // endTime: Duration(seconds: 15),
@@ -365,6 +367,7 @@ class _AudioExtractExamplePageState extends State<AudioExtractExamplePage> {
         children: [
           _AudioExtractionCard(
             selectedFormat: _selectedFormat,
+            selectedSpeed: _selectedSpeed,
             isFormatSupported: _isFormatSupported,
             isExtracting: _isExtracting,
             taskId: _taskId,
@@ -375,6 +378,9 @@ class _AudioExtractExamplePageState extends State<AudioExtractExamplePage> {
             onFormatChanged: (format) => setState(() {
               _selectedFormat = format;
             }),
+            onSpeedChanged: (speed) => setState(() {
+              _selectedSpeed = speed;
+            }),
             onExtractAudio: _extractAudio,
             onPlayPause: _playAudio,
             onSeek: (value) async {
@@ -429,6 +435,7 @@ class _AudioExtractExamplePageState extends State<AudioExtractExamplePage> {
 class _AudioExtractionCard extends StatelessWidget {
   const _AudioExtractionCard({
     required this.selectedFormat,
+    required this.selectedSpeed,
     required this.isFormatSupported,
     required this.isExtracting,
     required this.taskId,
@@ -437,6 +444,7 @@ class _AudioExtractionCard extends StatelessWidget {
     required this.position,
     required this.duration,
     required this.onFormatChanged,
+    required this.onSpeedChanged,
     required this.onExtractAudio,
     required this.onPlayPause,
     required this.onSeek,
@@ -444,6 +452,7 @@ class _AudioExtractionCard extends StatelessWidget {
   });
 
   final AudioFormat selectedFormat;
+  final double selectedSpeed;
   final bool Function(AudioFormat) isFormatSupported;
   final bool isExtracting;
   final String taskId;
@@ -452,11 +461,14 @@ class _AudioExtractionCard extends StatelessWidget {
   final Duration position;
   final Duration duration;
   final ValueChanged<AudioFormat> onFormatChanged;
+  final ValueChanged<double> onSpeedChanged;
   final VoidCallback onExtractAudio;
   final VoidCallback onPlayPause;
   final ValueChanged<double> onSeek;
   final VoidCallback onDelete;
 
+  static const List<double> _speedOptions = [0.5, 1.0, 1.5, 2.0];
+
   String _formatDuration(Duration dur) {
     String twoDigits(int n) => n.toString().padLeft(2, '0');
     final minutes = twoDigits(dur.inMinutes.remainder(60));
@@ -508,6 +520,26 @@ class _AudioExtractionCard extends StatelessWidget {
               }).toList(),
             ),
             const SizedBox(height: 16),
+            const Text(
+              'Speed:',
+              style: TextStyle(fontSize: 14, fontWeight: FontWeight.w500),
+            ),
+            const SizedBox(height: 8),
+            Wrap(
+              spacing: 8,
+              children: _speedOptions.map((speed) {
+                return ChoiceChip(
+                  label: Text('${speed}x'),
+                  selected: selectedSpeed == speed,
+                  onSelected: isExtracting
+                      ? null
+                      : (selected) {
+                          if (selected) onSpeedChanged(speed);
+                        },
+                );
+              }).toList(),
+            ),
+            const SizedBox(height: 16),
             SizedBox(
               width: double.infinity,
               child: ElevatedButton.icon(
diff --git a/lib/core/models/audio/audio_extract_configs_model.dart b/lib/core/models/audio/audio_extract_configs_model.dart
index 446b6933..b9fae48c 100644
--- a/lib/core/models/audio/audio_extract_configs_model.dart
+++ b/lib/core/models/audio/audio_extract_configs_model.dart
@@ -17,14 +17,17 @@ class AudioExtractConfigs {
   /// [startTime] Optional start time for trimming. If null, starts from
   /// beginning.
   /// [endTime] Optional end time for trimming. If null, goes to video end.
+  /// [speed] Playback speed multiplier, finite and > 0 (default `1.0`).
   /// [id] Unique task identifier. Generated automatically if not provided.
   AudioExtractConfigs({
     required this.video,
     this.format = AudioFormat.mp3,
     this.startTime,
     this.endTime,
+    this.speed = 1.0,
     String? id,
-  }) : id = id ?? DateTime.now().millisecondsSinceEpoch.toString();
+  })  : assert(speed > 0 && speed.isFinite, '[speed] must be finite and > 0'),
+        id = id ?? DateTime.now().millisecondsSinceEpoch.toString();
 
   /// The source video to extract audio from.
   final EditorVideo video;
@@ -44,6 +47,15 @@ class AudioExtractConfigs {
   /// If null, extraction continues to the end of the video.
   final Duration? endTime;
 
+  /// Playback speed multiplier applied to the extracted audio.
+  ///
+  /// For example, `0.5` for half speed, `2.0` for double speed.
+  /// The pitch is preserved (time-stretch), matching the editor's video
+  /// rendering behavior.
+  ///
+  /// **Default**: `1.0` (original speed)
+  final double speed;
+
   /// Unique identifier for tracking progress of this extraction task.
   ///
   /// Used with [ProVideoEditor.progressStreamById] to monitor extraction
@@ -57,6 +69,7 @@ class AudioExtractConfigs {
       'format': format.name,
       'startTime': startTime?.inMicroseconds,
       'endTime': endTime?.inMicroseconds,
+      'speed': speed,
     };
   }
 
@@ -66,6 +79,7 @@ class AudioExtractConfigs {
     AudioFormat? format,
     Duration? startTime,
     Duration? endTime,
+    double? speed,
     String? id,
   }) {
     return AudioExtractConfigs(
@@ -73,6 +87,7 @@ class AudioExtractConfigs {
       format: format ?? this.format,
       startTime: startTime ?? this.startTime,
       endTime: endTime ?? this.endTime,
+      speed: speed ?? this.speed,
       id: id ?? this.id,
     );
   }
@@ -86,6 +101,7 @@ class AudioExtractConfigs {
         other.format == format &&
         other.startTime == startTime &&
         other.endTime == endTime &&
+        other.speed == speed &&
         other.id == id;
   }
 
@@ -95,12 +111,13 @@ class AudioExtractConfigs {
         format.hashCode ^
         startTime.hashCode ^
         endTime.hashCode ^
+        speed.hashCode ^
         id.hashCode;
   }
 
   @override
   String toString() {
     return 'AudioExtractConfigs(video: $video, format: $format, '
-        'startTime: $startTime, endTime: $endTime, id: $id)';
+        'startTime: $startTime, endTime: $endTime, speed: $speed, id: $id)';
   }
 }
diff --git a/pubspec.yaml b/pubspec.yaml
index daab2119..3bfbfdaf 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,6 +1,6 @@
 name: pro_video_editor
 description: "A Flutter video editor: Seamlessly enhance your videos with user-friendly editing features."
-version: 1.17.6
+version: 1.18.0
 homepage: https://github.com/hm21/pro_video_editor/
 repository: https://github.com/hm21/pro_video_editor/
 documentation: https://github.com/hm21/pro_video_editor/