hm21 · hm21 · Jun 11, 2026 · Jun 11, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 1.18.0
+- **FEAT**(android, iOS, macOS): Add `speed` to `AudioExtractConfigs` for pitch-preserving playback-speed changes during audio extraction (e.g. `2.0` for double speed, `0.5` for half). WAV applies the time-stretch directly on the decoded PCM (Android `SonicAudioProcessor`; AVFoundation `AVAssetReaderAudioMixOutput` with the spectral time-pitch algorithm), while compressed formats are re-encoded — Android via a Media3 `Transformer`, Darwin via `AVAssetExportPresetAppleM4A` with a scaled time range. Note: on Apple platforms a non-`1.0` speed re-encodes to AAC/M4A, so `caf` output falls back to M4A content.
+
 ## 1.17.6
 - **FIX**(iOS, macOS): Fix unstable/glitchy audio on reversed clips inside multi-clip timelines. `AudioReverser` now preserves the source audio format (sample rate and channel count) instead of forcing 44.1 kHz stereo, so a reversed clip's audio matches its untouched neighbours and AVFoundation no longer has to reconcile mixed formats within a single composition audio track during looped playback. The reversed segment is also written as an AVFoundation-authored CAF/PCM file (replacing the hand-authored WAV) and inserted at its decoded duration without padding, avoiding encoder priming artifacts and audio-timing drift in the exported video.
 

diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt
@@ -5,14 +5,28 @@ import android.media.MediaCodec
 import android.media.MediaExtractor
 import android.media.MediaFormat
 import android.media.MediaMuxer
+import android.net.Uri
 import android.os.Handler
 import android.os.Looper
+import androidx.media3.common.MediaItem
+import androidx.media3.common.MimeTypes
+import androidx.media3.common.audio.AudioProcessor
+import androidx.media3.common.audio.SonicAudioProcessor
+import androidx.media3.common.util.UnstableApi
+import androidx.media3.transformer.Composition
+import androidx.media3.transformer.EditedMediaItem
+import androidx.media3.transformer.Effects
+import androidx.media3.transformer.ExportException
+import androidx.media3.transformer.ExportResult
+import androidx.media3.transformer.ProgressHolder
+import androidx.media3.transformer.Transformer
 import ch.waio.pro_video_editor.src.features.audio.models.AudioExtractConfig
 import ch.waio.pro_video_editor.src.features.audio.models.AudioExtractJobHandle
 import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log
 import java.io.File
 import java.nio.ByteBuffer
 import java.util.concurrent.atomic.AtomicBoolean
+import java.util.concurrent.atomic.AtomicReference
 
 /**
  * Exception thrown when no audio track is found in the video file.
@@ -31,7 +45,13 @@ class NoAudioTrackException(message: String) : Exception(message)
  *
  * For WAV format, uses custom WAV file writer with PCM encoding.
  * For other formats, uses Android MediaExtractor and MediaMuxer.
+ *
+ * When a playback speed other than 1.0 is requested, compressed formats are
+ * re-encoded through a Media3 [Transformer] (with [SonicAudioProcessor] for a
+ * pitch-preserving time-stretch), while WAV applies the speed change directly
+ * on the decoded PCM in [WavFileWriter].
  */
+@UnstableApi
 class ExtractAudio(private val context: Context) {
 
     companion object {
@@ -58,12 +78,18 @@ class ExtractAudio(private val context: Context) {
         onComplete: (ByteArray?) -> Unit,
         onError: (Throwable) -> Unit
     ): AudioExtractJobHandle {
-        return if (config.format.lowercase() == "wav") {
-            // WAV format requires special handling
-            extractToWav(config, onProgress, onComplete, onError)
-        } else {
-            // Use MediaMuxer for other formats
-            extractWithMuxer(config, onProgress, onComplete, onError)
+        val applySpeed = config.speed > 0f && config.speed != 1.0f
+        return when {
+            // WAV applies the speed change on the decoded PCM (see WavFileWriter).
+            config.format.lowercase() == "wav" ->
+                extractToWav(config, onProgress, onComplete, onError)
+            // Compressed formats can't be re-timed by copying samples, so a
+            // speed change requires re-encoding through Media3 Transformer.
+            applySpeed ->
+                extractWithSpeed(config, onProgress, onComplete, onError)
+            // Fast path: copy the compressed audio samples unchanged.
+            else ->
+                extractWithMuxer(config, onProgress, onComplete, onError)
         }
     }
 
@@ -154,8 +180,9 @@ class ExtractAudio(private val context: Context) {
                     throw IllegalArgumentException("endUs must be greater than startUs")
                 }
 
-                // Create WAV writer (handles both compressed and PCM audio)
-                wavWriter = WavFileWriter(outputFile)
+                // Create WAV writer (handles both compressed and PCM audio,
+                // and applies a pitch-preserving speed change when requested)
+                wavWriter = WavFileWriter(outputFile, config.speed)
 
                 mainHandler.post { onProgress(0.0) }
 
@@ -430,6 +457,202 @@ class ExtractAudio(private val context: Context) {
         }
     }
 
+    /**
+     * Extracts audio while applying a pitch-preserving playback speed change.
+     *
+     * Compressed output formats (AAC/M4A/MP3) can't be re-timed by simply
+     * copying samples, so the audio is re-encoded with a Media3 [Transformer].
+     * A [SonicAudioProcessor] performs the time-stretch and the result is muxed
+     * into an MP4/M4A container (matching the muxer path's output container).
+     *
+     * Threading: the audio-track pre-check runs on a worker thread, while the
+     * Transformer is created, started and polled on the main thread. When no
+     * `outputPath` is set, the temporary result file is read back on a worker
+     * thread so a large export does not block the main thread. All callbacks
+     * are invoked on the main thread.
+     *
+     * Cancellation: once the returned handle has been canceled, pre-check
+     * failures and read-back results are no longer reported.
+     *
+     * @param config Extraction settings (input/output path, clip range, speed).
+     * @param onProgress Receives the export progress as a fraction (0.0 to 1.0).
+     * @param onComplete Receives the encoded bytes, or `null` when the result
+     *   was written to `config.outputPath`.
+     * @param onError Receives the failure; a [NoAudioTrackException] is
+     *   reported when the input has no audio track.
+     * @return A handle that cancels the export.
+     */
+    private fun extractWithSpeed(
+        config: AudioExtractConfig,
+        onProgress: (Double) -> Unit,
+        onComplete: (ByteArray?) -> Unit,
+        onError: (Throwable) -> Unit
+    ): AudioExtractJobHandle {
+        val mainHandler = Handler(Looper.getMainLooper())
+        val shouldStopPolling = AtomicBoolean(false)
+        val canceled = AtomicBoolean(false)
+        val transformerRef = AtomicReference<Transformer?>(null)
+
+        val outputFile = if (config.outputPath != null) {
+            File(config.outputPath)
+        } else {
+            File(
+                context.cacheDir,
+                "audio_output_${System.currentTimeMillis()}.${config.getExtension()}"
+            )
+        }
+
+        fun cleanupOnFailure() {
+            if (config.outputPath == null && outputFile.exists()) {
+                outputFile.delete()
+            }
+        }
+
+        // Reports a failure on the main thread unless the job has been canceled
+        // in the meantime, so a canceled job stays silent.
+        fun postError(failure: Throwable) {
+            mainHandler.post {
+                if (!canceled.get()) onError(failure)
+            }
+        }
+
+        // Run the audio-track pre-check off the main thread, then start the
+        // Transformer (which must run on a thread with a Looper) on the main
+        // thread, mirroring the render pipeline.
+        Thread {
+            var probe: MediaExtractor? = null
+            val hasAudio: Boolean
+            try {
+                probe = MediaExtractor()
+                probe.setDataSource(config.inputPath)
+                hasAudio = findAudioTrack(probe) >= 0
+            } catch (e: Exception) {
+                postError(e)
+                return@Thread
+            } finally {
+                try {
+                    probe?.release()
+                } catch (e: Exception) {
+                    Log.w(TAG, "Error releasing probe extractor: ${e.message}")
+                }
+            }
+
+            if (!hasAudio) {
+                postError(NoAudioTrackException("No audio track found in video file"))
+                return@Thread
+            }
+
+            mainHandler.post {
+                if (canceled.get()) return@post
+                try {
+                    val mediaItemBuilder = MediaItem.Builder()
+                        .setUri(Uri.fromFile(File(config.inputPath)))
+
+                    if (config.startUs != null || config.endUs != null) {
+                        val clipping = MediaItem.ClippingConfiguration.Builder().apply {
+                            config.startUs?.let { setStartPositionUs(it) }
+                            config.endUs?.let { setEndPositionUs(it) }
+                        }.build()
+                        mediaItemBuilder.setClippingConfiguration(clipping)
+                    }
+
+                    val sonic = SonicAudioProcessor().apply { setSpeed(config.speed) }
+                    val editedMediaItem = EditedMediaItem.Builder(mediaItemBuilder.build())
+                        .setRemoveVideo(true)
+                        .setEffects(Effects(listOf<AudioProcessor>(sonic), emptyList()))
+                        .build()
+
+                    val transformer = Transformer.Builder(context)
+                        .setAudioMimeType(MimeTypes.AUDIO_AAC)
+                        .addListener(object : Transformer.Listener {
+                            override fun onCompleted(
+                                composition: Composition,
+                                result: ExportResult
+                            ) {
+                                shouldStopPolling.set(true)
+                                onProgress(1.0)
+                                if (config.outputPath != null) {
+                                    onComplete(null)
+                                } else {
+                                    // Read the temporary file on a worker
+                                    // thread: it can be large, and this
+                                    // listener runs on the main thread.
+                                    Thread {
+                                        try {
+                                            val bytes = outputFile.readBytes()
+                                            mainHandler.post {
+                                                if (!canceled.get()) onComplete(bytes)
+                                            }
+                                        } catch (e: Exception) {
+                                            postError(e)
+                                        } finally {
+                                            outputFile.delete()
+                                        }
+                                    }.start()
+                                }
+                            }
+
+                            override fun onError(
+                                composition: Composition,
+                                result: ExportResult,
+                                exception: ExportException
+                            ) {
+                                shouldStopPolling.set(true)
+                                cleanupOnFailure()
+                                onError(exception)
+                            }
+                        })
+                        .build()
+
+                    transformerRef.set(transformer)
+                    if (canceled.get()) {
+                        transformer.cancel()
+                        cleanupOnFailure()
+                        return@post
+                    }
+
+                    // Transformer fails if the destination already exists.
+                    if (outputFile.exists()) outputFile.delete()
+
+                    onProgress(0.0)
+                    transformer.start(editedMediaItem, outputFile.absolutePath)
+
+                    // Poll for progress until the export finishes.
+                    val progressHolder = ProgressHolder()
+                    mainHandler.post(object : Runnable {
+                        override fun run() {
+                            if (shouldStopPolling.get()) return
+                            val progressState = transformer.getProgress(progressHolder)
+                            // The holder only carries a meaningful value in
+                            // the AVAILABLE state.
+                            if (progressState == Transformer.PROGRESS_STATE_AVAILABLE) {
+                                onProgress(progressHolder.progress / 100.0)
+                            }
+                            if (!shouldStopPolling.get() &&
+                                progressState != Transformer.PROGRESS_STATE_NOT_STARTED
+                            ) {
+                                mainHandler.postDelayed(this, 200)
+                            }
+                        }
+                    })
+                } catch (e: Exception) {
+                    Log.e(TAG, "Error extracting audio with speed: ${e.message}", e)
+                    cleanupOnFailure()
+                    onError(e)
+                }
+            }
+        }.start()
+
+        return AudioExtractJobHandle {
+            canceled.set(true)
+            shouldStopPolling.set(true)
+            mainHandler.post {
+                transformerRef.get()?.cancel()
+                cleanupOnFailure()
+            }
+        }
+    }
+
     /**
      * Finds the first audio track in the media file.
      *

diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/PcmSpeedProcessor.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/PcmSpeedProcessor.kt
@@ -0,0 +1,97 @@
+package ch.waio.pro_video_editor.src.features.audio
+
+import androidx.media3.common.C
+import androidx.media3.common.audio.AudioProcessor
+import androidx.media3.common.audio.SonicAudioProcessor
+import androidx.media3.common.util.UnstableApi
+import java.io.ByteArrayOutputStream
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
+
+/**
+ * Applies a pitch-preserving playback speed change to a stream of 16-bit PCM
+ * audio using Media3's [SonicAudioProcessor].
+ *
+ * The processor is fed interleaved little-endian 16-bit PCM via [process] and
+ * returns the time-stretched output. [drain] must be called once at the end of
+ * the stream to flush any samples buffered internally by Sonic; the instance
+ * must not be used again afterwards because [drain] resets the processor.
+ *
+ * When the requested speed is so close to `1.0` that [SonicAudioProcessor]
+ * reports itself as inactive, input is passed through unchanged instead of
+ * being queued into a processor that has nothing to do.
+ *
+ * This mirrors the speed handling used in the video render pipeline
+ * (see `applyPlaybackSpeed`), keeping audio extraction consistent: a value of
+ * `2.0` plays twice as fast while keeping the original pitch.
+ *
+ * @param speed Playback speed factor; `1.0` leaves the duration unchanged.
+ * @param sampleRate Sample rate of the PCM stream in Hz.
+ * @param channelCount Number of interleaved channels in the PCM stream.
+ */
+@UnstableApi
+class PcmSpeedProcessor(
+    speed: Float,
+    sampleRate: Int,
+    channelCount: Int
+) {
+    private val sonic = SonicAudioProcessor().apply {
+        setSpeed(speed)
+        // Pitch is intentionally left at 1.0 so only the duration changes.
+        configure(
+            AudioProcessor.AudioFormat(sampleRate, channelCount, C.ENCODING_PCM_16BIT)
+        )
+        // SonicAudioProcessor only overrides flush(StreamMetadata); the no-arg
+        // flush() default throws, so call the StreamMetadata overload directly.
+        flush(AudioProcessor.StreamMetadata.DEFAULT)
+    }
+
+    // True when Sonic is inactive for the configured speed. Sonic only
+    // processes input while it is active, so [process] bypasses it entirely.
+    private val passThrough = !sonic.isActive
+
+    /**
+     * Feeds a chunk of 16-bit PCM and returns whatever output is currently
+     * available (may be empty while Sonic buffers internally).
+     *
+     * @param pcm Interleaved little-endian 16-bit samples.
+     * @return The time-stretched samples, or [pcm] itself when the speed
+     *   change is a no-op.
+     */
+    fun process(pcm: ByteArray): ByteArray {
+        if (passThrough) return pcm
+        sonic.queueInput(ByteBuffer.wrap(pcm).order(ByteOrder.LITTLE_ENDIAN))
+        return collectOutput()
+    }
+
+    /**
+     * Signals end-of-stream and returns any remaining buffered output.
+     *
+     * The processor is reset afterwards, so this must be the last call made
+     * on the instance.
+     */
+    fun drain(): ByteArray {
+        sonic.queueEndOfStream()
+        // collectOutput() already pulls until Sonic hands back an empty
+        // buffer, so a single call retrieves everything that was flushed.
+        val remaining = collectOutput()
+        sonic.reset()
+        return remaining
+    }
+
+    /**
+     * Pulls output buffers from Sonic until an empty one is returned and
+     * concatenates their contents.
+     */
+    private fun collectOutput(): ByteArray {
+        val output = ByteArrayOutputStream()
+        var buffer = sonic.output
+        while (buffer.hasRemaining()) {
+            val chunk = ByteArray(buffer.remaining())
+            buffer.get(chunk)
+            output.write(chunk)
+            buffer = sonic.output
+        }
+        return output.toByteArray()
+    }
+}