From a292b8c4ddd6996784a0407ee36e8d31fe934ea9 Mon Sep 17 00:00:00 2001 From: hm21 Date: Thu, 11 Jun 2026 09:37:13 +0200 Subject: [PATCH] feat(audio): add pitch-preserving playback speed support for audio extraction across platforms --- CHANGELOG.md | 3 + .../src/features/audio/ExtractAudio.kt | 206 +++++++++++++++++- .../src/features/audio/PcmSpeedProcessor.kt | 76 +++++++ .../src/features/audio/WavFileWriter.kt | 89 +++++++- .../audio/models/AudioExtractConfig.kt | 4 + .../shared/features/audio/ExtractAudio.swift | 106 +++++++-- .../audio/models/AudioExtractConfig.swift | 11 +- .../integration_test/audio_extract_test.dart | 64 ++++++ .../audio/audio_extract_example_page.dart | 32 +++ .../audio/audio_extract_configs_model.dart | 21 +- pubspec.yaml | 2 +- 11 files changed, 575 insertions(+), 39 deletions(-) create mode 100644 android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/PcmSpeedProcessor.kt diff --git a/CHANGELOG.md b/CHANGELOG.md index ab9c5ecf..17124535 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 1.18.0 +- **FEAT**(android, iOS, macOS): Add `speed` to `AudioExtractConfigs` for pitch-preserving playback-speed changes during audio extraction (e.g. `2.0` for double speed, `0.5` for half). WAV applies the time-stretch directly on the decoded PCM (Android `SonicAudioProcessor`; AVFoundation `AVAssetReaderAudioMixOutput` with the spectral time-pitch algorithm), while compressed formats are re-encoded — Android via a Media3 `Transformer`, Darwin via `AVAssetExportPresetAppleM4A` with a scaled time range. Note: on Apple platforms a non-`1.0` speed re-encodes to AAC/M4A, so `caf` output falls back to M4A content. + ## 1.17.6 - **FIX**(iOS, macOS): Fix unstable/glitchy audio on reversed clips inside multi-clip timelines. `AudioReverser` now preserves the source audio format (sample rate and channel count) instead of forcing 44.1 kHz stereo, so a reversed clip's audio matches its untouched neighbours and AVFoundation no longer has to reconcile mixed formats within a single composition audio track during looped playback. The reversed segment is also written as an AVFoundation-authored CAF/PCM file (replacing the hand-authored WAV) and inserted at its decoded duration without padding, avoiding encoder priming artifacts and audio-timing drift in the exported video. diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt index d3df4adc..ac1d4b90 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt @@ -5,14 +5,28 @@ import android.media.MediaCodec import android.media.MediaExtractor import android.media.MediaFormat import android.media.MediaMuxer +import android.net.Uri import android.os.Handler import android.os.Looper +import androidx.media3.common.MediaItem +import androidx.media3.common.MimeTypes +import androidx.media3.common.audio.AudioProcessor +import androidx.media3.common.audio.SonicAudioProcessor +import androidx.media3.common.util.UnstableApi +import androidx.media3.transformer.Composition +import androidx.media3.transformer.EditedMediaItem +import androidx.media3.transformer.Effects +import androidx.media3.transformer.ExportException +import androidx.media3.transformer.ExportResult +import androidx.media3.transformer.ProgressHolder +import androidx.media3.transformer.Transformer import ch.waio.pro_video_editor.src.features.audio.models.AudioExtractConfig import ch.waio.pro_video_editor.src.features.audio.models.AudioExtractJobHandle import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log import java.io.File import java.nio.ByteBuffer import java.util.concurrent.atomic.AtomicBoolean +import java.util.concurrent.atomic.AtomicReference /** * Exception thrown when no audio track is found in the video file. @@ -31,7 +45,13 @@ class NoAudioTrackException(message: String) : Exception(message) * * For WAV format, uses custom WAV file writer with PCM encoding. * For other formats, uses Android MediaExtractor and MediaMuxer. + * + * When a playback speed other than 1.0 is requested, compressed formats are + * re-encoded through a Media3 [Transformer] (with [SonicAudioProcessor] for a + * pitch-preserving time-stretch), while WAV applies the speed change directly + * on the decoded PCM in [WavFileWriter]. */ +@UnstableApi class ExtractAudio(private val context: Context) { companion object { @@ -58,12 +78,18 @@ class ExtractAudio(private val context: Context) { onComplete: (ByteArray?) -> Unit, onError: (Throwable) -> Unit ): AudioExtractJobHandle { - return if (config.format.lowercase() == "wav") { - // WAV format requires special handling - extractToWav(config, onProgress, onComplete, onError) - } else { - // Use MediaMuxer for other formats - extractWithMuxer(config, onProgress, onComplete, onError) + val applySpeed = config.speed > 0f && config.speed != 1.0f + return when { + // WAV applies the speed change on the decoded PCM (see WavFileWriter). + config.format.lowercase() == "wav" -> + extractToWav(config, onProgress, onComplete, onError) + // Compressed formats can't be re-timed by copying samples, so a + // speed change requires re-encoding through Media3 Transformer. + applySpeed -> + extractWithSpeed(config, onProgress, onComplete, onError) + // Fast path: copy the compressed audio samples unchanged. + else -> + extractWithMuxer(config, onProgress, onComplete, onError) } } @@ -154,8 +180,9 @@ class ExtractAudio(private val context: Context) { throw IllegalArgumentException("endUs must be greater than startUs") } - // Create WAV writer (handles both compressed and PCM audio) - wavWriter = WavFileWriter(outputFile) + // Create WAV writer (handles both compressed and PCM audio, + // and applies a pitch-preserving speed change when requested) + wavWriter = WavFileWriter(outputFile, config.speed) mainHandler.post { onProgress(0.0) } @@ -430,6 +457,169 @@ class ExtractAudio(private val context: Context) { } } + /** + * Extracts audio while applying a pitch-preserving playback speed change. + * + * Compressed output formats (AAC/M4A/MP3) can't be re-timed by simply + * copying samples, so the audio is re-encoded with a Media3 [Transformer]. + * A [SonicAudioProcessor] performs the time-stretch and the result is muxed + * into an MP4/M4A container (matching the muxer path's output container). + */ + private fun extractWithSpeed( + config: AudioExtractConfig, + onProgress: (Double) -> Unit, + onComplete: (ByteArray?) -> Unit, + onError: (Throwable) -> Unit + ): AudioExtractJobHandle { + val mainHandler = Handler(Looper.getMainLooper()) + val shouldStopPolling = AtomicBoolean(false) + val canceled = AtomicBoolean(false) + val transformerRef = AtomicReference(null) + + val outputFile = if (config.outputPath != null) { + File(config.outputPath) + } else { + File( + context.cacheDir, + "audio_output_${System.currentTimeMillis()}.${config.getExtension()}" + ) + } + + fun cleanupOnFailure() { + if (config.outputPath == null && outputFile.exists()) { + outputFile.delete() + } + } + + // Run the audio-track pre-check off the main thread, then start the + // Transformer (which must run on a thread with a Looper) on the main + // thread, mirroring the render pipeline. + Thread { + var probe: MediaExtractor? = null + val hasAudio: Boolean + try { + probe = MediaExtractor() + probe.setDataSource(config.inputPath) + hasAudio = findAudioTrack(probe) >= 0 + } catch (e: Exception) { + mainHandler.post { onError(e) } + return@Thread + } finally { + try { + probe?.release() + } catch (e: Exception) { + Log.w(TAG, "Error releasing probe extractor: ${e.message}") + } + } + + if (!hasAudio) { + mainHandler.post { + onError(NoAudioTrackException("No audio track found in video file")) + } + return@Thread + } + + mainHandler.post { + if (canceled.get()) return@post + try { + val mediaItemBuilder = MediaItem.Builder() + .setUri(Uri.fromFile(File(config.inputPath))) + + if (config.startUs != null || config.endUs != null) { + val clipping = MediaItem.ClippingConfiguration.Builder().apply { + config.startUs?.let { setStartPositionUs(it) } + config.endUs?.let { setEndPositionUs(it) } + }.build() + mediaItemBuilder.setClippingConfiguration(clipping) + } + + val sonic = SonicAudioProcessor().apply { setSpeed(config.speed) } + val editedMediaItem = EditedMediaItem.Builder(mediaItemBuilder.build()) + .setRemoveVideo(true) + .setEffects(Effects(listOf(sonic), emptyList())) + .build() + + val transformer = Transformer.Builder(context) + .setAudioMimeType(MimeTypes.AUDIO_AAC) + .addListener(object : Transformer.Listener { + override fun onCompleted( + composition: Composition, + result: ExportResult + ) { + shouldStopPolling.set(true) + onProgress(1.0) + try { + if (config.outputPath != null) { + onComplete(null) + } else { + onComplete(outputFile.readBytes()) + } + } catch (e: Exception) { + onError(e) + } finally { + if (config.outputPath == null) outputFile.delete() + } + } + + override fun onError( + composition: Composition, + result: ExportResult, + exception: ExportException + ) { + shouldStopPolling.set(true) + cleanupOnFailure() + onError(exception) + } + }) + .build() + + transformerRef.set(transformer) + if (canceled.get()) { + transformer.cancel() + cleanupOnFailure() + return@post + } + + // Transformer fails if the destination already exists. + if (outputFile.exists()) outputFile.delete() + + onProgress(0.0) + transformer.start(editedMediaItem, outputFile.absolutePath) + + // Poll for progress until the export finishes. + val progressHolder = ProgressHolder() + mainHandler.post(object : Runnable { + override fun run() { + if (shouldStopPolling.get()) return + val progressState = transformer.getProgress(progressHolder) + if (progressHolder.progress >= 0) { + onProgress(progressHolder.progress / 100.0) + } + if (!shouldStopPolling.get() && + progressState != Transformer.PROGRESS_STATE_NOT_STARTED + ) { + mainHandler.postDelayed(this, 200) + } + } + }) + } catch (e: Exception) { + Log.e(TAG, "Error extracting audio with speed: ${e.message}", e) + cleanupOnFailure() + onError(e) + } + } + }.start() + + return AudioExtractJobHandle { + canceled.set(true) + shouldStopPolling.set(true) + mainHandler.post { + transformerRef.get()?.cancel() + cleanupOnFailure() + } + } + } + /** * Finds the first audio track in the media file. * diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/PcmSpeedProcessor.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/PcmSpeedProcessor.kt new file mode 100644 index 00000000..217c04f3 --- /dev/null +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/PcmSpeedProcessor.kt @@ -0,0 +1,76 @@ +package ch.waio.pro_video_editor.src.features.audio + +import androidx.media3.common.C +import androidx.media3.common.audio.AudioProcessor +import androidx.media3.common.audio.SonicAudioProcessor +import androidx.media3.common.util.UnstableApi +import java.io.ByteArrayOutputStream +import java.nio.ByteBuffer +import java.nio.ByteOrder + +/** + * Applies a pitch-preserving playback speed change to a stream of 16-bit PCM + * audio using Media3's [SonicAudioProcessor]. + * + * The processor is fed interleaved little-endian 16-bit PCM via [process] and + * returns the time-stretched output. [drain] must be called once at the end of + * the stream to flush any samples buffered internally by Sonic. + * + * This mirrors the speed handling used in the video render pipeline + * (see `applyPlaybackSpeed`), keeping audio extraction consistent: a value of + * `2.0` plays twice as fast while keeping the original pitch. + */ +@UnstableApi +class PcmSpeedProcessor( + speed: Float, + sampleRate: Int, + channelCount: Int +) { + private val sonic = SonicAudioProcessor().apply { + setSpeed(speed) + // Pitch is intentionally left at 1.0 so only the duration changes. + configure( + AudioProcessor.AudioFormat(sampleRate, channelCount, C.ENCODING_PCM_16BIT) + ) + // SonicAudioProcessor only overrides flush(StreamMetadata); the no-arg + // flush() default throws, so call the StreamMetadata overload directly. + flush(AudioProcessor.StreamMetadata.DEFAULT) + } + + /** + * Feeds a chunk of 16-bit PCM and returns whatever output is currently + * available (may be empty while Sonic buffers internally). + */ + fun process(pcm: ByteArray): ByteArray { + sonic.queueInput(ByteBuffer.wrap(pcm).order(ByteOrder.LITTLE_ENDIAN)) + return collectOutput() + } + + /** + * Signals end-of-stream and returns any remaining buffered output. + */ + fun drain(): ByteArray { + sonic.queueEndOfStream() + val output = ByteArrayOutputStream() + output.write(collectOutput()) + while (!sonic.isEnded) { + val chunk = collectOutput() + if (chunk.isEmpty()) break + output.write(chunk) + } + sonic.reset() + return output.toByteArray() + } + + private fun collectOutput(): ByteArray { + val output = ByteArrayOutputStream() + var buffer = sonic.output + while (buffer.hasRemaining()) { + val chunk = ByteArray(buffer.remaining()) + buffer.get(chunk) + output.write(chunk) + buffer = sonic.output + } + return output.toByteArray() + } +} diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt index 417dd2ec..0af52a11 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt @@ -4,6 +4,7 @@ import android.media.AudioFormat import android.media.MediaCodec import android.media.MediaExtractor import android.media.MediaFormat +import androidx.media3.common.util.UnstableApi import java.io.File import java.io.FileOutputStream import java.io.RandomAccessFile @@ -17,9 +18,16 @@ import java.nio.ByteOrder * - Writing a proper RIFF/WAVE header * - Decoding compressed audio to PCM samples * - Writing PCM data in the correct format + * - Optionally applying a pitch-preserving playback speed change * - Updating the file size fields after writing + * + * @param outputFile Destination WAV file. + * @param speed Playback speed multiplier (1.0 = original). Values other than + * 1.0 time-stretch the audio while keeping the original pitch. Only applied + * to 16-bit PCM output. */ -class WavFileWriter(private val outputFile: File) { +@UnstableApi +class WavFileWriter(private val outputFile: File, private val speed: Float = 1.0f) { companion object { // Magic numbers for WAV format @@ -39,6 +47,50 @@ class WavFileWriter(private val outputFile: File) { private var isFloatPcm: Boolean = false private var totalDataSize: Long = 0 + /** Active when a pitch-preserving speed change is being applied. */ + private var speedProcessor: PcmSpeedProcessor? = null + + /** + * Initializes the speed processor once the PCM format is known. + * + * Speed is only applied to 16-bit PCM (the format this writer always emits + * after float conversion); 8-bit sources are written unchanged. + */ + private fun maybeInitSpeedProcessor() { + if (speed != 1.0f && bitsPerSample == 16 && speedProcessor == null) { + speedProcessor = PcmSpeedProcessor(speed, sampleRate, numChannels) + } + } + + /** + * Writes a chunk of 16-bit PCM, routing it through the speed processor when + * a speed change is active. Accumulates the number of bytes actually + * written so the WAV header reflects the (re-timed) output length. + */ + private fun writePcm(outputStream: FileOutputStream, pcm: ByteArray) { + val processor = speedProcessor + if (processor == null) { + outputStream.write(pcm) + totalDataSize += pcm.size + } else { + val processed = processor.process(pcm) + if (processed.isNotEmpty()) { + outputStream.write(processed) + totalDataSize += processed.size + } + } + } + + /** Flushes any samples buffered inside the speed processor. */ + private fun flushSpeedProcessor(outputStream: FileOutputStream) { + val processor = speedProcessor ?: return + val tail = processor.drain() + if (tail.isNotEmpty()) { + outputStream.write(tail) + totalDataSize += tail.size + } + } + /** * Extracts audio from a video file and writes it as a WAV file. * @@ -162,6 +214,7 @@ class WavFileWriter(private val outputFile: File) { var outputEos = false onProgress(0.0) + maybeInitSpeedProcessor() while (!outputEos && !shouldStop()) { if (!inputEos) { @@ -280,6 +333,19 @@ class WavFileWriter(private val outputFile: File) { raf.write(headerBytes) } + + // Re-create the speed processor for the new PCM + // format, flushing whatever it had buffered first. + val processor = speedProcessor + if (processor != null) { + val tail = processor.drain() + if (tail.isNotEmpty()) { + outputStream.write(tail) + totalDataSize += tail.size + } + speedProcessor = null + maybeInitSpeedProcessor() + } } } outputBufferId >= 0 -> { @@ -306,15 +372,13 @@ class WavFileWriter(private val outputFile: File) { val intValue = (floatValue.coerceIn(-1.0f, 1.0f) * 32767.0f).toInt().toShort() int16Buffer.putShort(intValue) } - - outputStream.write(int16Buffer.array()) - totalDataSize += int16Buffer.array().size + + writePcm(outputStream, int16Buffer.array()) } else { // Write PCM data directly (already in correct format) val pcmData = ByteArray(bufferInfo.size) decoderOutputBuffer.get(pcmData) - outputStream.write(pcmData) - totalDataSize += pcmData.size + writePcm(outputStream, pcmData) } } @@ -323,6 +387,9 @@ class WavFileWriter(private val outputFile: File) { } } + // Flush any samples buffered by the speed processor. + flushSpeedProcessor(outputStream) + outputStream.flush() outputStream.close() outputStream = null @@ -402,6 +469,7 @@ class WavFileWriter(private val outputFile: File) { var currentTimeUs = startUs onProgress(0.0) + maybeInitSpeedProcessor() while (!shouldStop()) { buffer.clear() @@ -434,11 +502,9 @@ class WavFileWriter(private val outputFile: File) { val intValue = (floatValue.coerceIn(-1.0f, 1.0f) * 32767.0f).toInt().toShort() int16Buffer.putShort(intValue) } - outputStream.write(int16Buffer.array()) - totalDataSize += int16Buffer.array().size + writePcm(outputStream, int16Buffer.array()) } else { - outputStream.write(pcmData) - totalDataSize += sampleSize + writePcm(outputStream, pcmData) } currentTimeUs = presentationTimeUs @@ -450,6 +516,9 @@ class WavFileWriter(private val outputFile: File) { extractor.advance() } + // Flush any samples buffered by the speed processor. + flushSpeedProcessor(outputStream) + outputStream.flush() outputStream.close() outputStream = null diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/models/AudioExtractConfig.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/models/AudioExtractConfig.kt index ad7915c5..6dc68f76 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/models/AudioExtractConfig.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/models/AudioExtractConfig.kt @@ -10,6 +10,7 @@ import io.flutter.plugin.common.MethodCall * @property format Output audio format (mp3, aac, wav, m4a, ogg) * @property startUs Optional start time in microseconds for trimming * @property endUs Optional end time in microseconds for trimming + * @property speed Playback speed multiplier (1.0 = original speed, pitch preserved) * @property outputPath Optional output file path (null = return bytes) */ data class AudioExtractConfig( @@ -18,6 +19,7 @@ data class AudioExtractConfig( val format: String, val startUs: Long?, val endUs: Long?, + val speed: Float, val outputPath: String? ) { companion object { @@ -37,6 +39,7 @@ data class AudioExtractConfig( val format = call.argument("format") ?: "mp3" val startUs = call.argument("startTime")?.toLong() val endUs = call.argument("endTime")?.toLong() + val speed = call.argument("speed")?.toFloat()?.takeIf { it > 0f } ?: 1.0f val outputPath = call.argument("outputPath") return AudioExtractConfig( @@ -45,6 +48,7 @@ data class AudioExtractConfig( format = format, startUs = startUs, endUs = endUs, + speed = speed, outputPath = outputPath ) } diff --git a/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/ExtractAudio.swift b/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/ExtractAudio.swift index 188a7b16..6c8183e9 100644 --- a/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/ExtractAudio.swift +++ b/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/ExtractAudio.swift @@ -89,6 +89,8 @@ class ExtractAudio { let sourceURL = URL(fileURLWithPath: config.inputPath) let asset = AVURLAsset(url: sourceURL) + let applySpeed = config.speed > 0 && config.speed != 1.0 + /// https://developer.apple.com/documentation/dispatch/dispatchsemaphore let semaphore = DispatchSemaphore(value: 0) var loadError: Error? @@ -123,7 +125,10 @@ class ExtractAudio { try? FileManager.default.removeItem(at: outputURL) let fileExtension = outputURL.pathExtension.lowercased() - let outputFileType: AVFileType = (fileExtension == "caf") ? .caf : .m4a + // A speed change forces a re-encode through AVAssetExportPresetAppleM4A, + // which only supports the .m4a (AAC) output file type. + let outputFileType: AVFileType = + applySpeed ? .m4a : ((fileExtension == "caf") ? .caf : .m4a) let audioTracks = asset.tracks(withMediaType: .audio) guard let audioTrack = audioTracks.first else { @@ -162,10 +167,22 @@ class ExtractAudio { try compositionAudioTrack.insertTimeRange(sourceTimeRange, of: audioTrack, at: .zero) + // Apply a pitch-preserving speed change by scaling the inserted range. + if applySpeed { + let scaled = CMTimeMultiplyByFloat64( + sourceTimeRange.duration, multiplier: 1.0 / config.speed) + compositionAudioTrack.scaleTimeRange( + CMTimeRange(start: .zero, duration: sourceTimeRange.duration), toDuration: scaled) + } + + // Passthrough can't re-time samples, so a speed change requires a + // re-encoding preset. + let presetName = + applySpeed ? AVAssetExportPresetAppleM4A : AVAssetExportPresetPassthrough guard let session = AVAssetExportSession( asset: composition, - presetName: AVAssetExportPresetPassthrough + presetName: presetName ) else { throw NSError( @@ -176,6 +193,10 @@ class ExtractAudio { session.outputURL = outputURL session.outputFileType = outputFileType + if applySpeed { + // Preserve the original pitch while time-stretching. + session.audioTimePitchAlgorithm = .spectral + } // Sync context access back to main thread to cleanly initialize timers DispatchQueue.main.async { @@ -379,9 +400,7 @@ class ExtractAudio { timeRange = audioTrack.timeRange } - let reader = try AVAssetReader(asset: asset) - assetReader = reader - reader.timeRange = timeRange + let applySpeed = config.speed > 0 && config.speed != 1.0 let readerOutputSettings: [String: Any] = [ AVFormatIDKey: kAudioFormatLinearPCM, @@ -391,16 +410,73 @@ class ExtractAudio { AVLinearPCMIsNonInterleaved: false, ] - let readerOutput = AVAssetReaderTrackOutput( - track: audioTrack, outputSettings: readerOutputSettings) - readerOutput.alwaysCopiesSampleData = false + let reader: AVAssetReader + let readerOutput: AVAssetReaderOutput + // Duration/start used only for progress reporting. + let effectiveDuration: CMTime + let progressStartSeconds: Double + + if applySpeed { + // Build a composition holding only the requested range, then scale it + // to apply the speed change. Reading it through an audio-mix output + // lets us request a pitch-preserving time-stretch. + let composition = AVMutableComposition() + guard + let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) + else { + throw NSError( + domain: "ExtractAudio", code: -14, + userInfo: [NSLocalizedDescriptionKey: "Failed to create composition audio track"]) + } - guard reader.canAdd(readerOutput) else { - throw NSError( - domain: "ExtractAudio", code: -7, - userInfo: [NSLocalizedDescriptionKey: "Cannot add reader output"]) + try compositionAudioTrack.insertTimeRange(timeRange, of: audioTrack, at: .zero) + let scaled = CMTimeMultiplyByFloat64(timeRange.duration, multiplier: 1.0 / config.speed) + compositionAudioTrack.scaleTimeRange( + CMTimeRange(start: .zero, duration: timeRange.duration), toDuration: scaled) + + let compositionReader = try AVAssetReader(asset: composition) + let mixOutput = AVAssetReaderAudioMixOutput( + audioTracks: composition.tracks(withMediaType: .audio), + audioSettings: readerOutputSettings) + mixOutput.audioTimePitchAlgorithm = .spectral + mixOutput.alwaysCopiesSampleData = false + + guard compositionReader.canAdd(mixOutput) else { + throw NSError( + domain: "ExtractAudio", code: -7, + userInfo: [NSLocalizedDescriptionKey: "Cannot add reader output"]) + } + compositionReader.add(mixOutput) + + reader = compositionReader + readerOutput = mixOutput + effectiveDuration = scaled + // Composition samples start at zero. + progressStartSeconds = 0.0 + } else { + let trackReader = try AVAssetReader(asset: asset) + trackReader.timeRange = timeRange + + let trackOutput = AVAssetReaderTrackOutput( + track: audioTrack, outputSettings: readerOutputSettings) + trackOutput.alwaysCopiesSampleData = false + + guard trackReader.canAdd(trackOutput) else { + throw NSError( + domain: "ExtractAudio", code: -7, + userInfo: [NSLocalizedDescriptionKey: "Cannot add reader output"]) + } + trackReader.add(trackOutput) + + reader = trackReader + readerOutput = trackOutput + effectiveDuration = timeRange.duration + progressStartSeconds = CMTimeGetSeconds(timeRange.start) } - reader.add(readerOutput) + assetReader = reader let fm = FileManager.default guard fm.createFile(atPath: outputURL.path, contents: nil) else { @@ -430,7 +506,7 @@ class ExtractAudio { DispatchQueue.main.async { onProgress(0.0) } - let totalDuration = CMTimeGetSeconds(timeRange.duration) + let totalDuration = CMTimeGetSeconds(effectiveDuration) var totalPcmBytes: Int64 = 0 while let sampleBuffer = readerOutput.copyNextSampleBuffer() { @@ -456,7 +532,7 @@ class ExtractAudio { } let currentTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer) - let elapsed = CMTimeGetSeconds(currentTime) - CMTimeGetSeconds(timeRange.start) + let elapsed = CMTimeGetSeconds(currentTime) - progressStartSeconds let progress = totalDuration > 0 ? min(max(elapsed / totalDuration, 0.0), 0.99) : 0.0 DispatchQueue.main.async { onProgress(progress) } } diff --git a/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/models/AudioExtractConfig.swift b/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/models/AudioExtractConfig.swift index 278e22da..854b6f28 100644 --- a/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/models/AudioExtractConfig.swift +++ b/darwin/pro_video_editor/Sources/pro_video_editor/src/shared/features/audio/models/AudioExtractConfig.swift @@ -19,10 +19,13 @@ struct AudioExtractConfig { /// Optional start time in microseconds for trimming let startUs: Int64? - + /// Optional end time in microseconds for trimming let endUs: Int64? - + + /// Playback speed multiplier (1.0 = original speed, pitch preserved) + let speed: Double + /// Optional output file path (nil = return bytes) let outputPath: String? @@ -41,8 +44,9 @@ struct AudioExtractConfig { let startUs = args["startTime"] as? Int64 let endUs = args["endTime"] as? Int64 + let speed = (args["speed"] as? NSNumber)?.doubleValue ?? 1.0 let outputPath = args["outputPath"] as? String - + return AudioExtractConfig( id: id, inputPath: inputPath, @@ -50,6 +54,7 @@ struct AudioExtractConfig { format: format, startUs: startUs, endUs: endUs, + speed: speed > 0 ? speed : 1.0, outputPath: outputPath ) } diff --git a/example/integration_test/audio_extract_test.dart b/example/integration_test/audio_extract_test.dart index 727089d7..3235a48b 100644 --- a/example/integration_test/audio_extract_test.dart +++ b/example/integration_test/audio_extract_test.dart @@ -140,6 +140,70 @@ void main() { }, skip: skipPlatform || !isFormatSupported(format), ); + + testWidgets( + 'extractAudio with $format at 2x speed produces valid, shorter output', + (tester) async { + if (!isFormatSupported(format)) return; + + final directory = await getTemporaryDirectory(); + final ts = DateTime.now().millisecondsSinceEpoch; + final normalPath = + '${directory.path}/test_audio_1x_$ts.${format.extension}'; + final fastPath = + '${directory.path}/test_audio_2x_$ts.${format.extension}'; + + await pve.extractAudioToFile( + normalPath, + AudioExtractConfigs(video: testVideo, format: format), + ); + await pve.extractAudioToFile( + fastPath, + AudioExtractConfigs(video: testVideo, format: format, speed: 2.0), + ); + + final normalFile = File(normalPath); + final fastFile = File(fastPath); + + expect( + await fastFile.exists(), + isTrue, + reason: 'Sped-up audio file should exist', + ); + + // Extension-based MIME detection (see notes on the base test). + final mimeType = lookupMimeType(fastPath); + final expectedMimeTypes = switch (format) { + AudioFormat.aac => [format.mimeType, 'audio/mp4'], + AudioFormat.wav => [format.mimeType, 'audio/wav'], + _ => [format.mimeType], + }; + expect(expectedMimeTypes, contains(mimeType)); + + expect( + await fastFile.length(), + greaterThan(1000), + reason: 'Sped-up audio should have content (>1KB)', + ); + + // WAV is uncompressed, so 2x speed roughly halves the PCM data. A + // generous bound keeps this robust across sample rates / bit depths. + if (format == AudioFormat.wav) { + final normalSize = await normalFile.length(); + final fastSize = await fastFile.length(); + expect( + fastSize, + lessThan(normalSize * 0.75), + reason: '2x WAV should be markedly smaller than the 1x extraction', + ); + } + + // Clean up + if (await normalFile.exists()) await normalFile.delete(); + if (await fastFile.exists()) await fastFile.delete(); + }, + skip: skipPlatform || !isFormatSupported(format), + ); } testWidgets('extractAudio emits progress updates', (tester) async { diff --git a/example/lib/features/audio/audio_extract_example_page.dart b/example/lib/features/audio/audio_extract_example_page.dart index 15620a16..9356a306 100644 --- a/example/lib/features/audio/audio_extract_example_page.dart +++ b/example/lib/features/audio/audio_extract_example_page.dart @@ -28,6 +28,7 @@ class _AudioExtractExamplePageState extends State { String? _extractedAudioPath; bool _isExtracting = false; AudioFormat _selectedFormat = AudioFormat.mp3; + double _selectedSpeed = 1.0; final String _taskId = 'AudioExtractionTaskId'; final AudioPlayer _audioPlayer = AudioPlayer(); @@ -110,6 +111,7 @@ class _AudioExtractExamplePageState extends State { final config = AudioExtractConfigs( video: EditorVideo.asset(kVideoEditorExampleH264Path), format: _selectedFormat, + speed: _selectedSpeed, // Optional: Add trimming // startTime: Duration(seconds: 5), // endTime: Duration(seconds: 15), @@ -365,6 +367,7 @@ class _AudioExtractExamplePageState extends State { children: [ _AudioExtractionCard( selectedFormat: _selectedFormat, + selectedSpeed: _selectedSpeed, isFormatSupported: _isFormatSupported, isExtracting: _isExtracting, taskId: _taskId, @@ -375,6 +378,9 @@ class _AudioExtractExamplePageState extends State { onFormatChanged: (format) => setState(() { _selectedFormat = format; }), + onSpeedChanged: (speed) => setState(() { + _selectedSpeed = speed; + }), onExtractAudio: _extractAudio, onPlayPause: _playAudio, onSeek: (value) async { @@ -429,6 +435,7 @@ class _AudioExtractExamplePageState extends State { class _AudioExtractionCard extends StatelessWidget { const _AudioExtractionCard({ required this.selectedFormat, + required this.selectedSpeed, required this.isFormatSupported, required this.isExtracting, required this.taskId, @@ -437,6 +444,7 @@ class _AudioExtractionCard extends StatelessWidget { required this.position, required this.duration, required this.onFormatChanged, + required this.onSpeedChanged, required this.onExtractAudio, required this.onPlayPause, required this.onSeek, @@ -444,6 +452,7 @@ class _AudioExtractionCard extends StatelessWidget { }); final AudioFormat selectedFormat; + final double selectedSpeed; final bool Function(AudioFormat) isFormatSupported; final bool isExtracting; final String taskId; @@ -452,11 +461,14 @@ class _AudioExtractionCard extends StatelessWidget { final Duration position; final Duration duration; final ValueChanged onFormatChanged; + final ValueChanged onSpeedChanged; final VoidCallback onExtractAudio; final VoidCallback onPlayPause; final ValueChanged onSeek; final VoidCallback onDelete; + static const List _speedOptions = [0.5, 1.0, 1.5, 2.0]; + String _formatDuration(Duration dur) { String twoDigits(int n) => n.toString().padLeft(2, '0'); final minutes = twoDigits(dur.inMinutes.remainder(60)); @@ -508,6 +520,26 @@ class _AudioExtractionCard extends StatelessWidget { }).toList(), ), const SizedBox(height: 16), + const Text( + 'Speed:', + style: TextStyle(fontSize: 14, fontWeight: FontWeight.w500), + ), + const SizedBox(height: 8), + Wrap( + spacing: 8, + children: _speedOptions.map((speed) { + return ChoiceChip( + label: Text('${speed}x'), + selected: selectedSpeed == speed, + onSelected: isExtracting + ? null + : (selected) { + if (selected) onSpeedChanged(speed); + }, + ); + }).toList(), + ), + const SizedBox(height: 16), SizedBox( width: double.infinity, child: ElevatedButton.icon( diff --git a/lib/core/models/audio/audio_extract_configs_model.dart b/lib/core/models/audio/audio_extract_configs_model.dart index 446b6933..b9fae48c 100644 --- a/lib/core/models/audio/audio_extract_configs_model.dart +++ b/lib/core/models/audio/audio_extract_configs_model.dart @@ -17,14 +17,17 @@ class AudioExtractConfigs { /// [startTime] Optional start time for trimming. If null, starts from /// beginning. /// [endTime] Optional end time for trimming. If null, goes to video end. + /// [speed] Playback speed multiplier for the extracted audio (default `1.0`). /// [id] Unique task identifier. Generated automatically if not provided. AudioExtractConfigs({ required this.video, this.format = AudioFormat.mp3, this.startTime, this.endTime, + this.speed = 1.0, String? id, - }) : id = id ?? DateTime.now().millisecondsSinceEpoch.toString(); + }) : assert(speed > 0, '[speed] must be greater than 0'), + id = id ?? DateTime.now().millisecondsSinceEpoch.toString(); /// The source video to extract audio from. final EditorVideo video; @@ -44,6 +47,15 @@ class AudioExtractConfigs { /// If null, extraction continues to the end of the video. final Duration? endTime; + /// Playback speed multiplier applied to the extracted audio. + /// + /// For example, `0.5` for half speed, `2.0` for double speed. + /// The pitch is preserved (time-stretch), matching the editor's video + /// rendering behavior. + /// + /// **Default**: `1.0` (original speed) + final double speed; + /// Unique identifier for tracking progress of this extraction task. /// /// Used with [ProVideoEditor.progressStreamById] to monitor extraction @@ -57,6 +69,7 @@ class AudioExtractConfigs { 'format': format.name, 'startTime': startTime?.inMicroseconds, 'endTime': endTime?.inMicroseconds, + 'speed': speed, }; } @@ -66,6 +79,7 @@ class AudioExtractConfigs { AudioFormat? format, Duration? startTime, Duration? endTime, + double? speed, String? id, }) { return AudioExtractConfigs( @@ -73,6 +87,7 @@ class AudioExtractConfigs { format: format ?? this.format, startTime: startTime ?? this.startTime, endTime: endTime ?? this.endTime, + speed: speed ?? this.speed, id: id ?? this.id, ); } @@ -86,6 +101,7 @@ class AudioExtractConfigs { other.format == format && other.startTime == startTime && other.endTime == endTime && + other.speed == speed && other.id == id; } @@ -95,12 +111,13 @@ class AudioExtractConfigs { format.hashCode ^ startTime.hashCode ^ endTime.hashCode ^ + speed.hashCode ^ id.hashCode; } @override String toString() { return 'AudioExtractConfigs(video: $video, format: $format, ' - 'startTime: $startTime, endTime: $endTime, id: $id)'; + 'startTime: $startTime, endTime: $endTime, speed: $speed, id: $id)'; } } diff --git a/pubspec.yaml b/pubspec.yaml index daab2119..3bfbfdaf 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -1,6 +1,6 @@ name: pro_video_editor description: "A Flutter video editor: Seamlessly enhance your videos with user-friendly editing features." -version: 1.17.6 +version: 1.18.0 homepage: https://github.com/hm21/pro_video_editor/ repository: https://github.com/hm21/pro_video_editor/ documentation: https://github.com/hm21/pro_video_editor/