Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 1.18.0
- **FEAT**(android, iOS, macOS): Add `speed` to `AudioExtractConfigs` for pitch-preserving playback-speed changes during audio extraction (e.g. `2.0` for double speed, `0.5` for half). WAV applies the time-stretch directly on the decoded PCM (Android `SonicAudioProcessor`; AVFoundation `AVAssetReaderAudioMixOutput` with the spectral time-pitch algorithm), while compressed formats are re-encoded — Android via a Media3 `Transformer`, Darwin via `AVAssetExportPresetAppleM4A` with a scaled time range. Note: on Apple platforms a non-`1.0` speed re-encodes to AAC/M4A, so `caf` output falls back to M4A content.

## 1.17.6
- **FIX**(iOS, macOS): Fix unstable/glitchy audio on reversed clips inside multi-clip timelines. `AudioReverser` now preserves the source audio format (sample rate and channel count) instead of forcing 44.1 kHz stereo, so a reversed clip's audio matches its untouched neighbours and AVFoundation no longer has to reconcile mixed formats within a single composition audio track during looped playback. The reversed segment is also written as an AVFoundation-authored CAF/PCM file (replacing the hand-authored WAV) and inserted at its decoded duration without padding, avoiding encoder priming artifacts and audio-timing drift in the exported video.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,28 @@ import android.media.MediaCodec
import android.media.MediaExtractor
import android.media.MediaFormat
import android.media.MediaMuxer
import android.net.Uri
import android.os.Handler
import android.os.Looper
import androidx.media3.common.MediaItem
import androidx.media3.common.MimeTypes
import androidx.media3.common.audio.AudioProcessor
import androidx.media3.common.audio.SonicAudioProcessor
import androidx.media3.common.util.UnstableApi
import androidx.media3.transformer.Composition
import androidx.media3.transformer.EditedMediaItem
import androidx.media3.transformer.Effects
import androidx.media3.transformer.ExportException
import androidx.media3.transformer.ExportResult
import androidx.media3.transformer.ProgressHolder
import androidx.media3.transformer.Transformer
import ch.waio.pro_video_editor.src.features.audio.models.AudioExtractConfig
import ch.waio.pro_video_editor.src.features.audio.models.AudioExtractJobHandle
import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log
import java.io.File
import java.nio.ByteBuffer
import java.util.concurrent.atomic.AtomicBoolean
import java.util.concurrent.atomic.AtomicReference

/**
* Exception thrown when no audio track is found in the video file.
Expand All @@ -31,7 +45,13 @@ class NoAudioTrackException(message: String) : Exception(message)
*
* For WAV format, uses custom WAV file writer with PCM encoding.
* For other formats, uses Android MediaExtractor and MediaMuxer.
*
* When a playback speed other than 1.0 is requested, compressed formats are
* re-encoded through a Media3 [Transformer] (with [SonicAudioProcessor] for a
* pitch-preserving time-stretch), while WAV applies the speed change directly
* on the decoded PCM in [WavFileWriter].
*/
@UnstableApi
class ExtractAudio(private val context: Context) {

companion object {
Expand All @@ -58,12 +78,18 @@ class ExtractAudio(private val context: Context) {
onComplete: (ByteArray?) -> Unit,
onError: (Throwable) -> Unit
): AudioExtractJobHandle {
return if (config.format.lowercase() == "wav") {
// WAV format requires special handling
extractToWav(config, onProgress, onComplete, onError)
} else {
// Use MediaMuxer for other formats
extractWithMuxer(config, onProgress, onComplete, onError)
val applySpeed = config.speed > 0f && config.speed != 1.0f
return when {
// WAV applies the speed change on the decoded PCM (see WavFileWriter).
config.format.lowercase() == "wav" ->
extractToWav(config, onProgress, onComplete, onError)
// Compressed formats can't be re-timed by copying samples, so a
// speed change requires re-encoding through Media3 Transformer.
applySpeed ->
extractWithSpeed(config, onProgress, onComplete, onError)
// Fast path: copy the compressed audio samples unchanged.
else ->
extractWithMuxer(config, onProgress, onComplete, onError)
}
}

Expand Down Expand Up @@ -154,8 +180,9 @@ class ExtractAudio(private val context: Context) {
throw IllegalArgumentException("endUs must be greater than startUs")
}

// Create WAV writer (handles both compressed and PCM audio)
wavWriter = WavFileWriter(outputFile)
// Create WAV writer (handles both compressed and PCM audio,
// and applies a pitch-preserving speed change when requested)
wavWriter = WavFileWriter(outputFile, config.speed)

mainHandler.post { onProgress(0.0) }

Expand Down Expand Up @@ -430,6 +457,169 @@ class ExtractAudio(private val context: Context) {
}
}

/**
 * Extracts audio while applying a pitch-preserving playback speed change.
 *
 * Compressed output formats (AAC/M4A/MP3) can't be re-timed by simply
 * copying samples, so the audio is re-encoded with a Media3 [Transformer].
 * A [SonicAudioProcessor] performs the time-stretch and the result is muxed
 * into an MP4/M4A container (matching the muxer path's output container).
 *
 * NOTE(review): the Transformer is always configured for AAC output, whatever
 * `config.format` requests; only the temp file's extension follows the
 * config. Confirm callers asking for e.g. mp3 expect AAC content.
 *
 * Threading: the audio-track probe runs on a worker thread; the Transformer
 * is created, started, polled and canceled on the main looper. All callbacks
 * are invoked on the main thread.
 *
 * Cancellation: once the returned handle has been invoked, probe errors and
 * the in-memory result are no longer delivered, matching the silent cancel
 * of a running export.
 *
 * @param config Source path, optional clip range (`startUs`/`endUs`),
 *   `speed`, and optional `outputPath`.
 * @param onProgress Receives progress in `0.0..1.0`.
 * @param onComplete Receives the encoded bytes, or `null` when the result was
 *   written to `config.outputPath`.
 * @param onError Receives probe, validation or export failures
 *   ([NoAudioTrackException] when the input has no audio track).
 * @return A handle that cancels the job and removes the temporary output.
 */
private fun extractWithSpeed(
    config: AudioExtractConfig,
    onProgress: (Double) -> Unit,
    onComplete: (ByteArray?) -> Unit,
    onError: (Throwable) -> Unit
): AudioExtractJobHandle {
    val mainHandler = Handler(Looper.getMainLooper())
    val shouldStopPolling = AtomicBoolean(false)
    val canceled = AtomicBoolean(false)
    val transformerRef = AtomicReference<Transformer?>(null)

    val outputFile = if (config.outputPath != null) {
        File(config.outputPath)
    } else {
        File(
            context.cacheDir,
            "audio_output_${System.currentTimeMillis()}.${config.getExtension()}"
        )
    }

    // Only the plugin-owned temp file is removed; a caller-supplied
    // destination is left untouched.
    fun cleanupOnFailure() {
        if (config.outputPath == null && outputFile.exists()) {
            outputFile.delete()
        }
    }

    // Reports a pre-export failure on the main thread unless the job was
    // canceled in the meantime.
    fun reportProbeError(error: Throwable) {
        mainHandler.post {
            if (!canceled.get()) onError(error)
        }
    }

    // Reports an export failure after discarding any partial temp output.
    fun failExport(error: Throwable) {
        cleanupOnFailure()
        onError(error)
    }

    // Hands the finished export to the caller. Must be called on the main
    // thread; callbacks are always invoked on the main thread.
    fun deliverResult() {
        if (config.outputPath != null) {
            try {
                onComplete(null)
            } catch (e: Exception) {
                onError(e)
            }
            return
        }
        // Reading the whole file can take a while for long clips, so do it
        // off the main looper and hop back only to deliver the bytes.
        Thread {
            val outcome: Result<ByteArray> = try {
                Result.success(outputFile.readBytes())
            } catch (e: Exception) {
                Result.failure(e)
            } finally {
                outputFile.delete()
            }
            mainHandler.post {
                if (canceled.get()) return@post
                try {
                    onComplete(outcome.getOrThrow())
                } catch (e: Exception) {
                    onError(e)
                }
            }
        }.start()
    }

    // Run the audio-track pre-check off the main thread, then start the
    // Transformer (which must run on a thread with a Looper) on the main
    // thread, mirroring the render pipeline.
    Thread {
        val hasAudio = try {
            val probe = MediaExtractor()
            try {
                probe.setDataSource(config.inputPath)
                findAudioTrack(probe) >= 0
            } finally {
                try {
                    probe.release()
                } catch (e: Exception) {
                    Log.w(TAG, "Error releasing probe extractor: ${e.message}")
                }
            }
        } catch (e: Exception) {
            reportProbeError(e)
            return@Thread
        }

        if (!hasAudio) {
            reportProbeError(NoAudioTrackException("No audio track found in video file"))
            return@Thread
        }

        mainHandler.post {
            if (canceled.get()) return@post
            try {
                val startUs = config.startUs
                val endUs = config.endUs
                // Same sanity check (and message) as the WAV path, so an
                // inverted or empty range fails fast instead of reaching the
                // Transformer.
                if (startUs != null && endUs != null && endUs <= startUs) {
                    throw IllegalArgumentException("endUs must be greater than startUs")
                }

                val mediaItemBuilder = MediaItem.Builder()
                    .setUri(Uri.fromFile(File(config.inputPath)))

                if (startUs != null || endUs != null) {
                    val clipping = MediaItem.ClippingConfiguration.Builder().apply {
                        startUs?.let { setStartPositionUs(it) }
                        endUs?.let { setEndPositionUs(it) }
                    }.build()
                    mediaItemBuilder.setClippingConfiguration(clipping)
                }

                // Pitch is left at its default so only the duration changes.
                val sonic = SonicAudioProcessor().apply { setSpeed(config.speed) }
                val editedMediaItem = EditedMediaItem.Builder(mediaItemBuilder.build())
                    .setRemoveVideo(true)
                    .setEffects(Effects(listOf<AudioProcessor>(sonic), emptyList()))
                    .build()

                val transformer = Transformer.Builder(context)
                    .setAudioMimeType(MimeTypes.AUDIO_AAC)
                    .addListener(object : Transformer.Listener {
                        override fun onCompleted(
                            composition: Composition,
                            result: ExportResult
                        ) {
                            shouldStopPolling.set(true)
                            onProgress(1.0)
                            deliverResult()
                        }

                        override fun onError(
                            composition: Composition,
                            result: ExportResult,
                            exception: ExportException
                        ) {
                            shouldStopPolling.set(true)
                            failExport(exception)
                        }
                    })
                    .build()

                transformerRef.set(transformer)
                // Re-check: cancel may have been requested while building.
                if (canceled.get()) {
                    transformer.cancel()
                    cleanupOnFailure()
                    return@post
                }

                // Transformer fails if the destination already exists.
                if (outputFile.exists()) outputFile.delete()

                onProgress(0.0)
                transformer.start(editedMediaItem, outputFile.absolutePath)

                // Poll for progress until the export finishes.
                val progressHolder = ProgressHolder()
                mainHandler.post(object : Runnable {
                    override fun run() {
                        if (shouldStopPolling.get()) return
                        val progressState = transformer.getProgress(progressHolder)
                        // The holder is only meaningful while progress is
                        // reported as available; otherwise it may be stale.
                        if (progressState == Transformer.PROGRESS_STATE_AVAILABLE) {
                            onProgress(progressHolder.progress / 100.0)
                        }
                        if (!shouldStopPolling.get() &&
                            progressState != Transformer.PROGRESS_STATE_NOT_STARTED
                        ) {
                            mainHandler.postDelayed(this, 200)
                        }
                    }
                })
            } catch (e: Exception) {
                Log.e(TAG, "Error extracting audio with speed: ${e.message}", e)
                failExport(e)
            }
        }
    }.start()

    return AudioExtractJobHandle {
        canceled.set(true)
        shouldStopPolling.set(true)
        // Transformer must be canceled on the thread it was created on.
        mainHandler.post {
            transformerRef.get()?.cancel()
            cleanupOnFailure()
        }
    }
}

/**
* Finds the first audio track in the media file.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package ch.waio.pro_video_editor.src.features.audio

import androidx.media3.common.C
import androidx.media3.common.audio.AudioProcessor
import androidx.media3.common.audio.SonicAudioProcessor
import androidx.media3.common.util.UnstableApi
import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer
import java.nio.ByteOrder

/**
 * Applies a pitch-preserving playback speed change to a stream of 16-bit PCM
 * audio using Media3's [SonicAudioProcessor].
 *
 * The processor is fed interleaved little-endian 16-bit PCM via [process] and
 * returns the time-stretched output. [drain] must be called once at the end of
 * the stream to flush any samples buffered internally by Sonic. The underlying
 * processor is reset by [drain], so an instance should not be reused afterwards.
 *
 * If Sonic reports itself inactive after configuration (which it does when the
 * requested speed is effectively `1.0`), input is passed through unchanged
 * instead of being queued, because an inactive processor does not accept input.
 *
 * This mirrors the speed handling used in the video render pipeline
 * (see `applyPlaybackSpeed`), keeping audio extraction consistent: a value of
 * `2.0` plays twice as fast while keeping the original pitch.
 *
 * @param speed Playback speed factor.
 * @param sampleRate Sample rate of the PCM stream in Hz.
 * @param channelCount Number of interleaved channels in the PCM stream.
 */
@UnstableApi
class PcmSpeedProcessor(
    speed: Float,
    sampleRate: Int,
    channelCount: Int
) {
    private val sonic = SonicAudioProcessor().apply {
        setSpeed(speed)
        // Pitch is intentionally left at 1.0 so only the duration changes.
        configure(
            AudioProcessor.AudioFormat(sampleRate, channelCount, C.ENCODING_PCM_16BIT)
        )
        // SonicAudioProcessor only overrides flush(StreamMetadata); the no-arg
        // flush() default throws, so call the StreamMetadata overload directly.
        flush(AudioProcessor.StreamMetadata.DEFAULT)
    }

    // Per the AudioProcessor contract, input may only be queued while the
    // processor is active; remember the post-configure state once.
    private val passthrough: Boolean = !sonic.isActive

    /**
     * Feeds a chunk of 16-bit PCM and returns whatever output is currently
     * available (may be empty while Sonic buffers internally).
     *
     * In pass-through mode a copy of [pcm] is returned unchanged.
     */
    fun process(pcm: ByteArray): ByteArray {
        if (passthrough) return pcm.copyOf()
        sonic.queueInput(ByteBuffer.wrap(pcm).order(ByteOrder.LITTLE_ENDIAN))
        return collectOutput()
    }

    /**
     * Signals end-of-stream and returns any remaining buffered output.
     *
     * Resets the underlying processor before returning.
     */
    fun drain(): ByteArray {
        sonic.queueEndOfStream()
        val output = ByteArrayOutputStream()
        output.write(collectOutput())
        while (!sonic.isEnded) {
            val chunk = collectOutput()
            // Guard against spinning if Sonic has nothing more to give but
            // has not flagged itself as ended.
            if (chunk.isEmpty()) break
            output.write(chunk)
        }
        sonic.reset()
        return output.toByteArray()
    }

    /** Copies every output buffer Sonic currently has ready into one array. */
    private fun collectOutput(): ByteArray {
        val output = ByteArrayOutputStream()
        var buffer = sonic.output
        while (buffer.hasRemaining()) {
            val chunk = ByteArray(buffer.remaining())
            buffer.get(chunk)
            output.write(chunk)
            buffer = sonic.output
        }
        return output.toByteArray()
    }
}
Loading
Loading