Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 21 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,22 @@ Beautiful, modern reports with:
## 🚀 Quick Start

### Installation

#### Requirements

Requires [FFmpeg](https://ffmpeg.org/).

```shell
# Ubuntu/Debian:
sudo apt install ffmpeg

# macOS:
brew install ffmpeg

# Arch Linux:
sudo pacman -S ffmpeg
```

#### PyPI
```bash
# pip
Expand Down Expand Up @@ -247,35 +263,17 @@ Or pipe directly:
drcheck analyze album/ --format csv > results.csv
```

## 🎵 Supported Audio Formats
## 🎵 Supported Audio Codecs

**Lossless (via libsndfile):**
- FLAC (.flac)
- WAV (.wav)
- AIFF (.aiff, .aif, .aifc)
- OGG Vorbis (.ogg, .oga)
- Opus (.opus)

**Lossy (requires pydub + ffmpeg):**
- Vorbis (.ogg, .oga)
- Opus (.opus, .ogg)
- MP3 (.mp3)
- M4A/AAC (.m4a, .mp4, .aac)
- WMA (.wma)

To enable MP3/M4A support:
```bash
# Install pydub
uv pip install pydub

# Install ffmpeg (system package)
# Ubuntu/Debian:
sudo apt install ffmpeg

# macOS:
brew install ffmpeg

# Arch Linux:
sudo pacman -S ffmpeg
```
- ... and any other format supported by your build of `ffmpeg`.

## 🔬 Understanding DR Values

Expand All @@ -292,27 +290,20 @@ The DR (Dynamic Range) scale measures the difference between the loudest and ave
## 📊 Error Handling (v1.1.1)
**Smart Error Messages (v1.1.1):**
When errors occur, DR Check now provides helpful, actionable guidance:
- **Missing MP3/M4A support?** Shows exact install command
- **Corrupted files?** Suggests how to fix them
- **Audio too short?** Explains the 6-second minimum requirement

Examples of intelligent error messages with helpful suggestions:

```
================================================================================
⚠️ Failed to process 3 of 50 file(s)
⚠️ Failed to process 1 of 50 file(s)
================================================================================

📋 Missing Lossy Format Support (2 file(s)):
• song1.mp3
→ Install: pip install drcheck[lossy]
→ Also ensure ffmpeg is installed on your system

📋 Audio Too Short (1 file(s)):
• intro.wav (need at least 6 seconds for DR analysis)

💡 Tips:
• For MP3/M4A support, install: pip install drcheck[lossy]
• DR analysis requires at least 6 seconds of audio
(two 3-second blocks for measurement)
```
Expand Down
185 changes: 35 additions & 150 deletions drcheck/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,74 +5,27 @@

import logging
from dataclasses import dataclass
from importlib.util import find_spec
from pathlib import Path
from typing import Any

import numpy as np
import soundfile as sf
from numpy.typing import NDArray
from pydub import AudioSegment
from tinytag import TinyTag

logger = logging.getLogger(__name__)


def _read_tags(filepath: Path) -> tuple[str | None, str | None]:
    """
    Look up the artist and album tags of an audio file.

    This is a best-effort helper: any failure (mutagen missing, unreadable
    file, unexpected tag layout) is logged at debug level and reported as
    "no tags" instead of being raised.

    Args:
        filepath: Path to audio file

    Returns:
        Tuple of (artist, album); either element is None when the tag is
        absent, and both are None when the file's tags cannot be read
    """

    def _first_value(raw):
        # List-valued tags yield their first entry; anything else is
        # stringified as-is.
        return raw[0] if isinstance(raw, list) else str(raw)

    try:
        from mutagen._file import File

        handle = File(filepath, easy=True)
        if handle is None:
            return None, None

        # Some objects returned by File() may lack a usable tag container.
        tags = handle.tags if hasattr(handle, "tags") else None
        if not tags:
            return None, None

        # Prefer "artist"; fall back to "albumartist" when it is empty.
        artist_raw = tags.get("artist", []) or tags.get("albumartist", [])
        album_raw = tags.get("album", [])

        artist = _first_value(artist_raw) if artist_raw else None
        album = _first_value(album_raw) if album_raw else None
        return artist, album

    except Exception as exc:
        logger.debug(f"Could not read tags from {filepath}: {exc}")
        return None, None


@dataclass
class AudioData:
"""Container for decoded audio data and metadata."""

samples: NDArray[np.float32] | None # Audio samples, shape (samples, channels)
samples: np.ndarray[Any, np.dtype[np.float32]] | None # Audio samples, shape (samples, channels)
sample_rate: int
channels: int
duration_seconds: float
filepath: Path
bit_depth: int | None = None # Bits per sample (16, 24, 32, etc.)
bit_rate: int | None = None # Bit rate (kb/s)
format_name: str | None = None # Format/codec name (FLAC, WAV, etc.)
artist: str | None = None # Artist tag
album: str | None = None # Album tag
Expand Down Expand Up @@ -121,7 +74,7 @@ def read_audio_file(filepath: Path | str) -> AudioData:
Read an audio file and return decoded audio data.

Supports formats: FLAC, WAV, OGG, MP3, M4A, AIFF, and others
supported by libsndfile.
supported by ffmpeg.

Args:
filepath: Path to audio file
Expand All @@ -146,46 +99,39 @@ def read_audio_file(filepath: Path | str) -> AudioData:

# Get format name from extension
format_name = filepath.suffix.upper().lstrip(".")
bit_depth = None

# Read metadata tags
artist, album = _read_tags(filepath)

try:
# Read audio file using soundfile (libsndfile backend)
# This handles FLAC, WAV, OGG, and many others natively
samples, sample_rate = sf.read(filepath, dtype="float32", always_2d=True)

# Try to get bit depth from file info
try:
info = sf.info(filepath)
# Map soundfile subtypes to bit depths
subtype_map = {
"PCM_16": 16,
"PCM_24": 24,
"PCM_32": 32,
"FLOAT": 32,
"DOUBLE": 64,
}
bit_depth = subtype_map.get(info.subtype, None)

# Get more accurate format name if available
if hasattr(info, "format"):
format_name = info.format
except Exception:
pass # bit_depth remains None if we can't determine it

except sf.LibsndfileError as e:
# libsndfile couldn't read it - might be MP3 or M4A
logger.debug(f"libsndfile failed, trying alternative decoder: {e}")
samples, sample_rate, bit_depth_fallback = _read_with_fallback(filepath)
if bit_depth is None:
bit_depth = bit_depth_fallback
tag: TinyTag = TinyTag.get(filepath)
artist = tag.artist
album = tag.album
bit_rate = round(tag.bitrate)
sample_rate = tag.samplerate
channels = tag.channels

# Load with pydub (uses ffmpeg)
audio = AudioSegment.from_file(str(filepath))

# Convert to numpy array
samples: np.ndarray[Any, np.dtype[np.float32]] = np.array(audio.get_array_of_samples(), dtype=np.float32)

# Get bit depth
bit_depth = audio.sample_width * 8

# Normalize to [-1.0, 1.0] range
max_val = 2 ** (audio.sample_width * 8 - 1)
samples = samples / max_val

# Reshape for multi-channel
if channels > 1:
samples = samples.reshape((-1, channels))
else:
samples = samples.reshape((-1, 1))

logger.debug(f"Decoded with pydub/ffmpeg: {filepath.name}")

except Exception as e:
raise AudioReadError(f"Failed to read audio file {filepath}: {e}") from e

channels = samples.shape[1]
duration = len(samples) / sample_rate

logger.info(
Expand All @@ -198,83 +144,22 @@ def read_audio_file(filepath: Path | str) -> AudioData:
channels=channels,
duration_seconds=duration,
filepath=filepath,
bit_rate=bit_rate,
bit_depth=bit_depth,
format_name=format_name,
artist=artist,
album=album,
)


def _read_with_fallback(filepath: Path) -> tuple[NDArray[np.floating], int, int | None]:
    """
    Fallback reader for formats not supported by libsndfile (MP3, M4A).

    Uses pydub with ffmpeg backend for decoding. The decoded integer samples
    are converted to float32, scaled by the full-scale value implied by the
    sample width, and reshaped to two dimensions (frames, channels).

    Args:
        filepath: Path to audio file

    Returns:
        Tuple of (samples, sample_rate, bit_depth)

    Raises:
        UnsupportedFormatError: If pydub is not installed or the file
            cannot be decoded
    """
    try:
        from pydub import AudioSegment
    except ImportError as e:
        # Chain explicitly so the traceback shows the missing dependency as
        # the cause rather than as an incidental "during handling" context.
        raise UnsupportedFormatError(
            f"Cannot read {filepath.suffix} files. "
            "Install pydub and ffmpeg: pip install pydub"
        ) from e

    try:
        # Load with pydub (uses ffmpeg)
        audio = AudioSegment.from_file(str(filepath))

        # Bits per sample, derived from pydub's bytes-per-sample width
        bit_depth = audio.sample_width * 8

        # Convert to numpy array and normalize to [-1.0, 1.0] range
        samples = np.array(audio.get_array_of_samples(), dtype=np.float32)
        samples = samples / 2 ** (bit_depth - 1)

        # Always return a 2-D array: one column per channel, and a single
        # column for anything that is not multi-channel.
        samples = samples.reshape((-1, max(audio.channels, 1)))

        sample_rate = audio.frame_rate

        logger.debug("Decoded with pydub/ffmpeg: %s", filepath.name)
        return samples, sample_rate, bit_depth

    except Exception as e:
        raise UnsupportedFormatError(f"Cannot decode {filepath}: {e}") from e


def get_supported_extensions() -> set[str]:
    """
    Get set of supported audio file extensions.

    The full list is returned unconditionally. This module imports pydub at
    load time, so probing for pydub before enabling the ffmpeg-decoded
    formats could never disable them.

    Returns:
        Set of lowercase file extensions (including the dot); a new set is
        built on every call, so callers may mutate the result freely
    """
    return {
        # Formats that were previously gated behind the pydub probe
        ".mp3",
        ".m4a",
        ".mp4",
        ".aac",
        ".wma",
        # Formats that were always enabled
        ".flac",
        ".wav",
        ".aiff",
        ".aif",
        ".aifc",
        ".ogg",
        ".oga",
        ".opus",
    }


def is_supported_file(filepath: Path | str) -> bool:
Expand Down
Loading