diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cd6a3fb..11eaad9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -163,6 +163,12 @@ jobs: examples/mnist_cnn/data/raw key: datasets-raw-${{ hashFiles('examples/har_classifier/prepare_data.py', 'examples/ecg_anomaly_ae/prepare_data.py', 'examples/mnist_mlp/prepare_data.py', 'examples/mnist_cnn/prepare_data.py') }} + - name: Cache SpeechCommands raw download (shared, ~2.3 GB) + uses: actions/cache@v4 + with: + path: examples/_shared/data/speech_commands + key: speechcommands-raw-${{ hashFiles('examples/_shared/speechcommands_data.py') }} + - name: Prepare HAR data run: uv run examples/har_classifier/prepare_data.py @@ -187,6 +193,20 @@ jobs: - name: Train PyTorch MNIST CNN (produces reference predictions + weights) run: uv run examples/mnist_cnn/train_pytorch.py + - name: Cache kws_mfcc processed data (6-class) + id: kws-mfcc-cache + uses: actions/cache@v4 + with: + path: examples/kws_mfcc/data/6class + key: kws-mfcc-6class-${{ hashFiles('examples/kws_mfcc/prepare_data.py', 'examples/_shared/speechcommands_data.py') }} + + - name: Prepare kws_mfcc data (6-class; only on cache miss) + if: steps.kws-mfcc-cache.outputs.cache-hit != 'true' + run: uv run examples/kws_mfcc/prepare_data.py + + - name: Train PyTorch kws_mfcc (produces reference predictions + weights) + run: uv run examples/kws_mfcc/train_pytorch.py + - name: Configure run: cmake --preset examples @@ -238,6 +258,16 @@ jobs: --c examples/mnist_cnn/outputs/c_predictions.npy \ --dtype int32 + - name: Run kws_mfcc in BIT_PARITY mode + run: BIT_PARITY=1 build/examples/examples/kws_mfcc/train_c_kws_mfcc + + - name: Diff kws_mfcc predictions (int32, exact match required) + run: | + uv run examples/_shared/compare_predictions.py \ + --pytorch examples/kws_mfcc/outputs/6class/pytorch_predictions.npy \ + --c examples/kws_mfcc/outputs/6class/c_predictions.npy \ + --dtype int32 + python-test: runs-on: ubuntu-latest diff --git 
a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 0eb6d73..d34dd41 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -3,3 +3,4 @@ add_subdirectory(har_classifier) add_subdirectory(ecg_anomaly_ae) add_subdirectory(mnist_mlp) add_subdirectory(mnist_cnn) +add_subdirectory(kws_mfcc) diff --git a/examples/_shared/speechcommands_data.py b/examples/_shared/speechcommands_data.py new file mode 100644 index 0000000..4cb301e --- /dev/null +++ b/examples/_shared/speechcommands_data.py @@ -0,0 +1,153 @@ +"""Shared SpeechCommands loader for the kws_mfcc and kws_raw examples. + +Wraps torchaudio.datasets.SPEECHCOMMANDS (v0.02) so both KWS examples download +the ~2.3 GB corpus once into a shared raw root and deliver identical waveform +arrays. Output is the native 16 kHz mono waveform (float32 in [-1, 1], the range +torchaudio yields from the int16 PCM), pad/truncated to exactly 16000 samples. +Feature extraction (MFCC) and downsampling are the model's job, not the loader's, +per the repo's data-shape convention. + + load_speechcommands(root, num_classes) -> dict + num_classes in {6, 35} + returns {"train": (x, y), "val": (x, y), "test": (x, y)} + x: float32 [N, 1, 16000] + y: int32 [N] (0..num_classes-1) + +6-class config (labels 0..5, fixed order): + 0 yes 1 no 2 up 3 down + 4 silence -- synthetic low-amplitude Gaussian noise (fixed per-split seed) + 5 unknown -- random clips drawn from the other 31 keywords (fixed per-split seed) +35-class config (labels 0..34): the 35 natural keywords, alphabetical. No synthetic classes. 
def _read_wav_int16(path) -> np.ndarray:
    """Read a mono 16-bit PCM .wav file as float32 samples scaled by 1/32768.

    Decoding goes through the stdlib ``wave`` reader rather than torchaudio's
    dataset decode path, so no torchcodec / system FFmpeg is required.

    Args:
        path: Location of the .wav file (anything ``str()`` turns into a path).

    Returns:
        1-D float32 array with one entry per frame; each value is the int16
        sample divided by 32768.0.

    Raises:
        ValueError: If the file is not mono 16-bit PCM. This is a real
            exception rather than an ``assert`` so the check survives
            ``python -O``; without it a stereo or 8-bit file would be
            reinterpreted as int16 mono and decode to garbage silently.
    """
    with wave.open(str(path), "rb") as w:
        channels = w.getnchannels()
        sample_width = w.getsampwidth()
        if channels != 1 or sample_width != 2:
            raise ValueError(
                f"{path}: expected mono 16-bit PCM, got "
                f"{channels}ch/{sample_width * 8}bit (int16/32768 decode would be wrong)"
            )
        frames = w.readframes(w.getnframes())
    return np.frombuffer(frames, dtype=np.int16).astype(np.float32) / 32768.0
def _stack(clips: list[np.ndarray], label_id: int) -> tuple[np.ndarray, np.ndarray]:
    """Stack equal-length mono clips into a batch plus a constant label vector.

    Args:
        clips: Waveforms of identical length (callers pass the output of
            ``_decode``). May be empty.
        label_id: Class id assigned to every clip in the batch.

    Returns:
        ``(x, y)`` where ``x`` is float32 ``[N, 1, L]`` and ``y`` is int32
        ``[N]`` filled with ``label_id``. For an empty ``clips`` list the
        result is an empty ``[0, 1, CLIP_LEN]`` batch, so a keyword with no
        clips in a split can still be concatenated with the other classes.
    """
    if len(clips) == 0:
        # np.stack([]) raises "need at least one array to stack"; callers
        # look labels up with .get(kw, []) and so can legitimately pass [].
        x = np.zeros((0, 1, CLIP_LEN), dtype=np.float32)
    else:
        # np.stack already allocates a fresh array, so skip the second full
        # copy when the clips are float32 already (keeps peak memory down).
        x = np.stack(clips).astype(np.float32, copy=False)[:, None, :]
    y = np.full((x.shape[0],), label_id, dtype=np.int32)
    return x, y
def load_speechcommands(root, num_classes: int) -> dict:
    """Build train/val/test waveform arrays from the SpeechCommands corpus.

    The corpus is fetched through ``SPEECHCOMMANDS(..., download=True)`` into
    ``root`` (created if missing). Clip paths are grouped per label for each
    subset, then decoded by the 6-class or 35-class split builder.

    Args:
        root: Directory holding (or receiving) the raw corpus.
        num_classes: 6 (yes/no/up/down/silence/unknown) or 35 (every keyword,
            labels assigned in alphabetical order).

    Returns:
        ``{"train": (x, y), "val": (x, y), "test": (x, y)}`` as produced by
        ``_build_split_6`` / ``_build_split_35``.

    Raises:
        ValueError: If ``num_classes`` is neither 6 nor 35. Raised before any
            filesystem or download work. This is an explicit exception rather
            than an ``assert`` so it is not stripped by ``python -O``.
        RuntimeError: In the 35-class config, if the three subsets together do
            not contain exactly 35 distinct labels (label ids would otherwise
            be assigned from an incomplete keyword list).
    """
    if num_classes not in (6, 35):
        raise ValueError(f"num_classes must be 6 or 35, got {num_classes!r}")

    root = Path(root)
    root.mkdir(parents=True, exist_ok=True)

    grouped = {}
    for split, subset in _SUBSETS.items():
        ds = SPEECHCOMMANDS(root=str(root), download=True, subset=subset)
        grouped[split] = _paths_by_label(ds)

    splits = ("train", "val", "test")
    if num_classes == 6:
        # split_index offsets the RNG seed so each split draws its own
        # silence / unknown samples.
        return {
            split: _build_split_6(grouped[split], split_index)
            for split_index, split in enumerate(splits)
        }

    keywords_35 = sorted({lab for g in grouped.values() for lab in g})
    if len(keywords_35) != 35:
        raise RuntimeError(
            f"expected 35 distinct keywords, found {len(keywords_35)}: {keywords_35}"
        )
    return {split: _build_split_35(grouped[split], keywords_35) for split in splits}
`KWS_CLASSES` (default **6**) selects the subset. CI runs **6-class only**; 35 is
local-only. Per-config artifacts live under `<n>class/` subdirs (`6class/` or `35class/`).
Reads logs/<n>class/{pytorch,c}.json and outputs/<n>class/{pytorch,c}_predictions.npy.
Writes plots into plots/<n>class/. Prints a final-state parity report within tolerances.
INFORMATIONAL only — the bit-parity check (compare_predictions.py) is the gate.
def confusion_matrix(preds: np.ndarray, labels: np.ndarray, num_classes: int) -> np.ndarray:
    """Count (predicted, actual) class pairs in a square int64 matrix.

    The first index is the predicted class and the second is the actual
    class, i.e. ``result[p, a]`` is the number of samples predicted as ``p``
    whose label is ``a``. Pairing stops at the shorter of the two inputs.
    """
    counts = np.zeros((num_classes, num_classes), dtype=np.int64)
    pairs = [(int(predicted), int(actual)) for predicted, actual in zip(preds, labels)]
    for row, col in pairs:
        counts[row, col] += 1
    return counts
pt_finals = pt["final"] + c_finals = c["final"] + overall_pass, results = run_parity_checks( + CHECKS, + {"test_acc": pt_finals["test_acc"], "test_loss": pt_finals["test_loss"]}, + {"test_acc": c_finals["test_acc"], "test_loss": c_finals["test_loss"]}, + ) + + print("\nParity report (PyTorch vs C) — INFORMATIONAL:") + print(f"{'metric':<14} {'pt':>10} {'c':>10} {'diff':>10} {'tol':>8} {'type':>5} {'pass':>6}") + for r in results: + print(f"{r.metric:<14} {r.pt_value:>10.5f} {r.c_value:>10.5f} {r.diff:>10.5f} " + f"{r.tolerance:>8.4f} {r.tolerance_type:>5} {str(r.passed):>6}") + print(f"\nOverall: {'PASS' if overall_pass else 'FAIL'} (informational; not a CI gate)") + return 0 if overall_pass else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/examples/kws_mfcc/prepare_data.py b/examples/kws_mfcc/prepare_data.py new file mode 100644 index 0000000..0549c6f --- /dev/null +++ b/examples/kws_mfcc/prepare_data.py @@ -0,0 +1,68 @@ +"""Prepare SpeechCommands MFCC features for the kws_mfcc example. + +For each clip: log-MFCC via torchaudio (n_mfcc=40, n_fft=400, hop=512, n_mels=40) +over the native 16 kHz waveform -> [40, 32] frames (T=32 exact, no trim). 
def _mfcc_features(x: np.ndarray) -> np.ndarray:
    """Turn waveforms [N,1,16000] into MFCC features [N,40,32] (float32).

    Each clip is run through torchaudio's MFCC transform on its own. The frame
    axis is then forced to exactly T_FRAMES: extra frames are dropped and
    missing frames are left as zeros.
    """
    transform = MFCC(
        sample_rate=16000,
        n_mfcc=N_MFCC,
        melkwargs={"n_fft": 400, "hop_length": 512, "n_mels": N_MFCC},
    )
    num_clips = x.shape[0]
    # Zero-initialised so any frames a clip does not provide stay as padding.
    out = np.zeros((num_clips, N_MFCC, T_FRAMES), dtype=np.float32)
    with torch.no_grad():
        for i in range(num_clips):
            coeffs = transform(torch.from_numpy(x[i]))  # [1,40,frames]
            coeffs = coeffs.squeeze(0).numpy().astype(np.float32)  # [40,frames]
            keep = min(coeffs.shape[1], T_FRAMES)
            out[i, :, :keep] = coeffs[:, :keep]
    return out
file mode 100644 index 0000000..2b0c81c --- /dev/null +++ b/examples/kws_mfcc/train_c.c @@ -0,0 +1,408 @@ +#define SOURCE_FILE "kws_mfcc_train_c" + +#include +#include +#include +#include +#include +#include +#include + +#include "AdaptivePool1dApi.h" +#include "CalculateGradsSequential.h" +#include "Common.h" +#include "Conv1dApi.h" +#include "DataLoader.h" +#include "DataLoaderApi.h" +#include "FlattenApi.h" +#include "InferenceApi.h" +#include "Layer.h" +#include "LayerCommon.h" +#include "LayerQuant.h" +#include "LinearApi.h" +#include "LossFunction.h" +#include "NPYLoaderApi.h" +#include "Pool1dApi.h" +#include "Quantization.h" +#include "QuantizationApi.h" +#include "ReluApi.h" +#include "SgdApi.h" +#include "SoftmaxApi.h" +#include "StateDictApi.h" +#include "StorageApi.h" +#include "Tensor.h" +#include "TensorApi.h" +#include "TrainingLoopApi.h" + +#include "npy_writer.h" + +#define EPOCHS 15 +#define BATCH 32 +#define LR 0.001f +#define MOMENTUM 0.9f +#define SEED 42 +#define SHUFFLE_SEED 42 +#define NUM_CLASSES_DEFAULT 6 + +#define IN_CHANNELS 40 +#define LEN_INPUT 32 +#define C1_OUT 32 +#define C1_K 3 +#define C2_OUT 64 +#define C2_K 3 + +/* 2x(Conv1d+ReLU+MaxPool) + AdaptiveAvgPool + Flatten + Linear + Softmax = 10 layers */ +#define MODEL_SIZE 10 + +static dataset_t g_trainDataset; +static dataset_t g_valDataset; +static dataset_t g_testDataset; + +static size_t g_numClasses = NUM_CLASSES_DEFAULT; + +static size_t readNumClasses(void) { + const char *env = getenv("KWS_CLASSES"); + if (env == NULL || env[0] == '\0') { + return NUM_CLASSES_DEFAULT; + } + long v = strtol(env, NULL, 10); + if (v != 6 && v != 35) { + fprintf(stderr, "KWS_CLASSES must be 6 or 35 (got '%s'); using %d\n", env, + NUM_CLASSES_DEFAULT); + return NUM_CLASSES_DEFAULT; + } + return (size_t)v; +} + +static void reshapeItemsAddBatchDim(tensorArray_t *items) { + for (size_t i = 0; i < items->size; ++i) { + tensor_t *t = items->array[i]; + size_t oldRank = t->shape->numberOfDimensions; 
+ size_t newRank = oldRank + 1; + + size_t *newDims = reserveMemory(newRank * sizeof(size_t)); + size_t *newOrder = reserveMemory(newRank * sizeof(size_t)); + newDims[0] = 1; + for (size_t d = 0; d < oldRank; ++d) { + newDims[d + 1] = t->shape->dimensions[d]; + } + for (size_t d = 0; d < newRank; ++d) { + newOrder[d] = d; + } + + freeReservedMemory(t->shape->dimensions); + freeReservedMemory(t->shape->orderOfDimensions); + t->shape->dimensions = newDims; + t->shape->orderOfDimensions = newOrder; + t->shape->numberOfDimensions = newRank; + } +} + +static tensorArray_t *buildOneHotLabels(tensorArray_t *intLabels) { + tensorArray_t *out = reserveMemory(sizeof(tensorArray_t)); + tensor_t **arr = reserveMemory(intLabels->size * sizeof(tensor_t *)); + out->array = arr; + out->size = intLabels->size; + + for (size_t i = 0; i < intLabels->size; ++i) { + size_t *dims = reserveMemory(1 * sizeof(size_t)); + size_t *order = reserveMemory(1 * sizeof(size_t)); + dims[0] = g_numClasses; + order[0] = 0; + shape_t *shape = reserveMemory(sizeof(shape_t)); + shape->dimensions = dims; + shape->orderOfDimensions = order; + shape->numberOfDimensions = 1; + + quantization_t *q = quantizationInitFloat(); + tensor_t *t = initTensor(shape, q, NULL); + + int32_t cls = ((int32_t *)intLabels->array[i]->data)[0]; + float *data = (float *)t->data; + for (size_t c = 0; c < g_numClasses; ++c) { + data[c] = (c == (size_t)cls) ? 
1.0f : 0.0f; + } + arr[i] = t; + } + return out; +} + +static void initDataSets(const char *dataDir) { + char path[300]; + snprintf(path, sizeof(path), "%s/train_x.npy", dataDir); + tensorArray_t *trainItems = npyLoad(path); + snprintf(path, sizeof(path), "%s/train_y.npy", dataDir); + tensorArray_t *trainLabelsRaw = npyLoad(path); + reshapeItemsAddBatchDim(trainItems); + g_trainDataset.items = trainItems; + g_trainDataset.labels = buildOneHotLabels(trainLabelsRaw); + + snprintf(path, sizeof(path), "%s/val_x.npy", dataDir); + tensorArray_t *valItems = npyLoad(path); + snprintf(path, sizeof(path), "%s/val_y.npy", dataDir); + tensorArray_t *valLabelsRaw = npyLoad(path); + reshapeItemsAddBatchDim(valItems); + g_valDataset.items = valItems; + g_valDataset.labels = buildOneHotLabels(valLabelsRaw); + + snprintf(path, sizeof(path), "%s/test_x.npy", dataDir); + tensorArray_t *testItems = npyLoad(path); + snprintf(path, sizeof(path), "%s/test_y.npy", dataDir); + tensorArray_t *testLabelsRaw = npyLoad(path); + reshapeItemsAddBatchDim(testItems); + g_testDataset.items = testItems; + g_testDataset.labels = buildOneHotLabels(testLabelsRaw); +} + +static sample_t *getTrainSample(size_t id) { + return npyGetSample(&g_trainDataset, id); +} +static sample_t *getValSample(size_t id) { + return npyGetSample(&g_valDataset, id); +} +static sample_t *getTestSample(size_t id) { + return npyGetSample(&g_testDataset, id); +} +static size_t getTrainSize(void) { + return g_trainDataset.items->size; +} +static size_t getValSize(void) { + return g_valDataset.items->size; +} +static size_t getTestSize(void) { + return g_testDataset.items->size; +} + +static void buildModel(layer_t **model, layerQuant_t *lq) { + /* Input reshaped to [1, 40, 32]. 
*/ + model[0] = conv1dLayerInit( + &(conv1dInit_t){ + .inChannels = IN_CHANNELS, .outChannels = C1_OUT, .kernelSize = C1_K, .padding = SAME}, + lq); + model[1] = reluLayerInit(lq); + model[2] = maxPool1dLayerInit( + &(maxPool1dInit_t){ + .kernelSize = 2, .stride = 2, .inputChannels = C1_OUT, .inputLength = LEN_INPUT}, + lq); + + model[3] = conv1dLayerInit( + &(conv1dInit_t){ + .inChannels = C1_OUT, .outChannels = C2_OUT, .kernelSize = C2_K, .padding = SAME}, + lq); + model[4] = reluLayerInit(lq); + model[5] = maxPool1dLayerInit( + &(maxPool1dInit_t){ + .kernelSize = 2, .stride = 2, .inputChannels = C2_OUT, .inputLength = LEN_INPUT / 2}, + lq); + + /* Rate-agnostic head: AdaptiveAvgPool1d(1) -> Flatten -> Linear -> Softmax. */ + model[6] = adaptiveAvgPool1dLayerInit(&(adaptiveAvgPool1dInit_t){.outputSize = 1}, lq); + model[7] = flattenLayerInit(); + model[8] = + linearLayerInit(&(linearInit_t){.inFeatures = C2_OUT, .outFeatures = g_numClasses}, lq); + model[9] = softmaxLayerInit(lq); +} + +/* Load PyTorch state_dict from per-layer .npy files written by + * examples/kws_mfcc/train_pytorch.py --save-weights. + * + * Returns 0 on success, non-zero on first missing file. 
*/ +static int loadStateDictFromDir(layer_t **model, const char *weightsDir) { + char wPath[256], bPath[256]; + const char *names[3] = {"conv1", "conv2", "fc"}; + tensor_t *w[3] = {0}; + tensor_t *b[3] = {0}; + + for (int i = 0; i < 3; i++) { + snprintf(wPath, sizeof(wPath), "%s/%s.weight.npy", weightsDir, names[i]); + snprintf(bPath, sizeof(bPath), "%s/%s.bias.npy", weightsDir, names[i]); + w[i] = npyLoadFlat(wPath); + b[i] = npyLoadFlat(bPath); + if (w[i] == NULL || b[i] == NULL) { + fprintf(stderr, "loadStateDictFromDir: missing %s or %s\n", wPath, bPath); + return 1; + } + } + + modelLoadStateDict( + model, MODEL_SIZE, + (stateDictEntry_t[]){ + {.name = names[0], .weightData = (float *)w[0]->data, .biasData = (float *)b[0]->data}, + {.name = names[1], .weightData = (float *)w[1]->data, .biasData = (float *)b[1]->data}, + {.name = names[2], .weightData = (float *)w[2]->data, .biasData = (float *)b[2]->data}, + }, + 3); + + for (int i = 0; i < 3; i++) { + freeTensor(w[i]); + freeTensor(b[i]); + } + return 0; +} + +static FILE *g_log_file = NULL; +static int g_first_epoch = 1; +static struct timespec g_epoch_t0; + +static void epochCallback(size_t epoch, float trainLoss, epochStats_t evalStats) { + struct timespec t1; + clock_gettime(CLOCK_MONOTONIC, &t1); + double wall_s = + (double)(t1.tv_sec - g_epoch_t0.tv_sec) + (double)(t1.tv_nsec - g_epoch_t0.tv_nsec) * 1e-9; + + if (!g_first_epoch) { + fprintf(g_log_file, ",\n"); + } + fprintf(g_log_file, + " {\"epoch\": %zu, \"step_losses\": [], \"train_loss\": %.6f, " + "\"val_loss\": %.6f, \"val_acc\": %.6f, \"wall_s\": %.4f}", + epoch, (double)trainLoss, (double)evalStats.loss, (double)evalStats.accuracy, wall_s); + fflush(g_log_file); + g_first_epoch = 0; + + fprintf(stdout, "epoch %zu: train_loss=%.4f val_loss=%.4f val_acc=%.4f wall_s=%.2f\n", epoch, + (double)trainLoss, (double)evalStats.loss, (double)evalStats.accuracy, wall_s); + fflush(stdout); + + clock_gettime(CLOCK_MONOTONIC, &g_epoch_t0); +} + +static int 
ensureDir(const char *p) { + if (mkdir(p, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) == 0) { + return 0; + } + if (errno == EEXIST) { + return 0; + } + fprintf(stderr, "ERROR: cannot create %s: %s\n", p, strerror(errno)); + return 1; +} + +int main(void) { + g_numClasses = readNumClasses(); + + char dataDir[256], weightsDir[256], logsDir[256], outputsDir[256]; + snprintf(dataDir, sizeof(dataDir), "examples/kws_mfcc/data/%zuclass", g_numClasses); + snprintf(weightsDir, sizeof(weightsDir), "examples/kws_mfcc/weights/%zuclass", g_numClasses); + snprintf(logsDir, sizeof(logsDir), "examples/kws_mfcc/logs/%zuclass", g_numClasses); + snprintf(outputsDir, sizeof(outputsDir), "examples/kws_mfcc/outputs/%zuclass", g_numClasses); + + if (ensureDir("examples/kws_mfcc/logs") != 0 || ensureDir(logsDir) != 0) { + return 1; + } + if (ensureDir("examples/kws_mfcc/outputs") != 0 || ensureDir(outputsDir) != 0) { + return 1; + } + + initDataSets(dataDir); + + dataLoader_t *testLoader = dataLoaderInit(getTestSample, getTestSize, 1, NULL, NULL, + /*shuffle*/ false, /*shuffleSeed*/ 0, + /*dropLast*/ true); + + layerQuant_t lq; + layerQuantInitUniform(&lq, quantizationInitFloat()); + + layer_t *model[MODEL_SIZE]; + buildModel(model, &lq); + + const char *bitParity = getenv("BIT_PARITY"); + if (bitParity != NULL && bitParity[0] != '\0') { + /* Bit-parity mode: load PyTorch state_dict, skip training, run inference. 
*/ + if (loadStateDictFromDir(model, weightsDir) != 0) { + fprintf(stderr, "BIT_PARITY: state_dict load failed\n"); + return 1; + } + fprintf(stdout, "BIT_PARITY: loaded state_dict from %s\n", weightsDir); + } else { + dataLoader_t *trainLoader = dataLoaderInit(getTrainSample, getTrainSize, BATCH, NULL, NULL, + /*shuffle*/ true, /*shuffleSeed*/ SHUFFLE_SEED, + /*dropLast*/ true); + dataLoader_t *valLoader = dataLoaderInit(getValSample, getValSize, 1, NULL, NULL, + /*shuffle*/ false, /*shuffleSeed*/ 0, + /*dropLast*/ true); + + optimizer_t *sgd = + sgdMCreateOptim(LR, MOMENTUM, /*weightDecay*/ 0.0f, model, MODEL_SIZE, FLOAT32); + + char logPath[300]; + snprintf(logPath, sizeof(logPath), "%s/c.json", logsDir); + g_log_file = fopen(logPath, "w"); + if (!g_log_file) { + fprintf(stderr, "ERROR: cannot open log file for writing\n"); + return 1; + } + fprintf(g_log_file, + "{\n" + " \"impl\": \"c\",\n" + " \"example\": \"kws_mfcc\",\n" + " \"config\": {\"epochs\": %d, \"batch\": %d, \"lr\": %.6f, " + "\"momentum\": %.6f, \"seed\": %d, \"shuffle_seed\": %d},\n" + " \"epochs\": [\n", + EPOCHS, BATCH, (double)LR, (double)MOMENTUM, SEED, SHUFFLE_SEED); + fflush(g_log_file); + + clock_gettime(CLOCK_MONOTONIC, &g_epoch_t0); + + trainingRunResult_t result = + trainingRun(model, MODEL_SIZE, + (lossConfig_t){.funcType = CROSS_ENTROPY, + .backwardReduction = REDUCTION_MEAN, + .classWeights = NULL}, + trainLoader, valLoader, sgd, EPOCHS, calculateGradsSequential, + inferenceWithLoss, epochCallback); + (void)result; + + epochStats_t testStats = evaluationEpochWithMetrics( + model, MODEL_SIZE, CROSS_ENTROPY, testLoader, inferenceWithLoss, REDUCTION_MEAN); + + fprintf(g_log_file, + "\n ],\n" + " \"final\": {\"test_loss\": %.6f, \"test_acc\": %.6f, " + "\"test_auc\": null}\n" + "}\n", + (double)testStats.loss, (double)testStats.accuracy); + fclose(g_log_file); + + fprintf(stdout, "FINAL test_loss=%.4f test_acc=%.4f\n", (double)testStats.loss, + (double)testStats.accuracy); + } + + /* 
Predictions on test set (both modes). */ + size_t numTest = getTestSize(); + int32_t *predictions = malloc(numTest * sizeof(int32_t)); + if (!predictions) { + fprintf(stderr, "OOM allocating predictions\n"); + return 1; + } + + for (size_t i = 0; i < numTest; ++i) { + sample_t *s = getTestSample(i); + tensor_t *out = inference(model, MODEL_SIZE, s->item); + float *probs = (float *)out->data; + size_t argmax = 0; + float best = probs[0]; + for (size_t c = 1; c < g_numClasses; ++c) { + if (probs[c] > best) { + best = probs[c]; + argmax = c; + } + } + predictions[i] = (int32_t)argmax; + freeTensor(out); + freeSample(s); + } + + char predPath[300]; + snprintf(predPath, sizeof(predPath), "%s/c_predictions.npy", outputsDir); + size_t outShape[] = {numTest}; + int status = 0; + int rc = npyWriteInt32(predPath, predictions, outShape, 1); + if (rc != 0) { + fprintf(stderr, "ERROR: npyWriteInt32 failed (rc=%d)\n", rc); + status = 1; + } + free(predictions); + + return status; +} diff --git a/examples/kws_mfcc/train_pytorch.py b/examples/kws_mfcc/train_pytorch.py new file mode 100644 index 0000000..cfc016c --- /dev/null +++ b/examples/kws_mfcc/train_pytorch.py @@ -0,0 +1,165 @@ +"""PyTorch reference implementation of the kws_mfcc 1D-CNN classifier. + +Input: MFCC [40,32] from prepare_data.py. Output: logs/class/pytorch.json + +outputs/class/pytorch_predictions.npy + weights/class/{conv1,conv2,fc}.{weight,bias}.npy +for the C-side BIT_PARITY mode. num_classes from KWS_CLASSES (default 6). 
class KwsDataset(torch.utils.data.Dataset):
    """In-memory dataset pairing feature tensors with integer class labels.

    Features are stored as float32 and labels as int64 torch tensors, each
    converted from a copy of the numpy array passed in.
    """

    def __init__(self, x: np.ndarray, y: np.ndarray) -> None:
        features = np.array(x, dtype=np.float32)
        targets = np.array(y, dtype=np.int64)
        self.x = torch.from_numpy(features)
        self.y = torch.from_numpy(targets)

    def __len__(self) -> int:
        return len(self.x)

    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
        sample = self.x[idx]
        target = self.y[idx]
        return sample, target
F.relu(self.conv2(x)) # [B,64,16] + x = F.max_pool1d(x, 2) # [B,64,8] + x = F.adaptive_avg_pool1d(x, 1) # [B,64,1] + x = x.flatten(start_dim=1) # [B,64] + return self.fc(x) + + +def evaluate(model: nn.Module, x: np.ndarray, y: np.ndarray, batch: int) -> tuple[float, float]: + model.eval() + total_loss, total_correct, total = 0.0, 0, 0 + with torch.no_grad(): + for i in range(0, len(x), batch): + xb = torch.from_numpy(x[i : i + batch].astype(np.float32)) + yb = torch.from_numpy(y[i : i + batch].astype(np.int64)) + logits = model(xb) + loss = F.cross_entropy(logits, yb, reduction="sum") + total_loss += loss.item() + total_correct += (logits.argmax(dim=1) == yb).sum().item() + total += yb.shape[0] + return total_loss / total, total_correct / total + + +def main() -> None: + torch.manual_seed(SEED) + np.random.seed(SEED) + torch.use_deterministic_algorithms(True, warn_only=True) + + train_x = np.load(DATA / "train_x.npy") + train_y = np.load(DATA / "train_y.npy") + val_x = np.load(DATA / "val_x.npy") + val_y = np.load(DATA / "val_y.npy") + test_x = np.load(DATA / "test_x.npy") + test_y = np.load(DATA / "test_y.npy") + + train_ds = KwsDataset(train_x, train_y) + sampler = XorShift32Sampler(len(train_ds), SHUFFLE_SEED) + loader = torch.utils.data.DataLoader(train_ds, batch_size=BATCH, sampler=sampler, drop_last=True) + + model = KwsMfccCnn(NUM_CLASSES) + optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=MOMENTUM) + + epoch_records = [] + for epoch in range(EPOCHS): + t0 = time.time() + model.train() + step_losses: list[float] = [] + for xb, yb in loader: + optimizer.zero_grad() + loss = F.cross_entropy(model(xb), yb) + loss.backward() + optimizer.step() + step_losses.append(loss.item()) + train_loss = float(np.mean(step_losses)) if step_losses else 0.0 + val_loss, val_acc = evaluate(model, val_x, val_y, BATCH) + epoch_records.append({ + "epoch": epoch, "step_losses": step_losses, "train_loss": train_loss, + "val_loss": val_loss, "val_acc": val_acc, 
"wall_s": time.time() - t0, + }) + print(f"epoch {epoch:2d}: train_loss={train_loss:.4f} val_loss={val_loss:.4f} val_acc={val_acc:.4f}", flush=True) + + test_loss, test_acc = evaluate(model, test_x, test_y, BATCH) + log: RunLog = { + "impl": "pytorch", "example": "kws_mfcc", + "config": {"epochs": EPOCHS, "batch": BATCH, "lr": LR, "momentum": MOMENTUM, + "seed": SEED, "shuffle_seed": SHUFFLE_SEED}, + "epochs": epoch_records, # type: ignore[typeddict-item] + "final": {"test_loss": test_loss, "test_acc": test_acc, "test_auc": None}, + } + LOGS.mkdir(parents=True, exist_ok=True) + OUTPUTS.mkdir(parents=True, exist_ok=True) + dump_log(LOGS / "pytorch.json", log) + + model.eval() + with torch.no_grad(): + preds = model(torch.from_numpy(test_x.astype(np.float32))).argmax(dim=1).numpy().astype(np.int32) + np.save(OUTPUTS / "pytorch_predictions.npy", preds) + print(f"FINAL test_loss={test_loss:.4f} test_acc={test_acc:.4f}", flush=True) + + WEIGHTS.mkdir(parents=True, exist_ok=True) + layer_map = {"conv1": model.conv1, "conv2": model.conv2, "fc": model.fc} + print("Saving per-layer weights:", flush=True) + for name, layer in layer_map.items(): + w = layer.weight.detach().cpu().numpy().astype(np.float32) + np.save(WEIGHTS / f"{name}.weight.npy", w) + if layer.bias is not None: + b = layer.bias.detach().cpu().numpy().astype(np.float32) + np.save(WEIGHTS / f"{name}.bias.npy", b) + print(f" wrote {name}.weight.npy shape={w.shape}", flush=True) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index f0602cb..69ae28f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ dependencies = [ "elasticai-creator @ git+https://github.com/es-ude/elastic-ai.creator.git@training-implementation-provider", "matplotlib>=3.10.9", "torch>=2.11.0", + "torchaudio>=2.11.0", "torchvision>=0.26.0", ] diff --git a/uv.lock b/uv.lock index b1e2ff6..31444ed 100644 --- a/uv.lock +++ b/uv.lock @@ -731,6 +731,7 @@ dependencies = [ { name = "elasticai-creator" 
}, { name = "matplotlib" }, { name = "torch" }, + { name = "torchaudio" }, { name = "torchvision" }, ] @@ -744,6 +745,7 @@ requires-dist = [ { name = "elasticai-creator", git = "https://github.com/es-ude/elastic-ai.creator.git?rev=training-implementation-provider" }, { name = "matplotlib", specifier = ">=3.10.9" }, { name = "torch", specifier = ">=2.11.0" }, + { name = "torchaudio", specifier = ">=2.11.0" }, { name = "torchvision", specifier = ">=0.26.0" }, ] @@ -965,6 +967,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cf/bf/c8d12a2c86dbfd7f40fb2f56fbf5a505ccf2d9ce131eb559dfc7c51e1a04/torch-2.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b2a43985ff5ef6ddd923bbcf99943e5f58059805787c5c9a2622bf05ca2965b0", size = 114792991, upload-time = "2026-03-23T18:08:19.216Z" }, ] +[[package]] +name = "torchaudio" +version = "2.11.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/b1/77658817acacd01a72b714440c62f419efc4d90170e704e8e7a2c0918988/torchaudio-2.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a1cf1acc883bee9cb906a933572fed6a8a933f86ef34e9ea7d803f72317e8c1b", size = 684226, upload-time = "2026-03-23T18:13:40.023Z" }, + { url = "https://files.pythonhosted.org/packages/78/28/c7adc053039f286c2aca0038b766cbe3294e66fec6b29a820e95128f9ede/torchaudio-2.11.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:bc653defca1c16154398517a1adc98d0fb7f1dd08e58ced217558d213c2c6e29", size = 1626670, upload-time = "2026-03-23T18:13:42.162Z" }, + { url = "https://files.pythonhosted.org/packages/88/d8/d6d0f896e064aa67377484efef4911cdcc07bce2929474e1417cc0af18c2/torchaudio-2.11.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6503c0bdb29daf2e6281bb70ea2dfe2c3553b782b619eb5d73bdadd8a3f7cecf", size = 1771992, upload-time = "2026-03-23T18:13:33.188Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/a8/941277ecc39f7a0a169d554302a1f1afd87c1d94a8aec828891916cea59a/torchaudio-2.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:478110f981e5d40a8d82221732c57a56c85a1d5895fb8fe646e86ee15eded3bd", size = 328663, upload-time = "2026-03-23T18:13:19.218Z" }, + { url = "https://files.pythonhosted.org/packages/fb/9e/f76fcd9877c8c78f258ee34e0fb8291fdb91e6218d582d9ca66b1e4bd4ae/torchaudio-2.11.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e3f9696a9ef1d49acc452159b052370c636406d072e9d8f10895fda87b591ea9", size = 679904, upload-time = "2026-03-23T18:13:28.329Z" }, + { url = "https://files.pythonhosted.org/packages/85/70/249c1498ebdad3e7752866635ec0855fc0dcf898beccda5a9d2b9df8e4d0/torchaudio-2.11.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:b034d7672f1c415434f48ef17807f2cce47f29e8795338c751d4e596c9fbe8b5", size = 1618523, upload-time = "2026-03-23T18:13:15.703Z" }, + { url = "https://files.pythonhosted.org/packages/4f/98/be13fe35d9aa5c26381c0e453c828a789d15c007f8f7d08c95341d19974d/torchaudio-2.11.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1c1101c1243ef0e4063ec63298977e2d3655c15cf88d9eb0a1bd4fe2db9f47ea", size = 1771992, upload-time = "2026-03-23T18:13:35.343Z" }, + { url = "https://files.pythonhosted.org/packages/e2/8b/2bbb3dca6ff28cba0de250874d5ef4fc2822c47a934b59b3974cff3219ef/torchaudio-2.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:986f4df5ed17b003dc52489468601720090e65f964f8bebccf90eb45bba75744", size = 328662, upload-time = "2026-03-23T18:13:18.308Z" }, + { url = "https://files.pythonhosted.org/packages/fe/ce/52c652d30af7d6e96c8f1735d26131e94708e3f38d852b8fa97958804dd8/torchaudio-2.11.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:bda09ea630ae7207384fb0f28c35e4f8c0d82dd6eba020b6b335ad0caa9fed49", size = 680814, upload-time = "2026-03-23T18:13:17.08Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/95/1ad1507482e7263e556709a3f5f87fecd375a0742cdaf238806c8e72eaad/torchaudio-2.11.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:9fe3083c62e035646483a14e180d33561bdc2eed436c9ab1259c137fb7120b4a", size = 1618546, upload-time = "2026-03-23T18:13:29.686Z" }, + { url = "https://files.pythonhosted.org/packages/98/4c/480328ba07487eb9890406720304d0d460dd7a6a64098614f5aa53b662ca/torchaudio-2.11.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:13cff988697ccbad539987599f9dc672f40c417bed67570b365e4e5002bbd096", size = 1771991, upload-time = "2026-03-23T18:13:30.843Z" }, + { url = "https://files.pythonhosted.org/packages/3e/98/5d4790e2d6548768999acd34999d5aeefce8bcc23a07afaa5f03e723f557/torchaudio-2.11.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ed404c4399ad7f172c86a47c1b25293d322d1d58e26b10b0456a86cf67d37d84", size = 328661, upload-time = "2026-03-23T18:13:34.359Z" }, + { url = "https://files.pythonhosted.org/packages/39/fe/ffa618b4f0d9732d7df7a2fa2bd48657d896599bc224e5af3c70d46c546b/torchaudio-2.11.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:cc09cd1f6015b8549e7fe255fb1be5346b57e7fee06541d3f3dbb012d8c4715f", size = 679901, upload-time = "2026-03-23T18:13:25.472Z" }, + { url = "https://files.pythonhosted.org/packages/5c/54/f414d7b92dd0b3094a2409c95a97bd6c49aa0620da722a0e55462f9bd9cb/torchaudio-2.11.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:79fb3cb99169fd41bd9719647261402a164da0d105a4d81f42a3260844ec5e79", size = 1618527, upload-time = "2026-03-23T18:13:26.68Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a8/bf2e1f6ce24c990192400ae49b4acc1a0d0295b6c6a06bceecdc46ce08de/torchaudio-2.11.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:00e9f71ab9c656f0abdb40c515bd65d4658ab0ad380dee27a2efd7d51dabd3d6", size = 1771995, upload-time = "2026-03-23T18:13:23.373Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/6f/b0efb44e0bfe8dd4d78d76ae3be280354e1fb5c8631c782785d74cd8a7b1/torchaudio-2.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:1424638adb8bb40087bc7b6eb103e8e4fe398210f09076f33b7b5e61501b5d66", size = 328662, upload-time = "2026-03-23T18:13:32.243Z" }, + { url = "https://files.pythonhosted.org/packages/60/84/1c792b0b700eac9a96772cfd9f96c097b17bca3234a2fde3c64b8063660d/torchaudio-2.11.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:da2725e250866da42a12934c9a6552f65a18b7187fd7a6221387f0e605fb3b96", size = 679926, upload-time = "2026-03-23T18:13:24.452Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a0/62a5842062f739239691f2e57523e0570dd06704ad987755f7644a3afa23/torchaudio-2.11.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:1be3767064364ae82705bdf2b15c1e8b41fea82c4cd04d47428a8684b634b6ed", size = 1618552, upload-time = "2026-03-23T18:13:21.09Z" }, + { url = "https://files.pythonhosted.org/packages/6d/89/c293d818f9f899db93bf291b42401c05ae29acfb2e53d5341c30ea703e62/torchaudio-2.11.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:67f6edac29ed004652c11db5c19d9debb5d835695930574f564efc8bdd061bba", size = 1771986, upload-time = "2026-03-23T18:13:22.153Z" }, + { url = "https://files.pythonhosted.org/packages/93/f7/ee5da8c03f1a3c7662c6c6a119f24a4b3e646da94be56dce3201e3a6ee9b/torchaudio-2.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:88fb5e29f670a33d9bac6aabb1d2734460cf6e461bde5cdc352826035851b16d", size = 328661, upload-time = "2026-03-23T18:13:20.1Z" }, +] + [[package]] name = "torchvision" version = "0.26.0"