Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 124 additions & 80 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,114 @@
# CUDA packages for whisper.cpp GPU acceleration
cudaPackages = pkgs.cudaPackages_12;

in {
devShells.default = pkgs.mkShell {
# Platform-specific library paths (Linux)
# CUDA-enabled sherpa-onnx prebuilt (k2-fsa release) for GPU Parakeet.
# The `parakeet-cuda` cargo feature links sherpa-onnx as `shared`, and
# SHERPA_ONNX_LIB_DIR points it here instead of downloading the CPU build.
# This archive ships libsherpa-onnx-c-api.so + libonnxruntime.so with the
# CUDA execution provider (libonnxruntime_providers_cuda.so). cuDNN/cudart
# are supplied via LD_LIBRARY_PATH in the `cuda` dev shell below.
# Repackages the upstream prebuilt release archive: only its lib/ directory is
# copied into the store, so "${sherpaOnnxCuda}/lib" is the directory of libs.
sherpaOnnxCuda = pkgs.stdenvNoCC.mkDerivation {
pname = "sherpa-onnx-cuda";
version = "1.13.2";
src = pkgs.fetchurl {
# The release version is spelled out in the URL; keep it in sync with
# `version` above and refresh `hash` whenever the URL changes.
url = "https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.13.2/sherpa-onnx-v1.13.2-cuda-12.x-cudnn-9.x-linux-x64-gpu.tar.bz2";
hash = "sha256-vRE8k6GLoPm24MrEaramoXYGvde3cbbq7gy9b5bOY/4=";
};
# Prebuilt archive: there is nothing to configure or compile.
dontConfigure = true;
dontBuild = true;
# Install only the archive's lib/ directory as $out/lib.
# NOTE(review): a plain-string installPhase does not call
# `runHook preInstall` / `runHook postInstall` — confirm no install hooks are needed.
installPhase = "mkdir -p $out && cp -r lib $out/lib";
};

# Dev-shell packages (shared by both shells).
# Package list assembled from an all-platform base plus Linux-only and
# macOS-only groups selected via pkgs.lib.optionals.
commonPackages = with pkgs; [
# Rust / Tauri
rustToolchain
cargo
rustc
rust-analyzer

# Tauri dependencies (all platforms)
openssl
pkg-config
] ++ pkgs.lib.optionals pkgs.stdenv.isLinux [
# Linux-only Tauri dependencies
webkitgtk_4_1
libappindicator-gtk3
librsvg
alsa-lib
# whisper.cpp needs libclang for bindgen
llvmPackages.libclang
# X11 development libraries for x11rb (mouse tracking, display detection)
libx11
libxcursor
libxrandr
libxi
# Vulkan for whisper.cpp GPU acceleration (AMD & NVIDIA)
vulkan-loader
vulkan-headers
vulkan-tools
# Shader compiler for Vulkan
shaderc
# CUDA toolkit for whisper.cpp CUDA acceleration (NVIDIA GPUs)
cudaPackages.cudatoolkit
cudaPackages.cuda_nvcc
cudaPackages.cuda_cudart
cudaPackages.cuda_cccl
cudaPackages.libcublas
# GCC for CUDA compilation
gcc
] ++ pkgs.lib.optionals pkgs.stdenv.isDarwin [
# macOS: Apple system libraries (via Xcode) are used automatically
libiconv
# libclang for bindgen (whisper.cpp)
llvmPackages.libclang
] ++ [
# Frontend
nodejs_22
pnpm

# Build tools
cmake
] ++ pkgs.lib.optionals pkgs.stdenv.isLinux [
# Useful utilities (Linux-only)
glib
libsecret
# Native Wayland keyboard simulation (alternative to X11-based enigo)
wtype
];

# whisper-rs-sys runs bindgen over ggml-vulkan.h. bindgen invokes libclang
# directly, bypassing the nix cc-wrapper, so it cannot find the libc headers
# (stdio.h) or clang's own builtin headers (stddef.h). bindgen then errors and
# whisper-rs-sys SILENTLY falls back to its bundled no-Vulkan bindings, so the
# ggml_backend_vk_* symbols go missing and whisper-rs fails to compile its
# Vulkan module (issue #64). Feed bindgen the cc-wrapper's libc flags plus
# clang's resource dir. A standard apt system finds these in /usr/include and
# lib/clang, so CI does not need this.
# Shell snippet that exports BINDGEN_EXTRA_CLANG_ARGS. It evaluates to the
# empty string on non-Linux systems, so it can be appended to a shellHook
# unconditionally.
bindgenHook = pkgs.lib.optionalString pkgs.stdenv.isLinux ''

export BINDGEN_EXTRA_CLANG_ARGS="$(< ${pkgs.stdenv.cc}/nix-support/libc-cflags) -idirafter ${pkgs.llvmPackages.libclang.lib}/lib/clang/${pkgs.lib.versions.major pkgs.llvmPackages.libclang.version}/include"
'';

# One dev-shell definition, optionally wired for GPU Parakeet (CUDA).
mkThothShell = { gpuParakeet ? false }: pkgs.mkShell ({
# Platform-specific library paths (Linux). With gpuParakeet, also expose
# the CUDA sherpa-onnx libs + cuDNN so the CUDA execution provider loads.
LD_LIBRARY_PATH = pkgs.lib.optionalString pkgs.stdenv.isLinux
(pkgs.lib.makeLibraryPath ([
pkgs.libappindicator-gtk3
pkgs.vulkan-loader
] ++ pkgs.lib.optionals pkgs.stdenv.isLinux [
# CUDA runtime libraries for whisper.cpp linking
cudaPackages.cuda_cudart
cudaPackages.cuda_cccl
cudaPackages.libcublas
] ++ pkgs.lib.optionals gpuParakeet [
sherpaOnnxCuda # libsherpa-onnx-c-api.so + onnxruntime CUDA EP
cudaPackages.cudnn # libcudnn.so.9
# The onnxruntime CUDA execution provider dlopen()s the full CUDA
# math-library set; a single missing one makes it abort (no CPU
# fallback), so provide all of them.
cudaPackages.libcurand # libcurand.so.10
cudaPackages.libcufft # libcufft.so.11
cudaPackages.libcusparse # libcusparse.so.12
]) + ":/run/opengl-driver/lib"); # NVIDIA driver (libcuda.so)

# Workaround for webkit2gtk Wayland issues (Linux only)
Expand All @@ -57,71 +153,24 @@
# Linker search path for CUDA driver (libcuda.so)
RUSTFLAGS = pkgs.lib.optionalString pkgs.stdenv.isLinux "-L /run/opengl-driver/lib";

packages = with pkgs; [
# Rust / Tauri
rustToolchain
cargo
rustc
rust-analyzer

# Tauri dependencies (platform-specific)
openssl
pkg-config
] ++ lib.optionals stdenv.isLinux [
# Linux-only Tauri dependencies
webkitgtk_4_1
libappindicator-gtk3
librsvg
alsa-lib
# whisper.cpp needs libclang for bindgen
llvmPackages.libclang
# X11 development libraries for x11rb (mouse tracking, display detection)
libx11
libxcursor
libxrandr
libxi
# Vulkan for whisper.cpp GPU acceleration (AMD & NVIDIA)
vulkan-loader
vulkan-headers
vulkan-tools
# Shader compiler for Vulkan
shaderc
# CUDA toolkit for whisper.cpp CUDA acceleration (NVIDIA GPUs)
cudaPackages.cudatoolkit
cudaPackages.cuda_nvcc
cudaPackages.cuda_cudart
cudaPackages.cuda_cccl
cudaPackages.libcublas
# GCC for CUDA compilation
gcc
] ++ lib.optionals stdenv.isDarwin [
# macOS: Apple system libraries (via Xcode) are used automatically
libiconv
# libclang for bindgen (whisper.cpp)
llvmPackages.libclang
] ++ [
# Frontend
nodejs_22
pnpm

# Build tools
cmake

# Useful utilities (Linux-only)
] ++ lib.optionals stdenv.isLinux [
glib
libsecret
# Native Wayland keyboard simulation (alternative to X11-based enigo)
wtype
];
packages = commonPackages
++ pkgs.lib.optionals (gpuParakeet && pkgs.stdenv.isLinux) [ cudaPackages.cudnn ];

shellHook = ''
echo "𓅝 Thoth Development Environment"
echo "𓅝 Thoth Development Environment${pkgs.lib.optionalString gpuParakeet " (GPU Parakeet / CUDA)"}"
echo "================================"
echo " Rust: $(rustc --version)"
echo " Node: $(node --version)"
echo " pnpm: $(pnpm --version)"
echo ""
'' + (if gpuParakeet then ''
echo "GPU Parakeet (NVIDIA CUDA) is wired up. Build/run with:"
echo " pnpm tauri dev --no-default-features --features parakeet-cuda,vulkan"
echo " pnpm tauri build --no-default-features --features parakeet-cuda,vulkan"
echo ""
echo "Then transcribe and watch 'nvidia-smi' to confirm the GPU engages."
echo "Logs show 'Attempting CUDA provider...' / 'CUDA provider initialised'."
'' else ''
echo "Commands:"
echo " pnpm install - Install dependencies"
echo " pnpm tauri dev - Start development build"
Expand All @@ -130,22 +179,17 @@
echo " cargo test - Run Rust tests (from src-tauri/)"
echo ""
echo "GPU Acceleration (Linux):"
echo " --features cuda - NVIDIA GPUs (requires CUDA drivers)"
echo " --features hipblas - AMD GPUs (requires ROCm)"
echo " --features vulkan - Cross-platform (experimental)"
'' + pkgs.lib.optionalString pkgs.stdenv.isLinux ''

# whisper-rs-sys runs bindgen over ggml-vulkan.h. bindgen invokes
# libclang directly, bypassing the nix cc-wrapper, so it cannot find
# the libc headers (stdio.h) or clang's own builtin headers
# (stddef.h). bindgen then errors and whisper-rs-sys SILENTLY falls
# back to its bundled no-Vulkan bindings, so the ggml_backend_vk_*
# symbols go missing and whisper-rs fails to compile its Vulkan
# module (issue #64). Feed bindgen the cc-wrapper's libc flags plus
# clang's resource dir. A standard apt system finds these in
# /usr/include and lib/clang, so CI does not need this.
export BINDGEN_EXTRA_CLANG_ARGS="$(< ${pkgs.stdenv.cc}/nix-support/libc-cflags) -idirafter ${pkgs.llvmPackages.libclang.lib}/lib/clang/${pkgs.lib.versions.major pkgs.llvmPackages.libclang.version}/include"
'';
};
echo " --features cuda - NVIDIA GPUs (Whisper)"
echo " --features vulkan - Cross-platform (Whisper)"
echo " nix develop .#cuda - GPU Parakeet (NVIDIA, via sherpa-onnx CUDA)"
'') + bindgenHook;
} // pkgs.lib.optionalAttrs gpuParakeet {
# Make sherpa-onnx-sys link the CUDA libs instead of downloading CPU ones.
SHERPA_ONNX_LIB_DIR = "${sherpaOnnxCuda}/lib";
});

in {
devShells.default = mkThothShell { };
devShells.cuda = mkThothShell { gpuParakeet = true; };
});
}
15 changes: 12 additions & 3 deletions src-tauri/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,12 @@ webrtc-vad = "0.4"
symphonia = { version = "0.6", features = ["mp3", "aac", "ogg", "vorbis", "flac", "wav", "pcm", "isomp4"] }

# Transcription
# Official k2-fsa crate; static + auto-download by default; macOS Parakeet TDT fallback
# (Linux CI builds without the parakeet feature).
sherpa-onnx = { version = "1.13", optional = true }
# Official k2-fsa crate. default-features = false drops its built-in `static`
# marker; sherpa-onnx-sys still defaults to static linking when no link feature
# is set (= the `parakeet` feature → CPU, unchanged). The `parakeet-cuda` feature
# switches it to `shared` so a GPU build can link a CUDA-enabled sherpa-onnx via
# the SHERPA_ONNX_LIB_DIR env var. (Linux CI builds without the parakeet feature.)
sherpa-onnx = { version = "1.13", default-features = false, optional = true }
# fluidaudio-rs: Apple Neural Engine backend via CoreML (macOS-only, Apple Silicon).
# WHY fork, not crates.io: the upstream crate lacks platform compile-guards (fails to
# build on Linux/Windows); the fork wraps the implementation in
Expand Down Expand Up @@ -127,6 +130,12 @@ objc2-core-foundation = { version = "0.3", features = ["CFString"] }
[features]
default = ["parakeet", "fluidaudio"]
parakeet = ["dep:sherpa-onnx"]
# GPU Parakeet (NVIDIA): links a CUDA-enabled sherpa-onnx (shared). Build with
# cargo build --no-default-features --features "parakeet-cuda,vulkan"
# and point SHERPA_ONNX_LIB_DIR at the k2-fsa CUDA prebuilt's lib dir. At runtime
# the recognizer requests the `cuda` provider and falls back to CPU if the EP
# isn't available. See flake.nix's `cuda` dev shell which wires this up.
parakeet-cuda = ["parakeet", "sherpa-onnx/shared"]
fluidaudio = ["dep:fluidaudio-rs"]
# GPU acceleration for whisper.cpp (mutually exclusive, only use one)
# Use: cargo build --features cuda for NVIDIA GPUs
Expand Down
22 changes: 21 additions & 1 deletion src-tauri/src/transcription/parakeet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,27 @@ impl TranscriptionService {
}
};

#[cfg(not(target_os = "macos"))]
// Linux/Windows: with the `parakeet-cuda` feature, try the CUDA execution
// provider (NVIDIA GPU) first and fall back to CPU. Without it, CPU only.
// Note: onnxruntime silently falls back to CPU if the CUDA EP/libs aren't
// present, so confirm real GPU use via `nvidia-smi` during a transcription.
#[cfg(all(not(target_os = "macos"), feature = "parakeet-cuda"))]
let recognizer = {
tracing::info!("Attempting CUDA provider for GPU acceleration");
match OfflineRecognizer::create(&build_config(Some("cuda".into()))) {
Some(r) => {
tracing::info!("CUDA provider initialised (verify GPU use with nvidia-smi)");
r
}
None => {
tracing::warn!("CUDA provider failed, falling back to CPU");
OfflineRecognizer::create(&build_config(Some("cpu".into())))
.ok_or_else(|| anyhow!("Failed to create Parakeet recognizer with CPU"))?
}
}
};

#[cfg(all(not(target_os = "macos"), not(feature = "parakeet-cuda")))]
let recognizer = {
tracing::info!("Using CPU provider");
OfflineRecognizer::create(&build_config(Some("cpu".into())))
Expand Down
Loading