diff --git a/flake.nix b/flake.nix index e52e911..7c3c9c4 100644 --- a/flake.nix +++ b/flake.nix @@ -29,18 +29,114 @@ # CUDA packages for whisper.cpp GPU acceleration cudaPackages = pkgs.cudaPackages_12; - in { - devShells.default = pkgs.mkShell { - # Platform-specific library paths (Linux) + # CUDA-enabled sherpa-onnx prebuilt (k2-fsa release) for GPU Parakeet. + # The `parakeet-cuda` cargo feature links sherpa-onnx as `shared`, and + # SHERPA_ONNX_LIB_DIR points it here instead of downloading the CPU build. + # This archive ships libsherpa-onnx-c-api.so + libonnxruntime.so with the + # CUDA execution provider (libonnxruntime_providers_cuda.so). cuDNN/cudart + # are supplied via LD_LIBRARY_PATH in the `cuda` dev shell below. + sherpaOnnxCuda = pkgs.stdenvNoCC.mkDerivation { + pname = "sherpa-onnx-cuda"; + version = "1.13.2"; + src = pkgs.fetchurl { + url = "https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.13.2/sherpa-onnx-v1.13.2-cuda-12.x-cudnn-9.x-linux-x64-gpu.tar.bz2"; + hash = "sha256-vRE8k6GLoPm24MrEaramoXYGvde3cbbq7gy9b5bOY/4="; + }; + dontConfigure = true; + dontBuild = true; + installPhase = "mkdir -p $out && cp -r lib $out/lib"; + }; + + # Dev-shell packages (shared by both shells). 
+ commonPackages = with pkgs; [ + # Rust / Tauri + rustToolchain + cargo + rustc + rust-analyzer + + # Tauri dependencies (platform-specific) + openssl + pkg-config + ] ++ pkgs.lib.optionals pkgs.stdenv.isLinux [ + # Linux-only Tauri dependencies + webkitgtk_4_1 + libappindicator-gtk3 + librsvg + alsa-lib + # whisper.cpp needs libclang for bindgen + llvmPackages.libclang + # X11 development libraries for x11rb (mouse tracking, display detection) + libx11 + libxcursor + libxrandr + libxi + # Vulkan for whisper.cpp GPU acceleration (AMD & NVIDIA) + vulkan-loader + vulkan-headers + vulkan-tools + # Shader compiler for Vulkan + shaderc + # CUDA toolkit for whisper.cpp CUDA acceleration (NVIDIA GPUs) + cudaPackages.cudatoolkit + cudaPackages.cuda_nvcc + cudaPackages.cuda_cudart + cudaPackages.cuda_cccl + cudaPackages.libcublas + # GCC for CUDA compilation + gcc + ] ++ pkgs.lib.optionals pkgs.stdenv.isDarwin [ + # macOS: Apple system libraries (via Xcode) are used automatically + libiconv + # libclang for bindgen (whisper.cpp) + llvmPackages.libclang + ] ++ [ + # Frontend + nodejs_22 + pnpm + + # Build tools + cmake + ] ++ pkgs.lib.optionals pkgs.stdenv.isLinux [ + glib + libsecret + # Native Wayland keyboard simulation (alternative to X11-based enigo) + wtype + ]; + + # whisper-rs-sys runs bindgen over ggml-vulkan.h. bindgen invokes libclang + # directly, bypassing the nix cc-wrapper, so it cannot find the libc headers + # (stdio.h) or clang's own builtin headers (stddef.h). bindgen then errors and + # whisper-rs-sys SILENTLY falls back to its bundled no-Vulkan bindings, so the + # ggml_backend_vk_* symbols go missing and whisper-rs fails to compile its + # Vulkan module (issue #64). Feed bindgen the cc-wrapper's libc flags plus + # clang's resource dir. A standard apt system finds these in /usr/include and + # lib/clang, so CI does not need this. 
+ bindgenHook = pkgs.lib.optionalString pkgs.stdenv.isLinux '' + + export BINDGEN_EXTRA_CLANG_ARGS="$(< ${pkgs.stdenv.cc}/nix-support/libc-cflags) -idirafter ${pkgs.llvmPackages.libclang.lib}/lib/clang/${pkgs.lib.versions.major pkgs.llvmPackages.libclang.version}/include" + ''; + + # One dev-shell definition, optionally wired for GPU Parakeet (CUDA). + mkThothShell = { gpuParakeet ? false }: pkgs.mkShell ({ + # Platform-specific library paths (Linux). With gpuParakeet, also expose + # the CUDA sherpa-onnx libs + cuDNN so the CUDA execution provider loads. LD_LIBRARY_PATH = pkgs.lib.optionalString pkgs.stdenv.isLinux (pkgs.lib.makeLibraryPath ([ pkgs.libappindicator-gtk3 pkgs.vulkan-loader - ] ++ pkgs.lib.optionals pkgs.stdenv.isLinux [ - # CUDA runtime libraries for whisper.cpp linking cudaPackages.cuda_cudart cudaPackages.cuda_cccl cudaPackages.libcublas + ] ++ pkgs.lib.optionals gpuParakeet [ + sherpaOnnxCuda # libsherpa-onnx-c-api.so + onnxruntime CUDA EP + cudaPackages.cudnn # libcudnn.so.9 + # The onnxruntime CUDA execution provider dlopen()s the full CUDA + # math-library set; a single missing one makes it abort (no CPU + # fallback), so provide all of them. 
+ cudaPackages.libcurand # libcurand.so.10 + cudaPackages.libcufft # libcufft.so.11 + cudaPackages.libcusparse # libcusparse.so.12 ]) + ":/run/opengl-driver/lib"); # NVIDIA driver (libcuda.so) # Workaround for webkit2gtk Wayland issues (Linux only) @@ -57,71 +153,24 @@ # Linker search path for CUDA driver (libcuda.so) RUSTFLAGS = pkgs.lib.optionalString pkgs.stdenv.isLinux "-L /run/opengl-driver/lib"; - packages = with pkgs; [ - # Rust / Tauri - rustToolchain - cargo - rustc - rust-analyzer - - # Tauri dependencies (platform-specific) - openssl - pkg-config - ] ++ lib.optionals stdenv.isLinux [ - # Linux-only Tauri dependencies - webkitgtk_4_1 - libappindicator-gtk3 - librsvg - alsa-lib - # whisper.cpp needs libclang for bindgen - llvmPackages.libclang - # X11 development libraries for x11rb (mouse tracking, display detection) - libx11 - libxcursor - libxrandr - libxi - # Vulkan for whisper.cpp GPU acceleration (AMD & NVIDIA) - vulkan-loader - vulkan-headers - vulkan-tools - # Shader compiler for Vulkan - shaderc - # CUDA toolkit for whisper.cpp CUDA acceleration (NVIDIA GPUs) - cudaPackages.cudatoolkit - cudaPackages.cuda_nvcc - cudaPackages.cuda_cudart - cudaPackages.cuda_cccl - cudaPackages.libcublas - # GCC for CUDA compilation - gcc - ] ++ lib.optionals stdenv.isDarwin [ - # macOS: applesoft libraries (via Xcode) are used automatically - libiconv - # libclang for bindgen (whisper.cpp) - llvmPackages.libclang - ] ++ [ - # Frontend - nodejs_22 - pnpm - - # Build tools - cmake - - # Useful utilities (Linux-only) - ] ++ lib.optionals stdenv.isLinux [ - glib - libsecret - # Native Wayland keyboard simulation (alternative to X11-based enigo) - wtype - ]; + packages = commonPackages + ++ pkgs.lib.optionals (gpuParakeet && pkgs.stdenv.isLinux) [ cudaPackages.cudnn ]; shellHook = '' - echo "𓅝 Thoth Development Environment" + echo "𓅝 Thoth Development Environment${pkgs.lib.optionalString gpuParakeet " (GPU Parakeet / CUDA)"}" echo "================================" echo 
" Rust: $(rustc --version)" echo " Node: $(node --version)" echo " pnpm: $(pnpm --version)" echo "" + '' + (if gpuParakeet then '' + echo "GPU Parakeet (NVIDIA CUDA) is wired up. Build/run with:" + echo " pnpm tauri dev --no-default-features --features parakeet-cuda,vulkan" + echo " pnpm tauri build --no-default-features --features parakeet-cuda,vulkan" + echo "" + echo "Then transcribe and watch 'nvidia-smi' to confirm the GPU engages." + echo "Logs show 'Attempting CUDA provider...' / 'CUDA provider initialised'." + '' else '' echo "Commands:" echo " pnpm install - Install dependencies" echo " pnpm tauri dev - Start development build" @@ -130,22 +179,17 @@ echo " cargo test - Run Rust tests (from src-tauri/)" echo "" echo "GPU Acceleration (Linux):" - echo " --features cuda - NVIDIA GPUs (requires CUDA drivers)" - echo " --features hipblas - AMD GPUs (requires ROCm)" - echo " --features vulkan - Cross-platform (experimental)" - '' + pkgs.lib.optionalString pkgs.stdenv.isLinux '' - - # whisper-rs-sys runs bindgen over ggml-vulkan.h. bindgen invokes - # libclang directly, bypassing the nix cc-wrapper, so it cannot find - # the libc headers (stdio.h) or clang's own builtin headers - # (stddef.h). bindgen then errors and whisper-rs-sys SILENTLY falls - # back to its bundled no-Vulkan bindings, so the ggml_backend_vk_* - # symbols go missing and whisper-rs fails to compile its Vulkan - # module (issue #64). Feed bindgen the cc-wrapper's libc flags plus - # clang's resource dir. A standard apt system finds these in - # /usr/include and lib/clang, so CI does not need this. 
- export BINDGEN_EXTRA_CLANG_ARGS="$(< ${pkgs.stdenv.cc}/nix-support/libc-cflags) -idirafter ${pkgs.llvmPackages.libclang.lib}/lib/clang/${pkgs.lib.versions.major pkgs.llvmPackages.libclang.version}/include" - ''; - }; + echo " --features cuda - NVIDIA GPUs (Whisper)" + echo " --features vulkan - Cross-platform (Whisper)" + echo " nix develop .#cuda - GPU Parakeet (NVIDIA, via sherpa-onnx CUDA)" + '') + bindgenHook; + } // pkgs.lib.optionalAttrs gpuParakeet { + # Make sherpa-onnx-sys link the CUDA libs instead of downloading CPU ones. + SHERPA_ONNX_LIB_DIR = "${sherpaOnnxCuda}/lib"; + }); + + in { + devShells.default = mkThothShell { }; + devShells.cuda = mkThothShell { gpuParakeet = true; }; }); } diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 7110994..721cdd9 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -36,9 +36,12 @@ webrtc-vad = "0.4" symphonia = { version = "0.6", features = ["mp3", "aac", "ogg", "vorbis", "flac", "wav", "pcm", "isomp4"] } # Transcription -# Official k2-fsa crate; static + auto-download by default; macOS Parakeet TDT fallback -# (Linux CI builds without the parakeet feature). -sherpa-onnx = { version = "1.13", optional = true } +# Official k2-fsa crate. default-features = false drops its built-in `static` +# marker; sherpa-onnx-sys still defaults to static linking when no link feature +# is set (= the `parakeet` feature → CPU, unchanged). The `parakeet-cuda` feature +# switches it to `shared` so a GPU build can link a CUDA-enabled sherpa-onnx via +# the SHERPA_ONNX_LIB_DIR env var. (Linux CI builds without the parakeet feature.) +sherpa-onnx = { version = "1.13", default-features = false, optional = true } # fluidaudio-rs: Apple Neural Engine backend via CoreML (macOS-only, Apple Silicon). 
# WHY fork, not crates.io: the upstream crate lacks platform compile-guards (fails to # build on Linux/Windows); the fork wraps the implementation in @@ -127,6 +130,12 @@ objc2-core-foundation = { version = "0.3", features = ["CFString"] } [features] default = ["parakeet", "fluidaudio"] parakeet = ["dep:sherpa-onnx"] +# GPU Parakeet (NVIDIA): links a CUDA-enabled sherpa-onnx (shared). Build with +# cargo build --no-default-features --features "parakeet-cuda,vulkan" +# and point SHERPA_ONNX_LIB_DIR at the k2-fsa CUDA prebuilt's lib dir. At runtime +# the recognizer requests the `cuda` provider and falls back to CPU if the EP +# isn't available. See flake.nix's `cuda` dev shell which wires this up. +parakeet-cuda = ["parakeet", "sherpa-onnx/shared"] fluidaudio = ["dep:fluidaudio-rs"] # GPU acceleration for whisper.cpp (mutually exclusive, only use one) # Use: cargo build --features cuda for NVIDIA GPUs diff --git a/src-tauri/src/transcription/parakeet.rs b/src-tauri/src/transcription/parakeet.rs index 11a9598..1b0c8a7 100644 --- a/src-tauri/src/transcription/parakeet.rs +++ b/src-tauri/src/transcription/parakeet.rs @@ -85,7 +85,27 @@ impl TranscriptionService { } }; - #[cfg(not(target_os = "macos"))] + // Linux/Windows: with the `parakeet-cuda` feature, try the CUDA execution + // provider (NVIDIA GPU) first and fall back to CPU. Without it, CPU only. + // Note: onnxruntime silently falls back to CPU if the CUDA EP/libs aren't + // present, so confirm real GPU use via `nvidia-smi` during a transcription. 
+ #[cfg(all(not(target_os = "macos"), feature = "parakeet-cuda"))] + let recognizer = { + tracing::info!("Attempting CUDA provider for GPU acceleration"); + match OfflineRecognizer::create(&build_config(Some("cuda".into()))) { + Some(r) => { + tracing::info!("CUDA provider initialised (verify GPU use with nvidia-smi)"); + r + } + None => { + tracing::warn!("CUDA provider failed, falling back to CPU"); + OfflineRecognizer::create(&build_config(Some("cpu".into()))) + .ok_or_else(|| anyhow!("Failed to create Parakeet recognizer with CPU"))? + } + } + }; + + #[cfg(all(not(target_os = "macos"), not(feature = "parakeet-cuda")))] let recognizer = { tracing::info!("Using CPU provider"); OfflineRecognizer::create(&build_config(Some("cpu".into())))