poodle64 · nephalemsec · Jun 12, 2026 · Jun 12, 2026
diff --git a/flake.nix b/flake.nix
@@ -29,18 +29,114 @@
         # CUDA packages for whisper.cpp GPU acceleration
         cudaPackages = pkgs.cudaPackages_12;
 
-      in {
-        devShells.default = pkgs.mkShell {
-          # Platform-specific library paths (Linux)
+        # CUDA-enabled sherpa-onnx prebuilt (k2-fsa release) for GPU Parakeet.
+        # The `parakeet-cuda` cargo feature links sherpa-onnx as `shared`, and
+        # SHERPA_ONNX_LIB_DIR points it here instead of downloading the CPU build.
+        # This archive ships libsherpa-onnx-c-api.so + libonnxruntime.so with the
+        # CUDA execution provider (libonnxruntime_providers_cuda.so). cuDNN/cudart
+        # are supplied via LD_LIBRARY_PATH in the `cuda` dev shell below.
+        sherpaOnnxCuda = pkgs.stdenvNoCC.mkDerivation {
+          pname = "sherpa-onnx-cuda";
+          version = "1.13.2";
+          src = pkgs.fetchurl {
+            url = "https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.13.2/sherpa-onnx-v1.13.2-cuda-12.x-cudnn-9.x-linux-x64-gpu.tar.bz2";
+            hash = "sha256-vRE8k6GLoPm24MrEaramoXYGvde3cbbq7gy9b5bOY/4=";
+          };
+          dontConfigure = true;
+          dontBuild = true;
+          installPhase = "mkdir -p $out && cp -r lib $out/lib";
+        };
+
+        # Dev-shell packages (shared by both shells).
+        commonPackages = with pkgs; [
+          # Rust / Tauri
+          rustToolchain
+          cargo
+          rustc
+          rust-analyzer
+
+          # Tauri dependencies (all platforms)
+          openssl
+          pkg-config
+        ] ++ pkgs.lib.optionals pkgs.stdenv.isLinux [
+          # Linux-only Tauri dependencies
+          webkitgtk_4_1
+          libappindicator-gtk3
+          librsvg
+          alsa-lib
+          # whisper.cpp needs libclang for bindgen
+          llvmPackages.libclang
+          # X11 development libraries for x11rb (mouse tracking, display detection)
+          libx11
+          libxcursor
+          libxrandr
+          libxi
+          # Vulkan for whisper.cpp GPU acceleration (AMD & NVIDIA)
+          vulkan-loader
+          vulkan-headers
+          vulkan-tools
+          # Shader compiler for Vulkan
+          shaderc
+          # CUDA toolkit for whisper.cpp CUDA acceleration (NVIDIA GPUs)
+          cudaPackages.cudatoolkit
+          cudaPackages.cuda_nvcc
+          cudaPackages.cuda_cudart
+          cudaPackages.cuda_cccl
+          cudaPackages.libcublas
+          # GCC for CUDA compilation
+          gcc
+        ] ++ pkgs.lib.optionals pkgs.stdenv.isDarwin [
+          # macOS: Apple system libraries (via Xcode) are used automatically
+          libiconv
+          # libclang for bindgen (whisper.cpp)
+          llvmPackages.libclang
+        ] ++ [
+          # Frontend
+          nodejs_22
+          pnpm
+
+          # Build tools
+          cmake
+        ] ++ pkgs.lib.optionals pkgs.stdenv.isLinux [
+          glib
+          libsecret
+          # Native Wayland keyboard simulation (alternative to X11-based enigo)
+          wtype
+        ];
+
+        # whisper-rs-sys runs bindgen over ggml-vulkan.h. bindgen invokes libclang
+        # directly, bypassing the nix cc-wrapper, so it cannot find the libc headers
+        # (stdio.h) or clang's own builtin headers (stddef.h). bindgen then errors and
+        # whisper-rs-sys SILENTLY falls back to its bundled no-Vulkan bindings, so the
+        # ggml_backend_vk_* symbols go missing and whisper-rs fails to compile its
+        # Vulkan module (issue #64). Feed bindgen the cc-wrapper's libc flags plus
+        # clang's resource dir. A standard apt system finds these in /usr/include and
+        # lib/clang, so CI does not need this.
+        bindgenHook = pkgs.lib.optionalString pkgs.stdenv.isLinux ''
+
+          export BINDGEN_EXTRA_CLANG_ARGS="$(< ${pkgs.stdenv.cc}/nix-support/libc-cflags) -idirafter ${pkgs.llvmPackages.libclang.lib}/lib/clang/${pkgs.lib.versions.major pkgs.llvmPackages.libclang.version}/include"
+        '';
+
+        # One dev-shell definition, optionally wired for GPU Parakeet (CUDA).
+        mkThothShell = { gpuParakeet ? false }: pkgs.mkShell ({
+          # Platform-specific library paths (Linux). With gpuParakeet, also expose
+          # the CUDA sherpa-onnx libs + cuDNN so the CUDA execution provider loads.
           LD_LIBRARY_PATH = pkgs.lib.optionalString pkgs.stdenv.isLinux
             (pkgs.lib.makeLibraryPath ([
               pkgs.libappindicator-gtk3
               pkgs.vulkan-loader
-            ] ++ pkgs.lib.optionals pkgs.stdenv.isLinux [
-              # CUDA runtime libraries for whisper.cpp linking
               cudaPackages.cuda_cudart
               cudaPackages.cuda_cccl
               cudaPackages.libcublas
+            ] ++ pkgs.lib.optionals gpuParakeet [
+              sherpaOnnxCuda           # libsherpa-onnx-c-api.so + onnxruntime CUDA EP
+              cudaPackages.cudnn       # libcudnn.so.9
+              # The onnxruntime CUDA execution provider dlopen()s the full CUDA
+              # math-library set; a single missing one makes it abort (no CPU
+              # fallback), so provide all of them.
+              cudaPackages.libcurand   # libcurand.so.10
+              cudaPackages.libcufft    # libcufft.so.11
+              cudaPackages.libcusparse # libcusparse.so.12
             ]) + ":/run/opengl-driver/lib");  # NVIDIA driver (libcuda.so)
 
           # Workaround for webkit2gtk Wayland issues (Linux only)
@@ -57,71 +153,24 @@
           # Linker search path for CUDA driver (libcuda.so)
           RUSTFLAGS = pkgs.lib.optionalString pkgs.stdenv.isLinux "-L /run/opengl-driver/lib";
 
-          packages = with pkgs; [
-            # Rust / Tauri
-            rustToolchain
-            cargo
-            rustc
-            rust-analyzer
-
-            # Tauri dependencies (platform-specific)
-            openssl
-            pkg-config
-          ] ++ lib.optionals stdenv.isLinux [
-            # Linux-only Tauri dependencies
-            webkitgtk_4_1
-            libappindicator-gtk3
-            librsvg
-            alsa-lib
-            # whisper.cpp needs libclang for bindgen
-            llvmPackages.libclang
-            # X11 development libraries for x11rb (mouse tracking, display detection)
-            libx11
-            libxcursor
-            libxrandr
-            libxi
-            # Vulkan for whisper.cpp GPU acceleration (AMD & NVIDIA)
-            vulkan-loader
-            vulkan-headers
-            vulkan-tools
-            # Shader compiler for Vulkan
-            shaderc
-            # CUDA toolkit for whisper.cpp CUDA acceleration (NVIDIA GPUs)
-            cudaPackages.cudatoolkit
-            cudaPackages.cuda_nvcc
-            cudaPackages.cuda_cudart
-            cudaPackages.cuda_cccl
-            cudaPackages.libcublas
-            # GCC for CUDA compilation
-            gcc
-          ] ++ lib.optionals stdenv.isDarwin [
-            # macOS: applesoft libraries (via Xcode) are used automatically
-            libiconv
-            # libclang for bindgen (whisper.cpp)
-            llvmPackages.libclang
-          ] ++ [
-            # Frontend
-            nodejs_22
-            pnpm
-
-            # Build tools
-            cmake
-
-            # Useful utilities (Linux-only)
-          ] ++ lib.optionals stdenv.isLinux [
-            glib
-            libsecret
-            # Native Wayland keyboard simulation (alternative to X11-based enigo)
-            wtype
-          ];
+          packages = commonPackages
+            ++ pkgs.lib.optionals (gpuParakeet && pkgs.stdenv.isLinux) [ cudaPackages.cudnn ];
 
           shellHook = ''
-            echo "𓅝 Thoth Development Environment"
+            echo "𓅝 Thoth Development Environment${pkgs.lib.optionalString gpuParakeet " (GPU Parakeet / CUDA)"}"
             echo "================================"
             echo "  Rust: $(rustc --version)"
             echo "  Node: $(node --version)"
             echo "  pnpm: $(pnpm --version)"
             echo ""
+          '' + (if gpuParakeet then ''
+            echo "GPU Parakeet (NVIDIA CUDA) is wired up. Build/run with:"
+            echo "  pnpm tauri dev --no-default-features --features parakeet-cuda,vulkan"
+            echo "  pnpm tauri build --no-default-features --features parakeet-cuda,vulkan"
+            echo ""
+            echo "Then transcribe and watch 'nvidia-smi' to confirm the GPU engages."
+            echo "Logs show 'Attempting CUDA provider...' / 'CUDA provider initialised'."
+          '' else ''
             echo "Commands:"
             echo "  pnpm install        - Install dependencies"
             echo "  pnpm tauri dev      - Start development build"
@@ -130,22 +179,17 @@
             echo "  cargo test          - Run Rust tests (from src-tauri/)"
             echo ""
             echo "GPU Acceleration (Linux):"
-            echo "  --features cuda     - NVIDIA GPUs (requires CUDA drivers)"
-            echo "  --features hipblas  - AMD GPUs (requires ROCm)"
-            echo "  --features vulkan   - Cross-platform (experimental)"
-          '' + pkgs.lib.optionalString pkgs.stdenv.isLinux ''
-
-            # whisper-rs-sys runs bindgen over ggml-vulkan.h. bindgen invokes
-            # libclang directly, bypassing the nix cc-wrapper, so it cannot find
-            # the libc headers (stdio.h) or clang's own builtin headers
-            # (stddef.h). bindgen then errors and whisper-rs-sys SILENTLY falls
-            # back to its bundled no-Vulkan bindings, so the ggml_backend_vk_*
-            # symbols go missing and whisper-rs fails to compile its Vulkan
-            # module (issue #64). Feed bindgen the cc-wrapper's libc flags plus
-            # clang's resource dir. A standard apt system finds these in
-            # /usr/include and lib/clang, so CI does not need this.
-            export BINDGEN_EXTRA_CLANG_ARGS="$(< ${pkgs.stdenv.cc}/nix-support/libc-cflags) -idirafter ${pkgs.llvmPackages.libclang.lib}/lib/clang/${pkgs.lib.versions.major pkgs.llvmPackages.libclang.version}/include"
-          '';
-        };
+            echo "  --features cuda     - NVIDIA GPUs (Whisper)"
+            echo "  --features vulkan   - Cross-platform (Whisper)"
+            echo "  nix develop .#cuda  - GPU Parakeet (NVIDIA, via sherpa-onnx CUDA)"
+          '') + bindgenHook;
+        } // pkgs.lib.optionalAttrs gpuParakeet {
+          # Make sherpa-onnx-sys link the CUDA libs instead of downloading CPU ones.
+          SHERPA_ONNX_LIB_DIR = "${sherpaOnnxCuda}/lib";
+        });
+
+      in {
+        devShells.default = mkThothShell { };
+        devShells.cuda = mkThothShell { gpuParakeet = true; };
       });
 }
diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml
@@ -36,9 +36,12 @@ webrtc-vad = "0.4"
 symphonia = { version = "0.6", features = ["mp3", "aac", "ogg", "vorbis", "flac", "wav", "pcm", "isomp4"] }
 
 # Transcription
-# Official k2-fsa crate; static + auto-download by default; macOS Parakeet TDT fallback
-# (Linux CI builds without the parakeet feature).
-sherpa-onnx = { version = "1.13", optional = true }
+# Official k2-fsa crate. `default-features = false` drops its built-in `static`
+# marker; sherpa-onnx-sys still links statically when no link feature is set, so
+# the plain `parakeet` feature keeps its CPU build unchanged. The `parakeet-cuda`
+# feature switches to `shared` so a GPU build can link a CUDA-enabled sherpa-onnx
+# via the SHERPA_ONNX_LIB_DIR env var. (Linux CI builds without the parakeet feature.)
+sherpa-onnx = { version = "1.13", default-features = false, optional = true }
 # fluidaudio-rs: Apple Neural Engine backend via CoreML (macOS-only, Apple Silicon).
 # WHY fork, not crates.io: the upstream crate lacks platform compile-guards (fails to
 # build on Linux/Windows); the fork wraps the implementation in
@@ -127,6 +130,12 @@ objc2-core-foundation = { version = "0.3", features = ["CFString"] }
 [features]
 default = ["parakeet", "fluidaudio"]
 parakeet = ["dep:sherpa-onnx"]
+# GPU Parakeet (NVIDIA): links a CUDA-enabled sherpa-onnx (shared). Build with
+#   cargo build --no-default-features --features "parakeet-cuda,vulkan"
+# and point SHERPA_ONNX_LIB_DIR at the k2-fsa CUDA prebuilt's lib dir. At runtime
+# the recognizer requests the `cuda` provider and falls back to CPU if the EP
+# isn't available. See flake.nix's `cuda` dev shell which wires this up.
+parakeet-cuda = ["parakeet", "sherpa-onnx/shared"]
 fluidaudio = ["dep:fluidaudio-rs"]
 # GPU acceleration for whisper.cpp (mutually exclusive, only use one)
 # Use: cargo build --features cuda      for NVIDIA GPUs

diff --git a/src-tauri/src/transcription/parakeet.rs b/src-tauri/src/transcription/parakeet.rs
@@ -85,7 +85,27 @@ impl TranscriptionService {
             }
         };
 
-        #[cfg(not(target_os = "macos"))]
+        // Linux/Windows: with the `parakeet-cuda` feature, try the CUDA execution
+        // provider (NVIDIA GPU) first and fall back to CPU. Without it, CPU only.
+        // Note: onnxruntime silently falls back to CPU if the CUDA EP/libs aren't
+        // present, so confirm real GPU use via `nvidia-smi` during a transcription.
+        #[cfg(all(not(target_os = "macos"), feature = "parakeet-cuda"))]
+        let recognizer = {
+            tracing::info!("Attempting CUDA provider for GPU acceleration");
+            match OfflineRecognizer::create(&build_config(Some("cuda".into()))) {
+                Some(r) => {
+                    tracing::info!("CUDA provider initialised (verify GPU use with nvidia-smi)");
+                    r
+                }
+                None => {
+                    tracing::warn!("CUDA provider failed, falling back to CPU");
+                    OfflineRecognizer::create(&build_config(Some("cpu".into())))
+                        .ok_or_else(|| anyhow!("Failed to create Parakeet recognizer with CPU"))?
+                }
+            }
+        };
+
+        #[cfg(all(not(target_os = "macos"), not(feature = "parakeet-cuda")))]
         let recognizer = {
             tracing::info!("Using CPU provider");
             OfflineRecognizer::create(&build_config(Some("cpu".into())))