diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..6ed6316 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,183 @@ +# Cuts a release on a v* tag: cross-compiles the Go launcher for all 5 targets, +# builds whisper.cpp per platform, publishes both as release assets (the names +# the npm wrapper, self-update, and provisioner expect), then publishes npm. +# +# The whisper.cpp matrix below is unverified until the first tagged run — the +# binary name / cmake flags may need adjustment for the pinned whisper.cpp ref. +name: release + +on: + push: + tags: + - 'v*' + +# Default to read-only; only the release job that publishes assets gets write. +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - { goos: darwin, goarch: amd64 } + - { goos: darwin, goarch: arm64 } + - { goos: linux, goarch: amd64 } + - { goos: linux, goarch: arm64 } + - { goos: windows, goarch: amd64 } + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: '1.23' + - name: Build launcher + working-directory: cli + env: + GOOS: ${{ matrix.goos }} + GOARCH: ${{ matrix.goarch }} + CGO_ENABLED: '0' + run: | + VERSION="${GITHUB_REF_NAME#v}" + go generate ./... # sync backend + PodStack commands for go:embed + EXT="" + [ "${{ matrix.goos }}" = "windows" ] && EXT=".exe" + OUT="podcli-${{ matrix.goos }}-${{ matrix.goarch }}${EXT}" + go build -trimpath -ldflags "-s -w -X main.Version=${VERSION}" -o "../${OUT}" . 
+ echo "ASSET=${OUT}" >> "$GITHUB_ENV" + - uses: actions/upload-artifact@v4 + with: + name: ${{ env.ASSET }} + path: ${{ env.ASSET }} + + whisper: + strategy: + fail-fast: false + matrix: + include: + - { runner: macos-14, goos: darwin, goarch: arm64 } + - { runner: macos-13, goos: darwin, goarch: amd64 } + - { runner: ubuntu-latest, goos: linux, goarch: amd64 } + - { runner: ubuntu-24.04-arm, goos: linux, goarch: arm64 } + - { runner: windows-latest, goos: windows, goarch: amd64 } + runs-on: ${{ matrix.runner }} + env: + WHISPER_REF: v1.7.4 # pin a known-good whisper.cpp tag + steps: + - name: Clone whisper.cpp + uses: actions/checkout@v4 + with: + repository: ggml-org/whisper.cpp + ref: ${{ env.WHISPER_REF }} + - name: Build (static; Metal embedded on macOS) + shell: bash + run: | + EXTRA="" + if [ "${{ matrix.goos }}" = "darwin" ]; then EXTRA="-DGGML_METAL_EMBED_LIBRARY=ON"; fi + cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release $EXTRA + cmake --build build --config Release --target whisper-cli -j + - name: Stage asset + shell: bash + run: | + EXT=""; [ "${{ matrix.goos }}" = "windows" ] && EXT=".exe" + SRC=$(find build -name "whisper-cli${EXT}" -type f | head -1) + OUT="whisper-cli-${{ matrix.goos }}-${{ matrix.goarch }}${EXT}" + cp "$SRC" "$OUT" + echo "ASSET=$OUT" >> "$GITHUB_ENV" + - uses: actions/upload-artifact@v4 + with: + name: ${{ env.ASSET }} + path: ${{ env.ASSET }} + + studio: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: '20' + - name: Build studio bundle + run: | + npm ci + sh scripts/build-studio.sh dist/studio + tar -czf studio-bundle.tar.gz -C dist/studio . + - uses: actions/upload-artifact@v4 + with: + name: studio-bundle.tar.gz + path: studio-bundle.tar.gz + + remotion: + # Per-platform: @rspack and the Remotion compositor ship native binaries, so + # the bundle is not portable across os/arch. 
+ strategy: + fail-fast: false + matrix: + include: + - { runner: macos-14, goos: darwin, goarch: arm64 } + - { runner: macos-13, goos: darwin, goarch: amd64 } + - { runner: ubuntu-latest, goos: linux, goarch: amd64 } + - { runner: ubuntu-24.04-arm, goos: linux, goarch: arm64 } + - { runner: windows-latest, goos: windows, goarch: amd64 } + runs-on: ${{ matrix.runner }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: '20' + - name: Build remotion bundle + shell: bash + run: | + sh scripts/build-remotion.sh dist/remotion-bundle + OUT="remotion-bundle-${{ matrix.goos }}-${{ matrix.goarch }}.tar.gz" + tar -czf "$OUT" -C dist/remotion-bundle . + echo "ASSET=$OUT" >> "$GITHUB_ENV" + - uses: actions/upload-artifact@v4 + with: + name: ${{ env.ASSET }} + path: ${{ env.ASSET }} + + release: + needs: [build, whisper, studio, remotion] + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/download-artifact@v4 + with: + path: dist + merge-multiple: true + - name: Generate checksums + working-directory: dist + run: | + sha256sum * > "$RUNNER_TEMP/checksums.txt" + mv "$RUNNER_TEMP/checksums.txt" checksums.txt + - uses: softprops/action-gh-release@v2 + with: + files: dist/* + generate_release_notes: true + + publish-npm: + needs: release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: '20' + registry-url: 'https://registry.npmjs.org' + - name: Assert NPM_TOKEN is set + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + if [ -z "$NODE_AUTH_TOKEN" ]; then + echo "::error::NPM_TOKEN secret is not set — refusing to publish. The GitHub release and assets are already published; set the secret and re-run this job." 
>&2 + exit 1 + fi + - name: Publish wrapper pinned to the tag + working-directory: npm + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + VERSION="${GITHUB_REF_NAME#v}" + npm version "$VERSION" --no-git-tag-version --allow-same-version + npm publish --access public diff --git a/.gitignore b/.gitignore index 977be1e..2c5527a 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,7 @@ episodes/ *.srt uploads/ output/ +cli/internal/backend/files/ +cli/internal/podstack/commands/ +cli/podcli +podcli-clips/ diff --git a/AGENTS.podstack.md b/AGENTS.podstack.md index 13623a9..a046563 100644 --- a/AGENTS.podstack.md +++ b/AGENTS.podstack.md @@ -174,7 +174,8 @@ PodStack ships one source-of-truth (`commands/`) and installs to the right locat | opencode | `.opencode/commands/*.md` | `AGENTS.md` | | Generic | `commands/*.md` | `AGENTS.md` | -Install: `./setup.sh --host `. See `README.md` for per-host usage examples. +These command files ship with podcli; place the set for your tool (left column) in +its command dir. See `README.md` for per-host usage examples. --- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3468c15..5f9ddfe 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,20 +2,41 @@ Thank you for helping improve podcli. This project is AGPL-3.0 — contributions are welcome under the same license. +## Architecture + +podcli ships as a native Go launcher (`cli/`) that provisions hermetic runtimes +and routes commands to the Python backend (`backend/`) and the Node studio/MCP +server (`src/`). End users install a prebuilt binary; the source trees below are +for development. + ## Development setup +Backend (Python) + studio (TypeScript): + ```bash -./setup.sh +./setup.sh # venv + Python deps npm install -npm run build +npm run build # tsc + studio bundle +``` + +Native launcher (Go 1.23+): + +```bash +cd cli +go generate ./... # sync backend + PodStack commands for go:embed +go build -o podcli . ``` -Python backend lives in `backend/`. 
TypeScript MCP server and Web UI live in `src/`. +`cli/internal/backend/files/` is generated by `go generate` (gitignored) — edit +the canonical `backend/` instead. ## Project layout -| Path | Purpose | +| Path | Purpose | | --------------------------------------- | --------------------------------------------------------------- | +| `cli/` | Go launcher — provisioning, self-update, command routing | +| `backend/` | Python engine (transcription, rendering, clip generation) | +| `src/` | TypeScript MCP server + Web UI | | `.podcli/` | Config home (knowledge, presets, assets, settings) — gitignored | | `data/` | Runtime data (cache, output, working) — gitignored | | `backend/config/paths.py` | Canonical path resolution (Python) | @@ -25,10 +46,11 @@ Python backend lives in `backend/`. TypeScript MCP server and Web UI live in `sr ## Before you open a PR -1. Run tests: `python3 -m unittest discover -s tests -v` -2. Run TypeScript build: `npm run build` -3. If you change paths, env vars, or cache layout, update `README.md` and add a note to the config migration logic. -4. Keep diffs focused — one feature or fix per PR when possible. +1. Python tests: `python3 -m pytest tests/ -q` +2. TypeScript: `npm run build` and `npx vitest run` +3. Go (if you touched `cli/`): `cd cli && go build ./... && go test ./... && gofmt -l .` +4. If you change paths, env vars, or cache layout, update `README.md` and the config migration logic, and keep `backend/config/paths.py` and `src/config/paths.ts` aligned. +5. Keep diffs focused — one feature or fix per PR when possible. ## Path and cache conventions diff --git a/README.md b/README.md index 7ecaac1..24fbfde 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@

Watch with sound on X

```bash -./podcli process episode.mp4 +podcli process episode.mp4 ``` One command transcribes, picks the best moments, crops to the face, and burns captions in. Nothing leaves your machine. @@ -63,14 +63,14 @@ The first half is **video processing** — podcli's core engine. The second half ### 1. Drop in your episode ```bash -./setup.sh --ui +podcli # then choose "Open Web UI" # → http://localhost:3847 ``` Drag your video into the Web UI, or use the CLI: ```bash -./podcli process episode.mp4 +podcli process episode.mp4 ``` ### 2. Get clips automatically @@ -147,63 +147,47 @@ Both halves share the same **knowledge base** (`.podcli/knowledge/`) — your sh - **Preset system** — save named configurations per show - **MCP server** — 17 tools for Claude Desktop / Claude Code integration - **Web UI** — single-page flow at `localhost:3847` -- **CLI** — one-command processing: `./podcli process episode.mp4` +- **CLI** — one-command processing: `podcli process episode.mp4` --- -## Prerequisites +## Install -| Tool | Install | -| -------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | -| **Node.js** >= 18 | [nodejs.org](https://nodejs.org) | -| **Python** >= 3.10 | [python.org](https://python.org) | -| **FFmpeg** | `brew install ffmpeg` / `sudo apt install ffmpeg` | -| **Claude Code** (optional) | [docs.anthropic.com](https://docs.anthropic.com/en/docs/claude-code) — needed for PodStack slash commands | -| **Codex** (optional) | [openai.com/codex](https://openai.com/index/introducing-codex/) — alternative AI engine for clip suggestion (auto-detected if Claude is unavailable) | - -## Quick Start +No prerequisites — the install fetches a self-contained binary, and the first run +provisions everything it needs (Python, Node, FFmpeg, whisper.cpp, models) into a +managed directory. You don't need Go, Node, Python, or FFmpeg installed. 
**macOS / Linux** ```bash -git clone https://github.com/nmbrthirteen/podcli.git -cd podcli -chmod +x setup.sh podcli -./setup.sh +curl -fsSL https://raw.githubusercontent.com/nmbrthirteen/podcli/main/install.sh | sh ``` -**Windows** +**Windows (PowerShell)** ```powershell -git clone https://github.com/nmbrthirteen/podcli.git -cd podcli +irm https://raw.githubusercontent.com/nmbrthirteen/podcli/main/install.ps1 | iex ``` -Then double-click **`install.cmd`** (or run it in a terminal). It installs everything and keeps the window open so you can see the result. To launch the studio afterwards: +**With npm** (if you already have Node): -```powershell -powershell -ExecutionPolicy Bypass -File setup.ps1 -Ui +```bash +npm install -g podcli ``` -This will: - -1. Check system dependencies (Node, Python, FFmpeg) -2. Create a Python virtual environment and install packages -3. Install Node packages and build TypeScript -4. Set up PodStack slash commands and knowledge base templates -5. Create the local `.podcli/` data directory -6. Launch the web UI at **http://localhost:3847** - -### Setup options +Then just run it — the first launch sets itself up: ```bash -./setup.sh # full install + launch UI -./setup.sh --install # install only -./setup.sh --ui # launch UI only (skip install) -./setup.sh --mcp # print MCP config for Claude +podcli # interactive menu (and Web UI) +podcli process episode.mp4 # transcribe + export clips ``` -On Windows, use `.\setup.ps1` with `-Install`, `-Ui`, or `-Mcp`, and run the CLI via `podcli.cmd` (e.g. `podcli process video.mp4 --top 5`). +**Optional**, for AI clip suggestion and the PodStack slash commands: install +[Claude Code](https://docs.anthropic.com/en/docs/claude-code) or +[Codex](https://openai.com/index/introducing-codex/) (auto-detected). + +> Building from source needs Go 1.23+ (and Node for the studio bundle); see +> [`plans/native-cli.md`](plans/native-cli.md). 
--- @@ -212,7 +196,7 @@ On Windows, use `.\setup.ps1` with `-Install`, `-Ui`, or `-Mcp`, and run the CLI ### Web UI ```bash -./setup.sh --ui +podcli # then choose "Open Web UI" # → http://localhost:3847 ``` @@ -227,17 +211,17 @@ On Windows, use `.\setup.ps1` with `-Install`, `-Ui`, or `-Mcp`, and run the CLI ```bash # One command. Auto-transcribes, picks moments, renders clips. -./podcli process episode.mp4 +podcli process episode.mp4 ``` With more control: ```bash # Use an existing transcript instead of transcribing -./podcli process episode.mp4 --transcript transcript.txt --top 5 +podcli process episode.mp4 --transcript transcript.txt --top 5 # Full options -./podcli process episode.mp4 \ +podcli process episode.mp4 \ --transcript transcript.txt \ --top 8 \ --caption-style branded \ @@ -248,9 +232,9 @@ With more control: ### Presets ```bash -./podcli presets save myshow --caption-style branded --logo logo.png --top 5 -./podcli presets list -./podcli process video.mp4 --preset myshow +podcli presets save myshow --caption-style branded --logo logo.png --top 5 +podcli presets list +podcli process video.mp4 --preset myshow ``` ### Content Workflow (PodStack) @@ -305,30 +289,25 @@ Manage via the web UI at `/knowledge.html` (drag & drop, inline editor) or throu podcli is a [Model Context Protocol](https://modelcontextprotocol.io) server — Claude can use it as a tool to create clips through conversation. +**Claude Code** — register the bundled MCP server in one command: + +```bash +podcli mcp install +``` + **Claude Desktop** — add to `claude_desktop_config.json`: ```json { "mcpServers": { "podcli": { - "command": "node", - "args": ["/path/to/podcli/dist/index.js"], - "env": { - "PYTHON_PATH": "/path/to/podcli/venv/bin/python3" - } + "command": "podcli", + "args": ["mcp"] } } } ``` -**Claude Code:** - -```bash -claude mcp add podcli -- node /path/to/podcli/dist/index.js -``` - -Run `./setup.sh --mcp` to get the exact config with your paths filled in. 
- ### MCP Tools | Tool | Description | @@ -368,8 +347,9 @@ Run `./setup.sh --mcp` to get the exact config with your paths filled in. ``` podcli/ -├── podcli # CLI entry point -├── setup.sh # one-command install & launch +├── cli/ # Go launcher (native binary, provisioning, self-update) +├── install.sh / install.ps1 # node-less installers +├── setup.sh # dev environment setup (venv + npm) ├── package.json ├── CLAUDE.md # PodStack master config │ diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 0000000..f518e74 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,50 @@ +# Releasing podcli + +Distribution is fully automated by `.github/workflows/release.yml`: pushing a +`v*` tag builds the launcher for all five platforms, builds whisper.cpp, bundles +the studio and Remotion, generates `checksums.txt`, publishes a GitHub release, +and publishes the npm package. End users then install with no prerequisites: + +```bash +curl -fsSL https://raw.githubusercontent.com/nmbrthirteen/podcli/main/install.sh | sh +# or: npm install -g podcli (Windows: install.ps1) +``` + +## One-time setup + +- Set the **`NPM_TOKEN`** repository secret (an npm automation token with publish + rights). Without it the GitHub release still publishes; the npm job fails and + can be re-run after the secret is added. + +## Cutting a release + +1. Pick the version `X.Y.Z`. Set it in **`npm/package.json`** (`install.js` fetches + the binary from `releases/download/vX.Y.Z/`, so the npm version must equal the + tag). Optionally align the root `package.json`. +2. Merge to `main` and make sure CI is green. +3. Tag and push: + ```bash + git tag vX.Y.Z + git push origin vX.Y.Z + ``` +4. Watch the `release` workflow. It produces these assets (the names the npm + wrapper, self-update, and provisioner expect): + - `podcli-{darwin,linux,windows}-{amd64,arm64}[.exe]` — static launchers + - `whisper-cli-<os>-<arch>[.exe]` + - `studio-bundle.tar.gz` + - `remotion-bundle-<os>-<arch>.tar.gz` + - `checksums.txt` +5. 
Verify the npm publish succeeded (`npm view podcli version`). + +## Smoke test (ideally one machine per OS) + +```bash +curl -fsSL .../install.sh | sh # or npm i -g podcli +podcli doctor # paths + engine resolution +podcli process sample.mp4 --top 1 # transcribe -> suggest -> export a clip +podcli # interactive menu -> Open Web UI +``` + +First run downloads the hermetic runtimes (Python, Node, FFmpeg, model) once. +Needs outbound HTTPS to github.com, huggingface.co, nodejs.org, and the ffmpeg +hosts. glibc Linux only (Alpine/musl is unsupported). diff --git a/backend/cli.py b/backend/cli.py index 9bf6a98..dd735cf 100644 --- a/backend/cli.py +++ b/backend/cli.py @@ -14,6 +14,7 @@ import json import os import shutil +import subprocess import sys import textwrap import time @@ -26,7 +27,7 @@ except (ValueError, OSError): pass -_env_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".env") +_env_file = os.environ.get("PODCLI_ENV_FILE") or os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".env") try: from dotenv import load_dotenv load_dotenv(_env_file) @@ -152,17 +153,24 @@ def _auto_migrate_cli(args) -> None: summary = auto_migrate_legacy_if_pending(quiet=True) if not summary: return + home = summary.get("home_migration") or {} + imported_home = home.get("imported") moved_cache = summary.get("moved_json") or summary.get("moved_remotion_bundle") moved_presets = (summary.get("presets_migration") or {}).get("moved") - if moved_cache or moved_presets: + copied_env = (summary.get("env_migration") or {}).get("copied") + if imported_home or moved_cache or moved_presets or copied_env: gray = "\033[38;5;245m" green = "\033[38;2;74;222;128m" reset = "\033[0m" + if imported_home: + print(f" {green}✓{reset} {gray}Imported your presets, knowledge & assets → {home.get('target_home')}{reset}") if moved_cache: - print(f" {green}✓{reset} {gray}Migrated legacy cache → {summary.get('target_dir')}{reset}") + print(f" {green}✓{reset} {gray}Migrated 
transcript cache → {summary.get('target_dir')}{reset}") if moved_presets: pm = summary.get("presets_migration") or {} print(f" {green}✓{reset} {gray}Migrated presets → {pm.get('target_dir')}{reset}") + if copied_env: + print(f" {green}✓{reset} {gray}Copied .env → {(summary.get('env_migration') or {}).get('target')}{reset}") print() @@ -285,7 +293,11 @@ def _resolve_output_dir( if explicit_output_dir: return explicit_output_dir - base_output_dir = configured_output_dir or os.path.join(os.path.dirname(video_path), "clips") + base_output_dir = ( + configured_output_dir + or os.environ.get("PODCLI_OUTPUT") + or os.path.join(os.path.dirname(video_path), "clips") + ) if not preset_name: return base_output_dir @@ -313,7 +325,7 @@ def _should_enter_post_render_loop(config: dict, interrupted: bool, results: lis def cmd_studio(args): """Cut a fragment + wrap it with Remotion intro/outro bookends. - Thin wrapper around scripts/clip_studio.py (which orchestrates the + Thin wrapper around clip_studio.py (which orchestrates the fragment render, bookend renders, and the concat). Runs it as a subprocess with this same interpreter so the venv is reused. 
""" @@ -321,8 +333,7 @@ def cmd_studio(args): print("Error: provide either --start and --end, or --paragraph \"text\"", file=sys.stderr) sys.exit(1) - project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - script = os.path.join(project_root, "scripts", "clip_studio.py") + script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "clip_studio.py") if not os.path.exists(script): print(f"Error: clip_studio.py not found at {script}", file=sys.stderr) sys.exit(1) @@ -416,6 +427,8 @@ def cmd_process(args): save_corrections(merged) # CLI overrides + if getattr(args, "engine", None): + os.environ["PODCLI_ENGINE"] = args.engine if args.caption_style: config["caption_style"] = args.caption_style if args.crop: @@ -586,12 +599,18 @@ def _spinner(): def _transcribe_progress(pct, msg): _spin_msg[0] = f"{msg} ({pct}%)" if pct < 100 else msg - result = transcribe_file( - file_path=video_path, - model_size=config.get("whisper_model", "base"), - enable_diarization=not config.get("no_speakers", False), - progress_callback=_transcribe_progress, - ) + try: + result = transcribe_file( + file_path=video_path, + model_size=config.get("whisper_model", "base"), + enable_diarization=not config.get("no_speakers", False), + progress_callback=_transcribe_progress, + ) + except (RuntimeError, FileNotFoundError, subprocess.CalledProcessError) as e: + _spin_stop.set() + spin_thread.join(timeout=1) + print(f"\r{' ' * 70}\r ✗ {e}\n", flush=True) + sys.exit(1) _spin_stop.set() spin_thread.join(timeout=1) words = result["words"] @@ -1103,160 +1122,10 @@ def _print_clips(clips: list): def _find_moment_with_claude(description: str, segments: list, existing_clips: list) -> list: - """Use Claude to find a specific moment described by the user.""" - from services.claude_suggest import _find_ai_cli_candidates, _run_ai_command - from presets import MIN_CLIP_DURATION, MAX_CLIP_DURATION, TARGET_CLIP_DURATION_MIN, TARGET_CLIP_DURATION_MAX - - candidates = _find_ai_cli_candidates() 
- if not candidates: - print(" ⚠ No AI CLI available for moment search") - return [] - - # Build transcript text - lines = [] - for seg in segments: - speaker = seg.get("speaker", "") - speaker_label = f"[{speaker}] " if speaker else "" - start = seg.get("start", 0) - text = seg.get("text", "").strip() - if text: - lines.append(f"[{start:.1f}s] {speaker_label}{text}") - transcript_text = "\n".join(lines) - - # Build list of existing clip timestamps to avoid - existing_desc = "" - if existing_clips: - existing_desc = "\n\nALREADY SELECTED (do not re-suggest these):\n" - for c in existing_clips: - existing_desc += f"- {c['start_second']}s-{c['end_second']}s: {c['title']}\n" - - import tempfile, subprocess, json as _json - - prompt = f"""Find the moment the user is describing in this podcast transcript. Return ONLY valid JSON. - -USER WANTS: "{description}" -{existing_desc} -RULES: -- Find the EXACT moment matching the user's description -- Return 1-3 matching moments (best match first) -- All timestamps in SECONDS as numbers -- Duration target: {TARGET_CLIP_DURATION_MIN}-{TARGET_CLIP_DURATION_MAX} seconds, max {MAX_CLIP_DURATION} seconds -- Cut tight: start at the hook, end when the point lands -- Use segments to cut filler if needed - -Return this JSON: -{{ - "clips": [ - {{ - "title": "First sentence of the moment", - "start_second": 123.4, - "end_second": 158.4, - "segments": [{{"start": 123.4, "end": 158.4}}], - "duration": 35, - "content_type": "guest_story", - "scores": {{"standalone": 4, "hook": 5, "relevance": 4, "quotability": 3}}, - "total_score": 16, - "quote": "The key quote", - "why": "Why this matches what the user asked for" - }} - ] -}} - -Transcript: -{transcript_text}""" - - project_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") - with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, dir=project_dir) as f: - f.write(prompt) - prompt_file = f.name - - try: - for idx, (cli_path, engine) in enumerate(candidates): 
- if idx > 0: - print(f" ⚠ Retrying moment search with {'Claude' if engine == 'claude' else 'Codex'}") - try: - result = _run_ai_command( - cli_path=cli_path, - engine=engine, - prompt=prompt, - prompt_file=prompt_file, - project_dir=project_dir, - timeout=300, - ) - except Exception: - continue - - if result.returncode != 0 or not result.stdout.strip(): - continue - - response = result.stdout.strip() - if "```" in response: - import re - fence_match = re.search(r"```(?:json)?\s*\n?(.*?)\n?\s*```", response, re.DOTALL) - if fence_match: - response = fence_match.group(1).strip() - - try: - json_start = response.find("{") - if json_start >= 0: - decoder = _json.JSONDecoder() - data, _ = decoder.raw_decode(response, json_start) - else: - data = _json.loads(response) - except Exception: - continue + """Use Claude/Codex to find a specific moment described by the user.""" + from services.claude_suggest import find_moments_from_text - found = [] - for c in data.get("clips", []): - scores = c.get("scores", {}) - total = sum(scores.values()) if scores else c.get("total_score", 0) - raw_segments = c.get("segments", []) - keep_segments = [] - for seg in raw_segments: - s = round(float(seg.get("start", 0)), 1) - e = round(float(seg.get("end", 0)), 1) - if e > s: - keep_segments.append({"start": s, "end": e}) - - start_sec = round(float(c.get("start_second", 0)), 1) - end_sec = round(float(c.get("end_second", 0)), 1) - if not keep_segments and end_sec > start_sec: - keep_segments = [{"start": start_sec, "end": end_sec}] - - kept_duration = sum(seg["end"] - seg["start"] for seg in keep_segments) - if kept_duration < MIN_CLIP_DURATION or kept_duration > MAX_CLIP_DURATION: - continue - - found.append({ - "title": c.get("title", "Untitled")[:55], - "start_second": keep_segments[0]["start"] if keep_segments else start_sec, - "end_second": keep_segments[-1]["end"] if keep_segments else end_sec, - "segments": keep_segments, - "duration": round(kept_duration), - "score": total, - 
"content_type": c.get("content_type", "unknown"), - "reasoning": c.get("why", ""), - "preview_text": c.get("quote", "")[:120], - "suggested_caption_style": "hormozi", - "quote": c.get("quote", ""), - "why": c.get("why", ""), - "reasons": [c.get("content_type", "")], - "preview": c.get("quote", "")[:120], - }) - - if found: - return found - - return [] - - except Exception as e: - print(f" ⚠ Search error: {e}") - return [] - finally: - try: - os.unlink(prompt_file) - except Exception: - pass + return find_moments_from_text(description, segments, existing_clips, max_results=3) def _review_clips(clips: list, segments: list, energy_scores: list | None, config: dict) -> list: @@ -2224,6 +2093,11 @@ def cmd_thumbnails(args): video = getattr(args, "video", None) as_json = getattr(args, "json", False) + # Interactive callers build a bare namespace without --output's argparse + # default, so fall back to the same default the CLI documents. + if not getattr(args, "output", None): + args.output = "./thumbnails" + # An exact timestamp wins: extract that frame from the video and use it as the photo. 
timestamp = getattr(args, "timestamp", None) if photo is None and video and timestamp is not None: @@ -2611,6 +2485,15 @@ def _print_config_result(action: str, data: dict) -> None: if action == "migrate": print(f"\n {bold}Legacy migration{reset}") + home_mig = data.get("home_migration") or {} + if home_mig.get("imported") or home_mig.get("skipped_existing"): + print(f" {gray}brand brain (presets, knowledge, assets, history, config){reset}") + print(f" {gray}from{reset}: {home_mig.get('legacy_home')}") + print(f" {gray}to{reset}: {home_mig.get('target_home')}") + if home_mig.get("skipped_existing"): + print(f" {gray}skipped{reset}: global home already has data") + else: + print(f" {gray}imported{reset}: yes") print(f" {gray}cache{reset}") print(f" {gray}from{reset}: {data.get('legacy_dir')}") print(f" {gray}to{reset}: {data.get('target_dir')}") @@ -2629,6 +2512,10 @@ def _print_config_result(action: str, data: dict) -> None: print(f" {gray}moved{reset}: {presets.get('moved')}") if presets.get("skipped"): print(f" {gray}skipped{reset}: {presets['skipped']} (already in target)") + env_mig = data.get("env_migration") or {} + if env_mig.get("copied"): + print(f" {gray}.env{reset}") + print(f" {gray}to{reset}: {env_mig.get('target')}") if not data.get("dry_run"): print(f"\n {green}✓{reset} Migration complete") print() @@ -3024,8 +2911,20 @@ def cmd_info(args): print(f" Platform: {info['system']}") print(f" Encoder: {info['best']}") print(f" Available: {', '.join(info['available'])}") + import importlib.util + try: + diarization_available = importlib.util.find_spec("pyannote.audio") is not None + except (ImportError, ValueError): + diarization_available = False + if not diarization_available: + speakers_status = f"{yellow}✗ not available in this install (source install only)" + elif hf_token: + speakers_status = f"{green}✓ configured" + else: + speakers_status = f"{yellow}✗ set HF_TOKEN in .env" + print(f" AI CLI: {green}{('Claude' if ai_engine == 'claude' else 'Codex') + 
' (' + ai_path + ')' if ai_path else f'{yellow}not found — install Claude Code or Codex'}{reset}") - print(f" Speakers: {green + '✓ configured' if hf_token else yellow + '✗ set HF_TOKEN in .env'}{reset}") + print(f" Speakers: {speakers_status}{reset}") print() @@ -3084,7 +2983,12 @@ def print_banner(): hf_token = line.strip().split("=", 1)[1].strip() break - speakers_ok = bool(hf_token) + import importlib.util + try: + _diarization_ok = importlib.util.find_spec("pyannote.audio") is not None + except (ImportError, ValueError): + _diarization_ok = False + speakers_ok = bool(hf_token) and _diarization_ok # Check AI CLI (Claude Code or Codex) from services.claude_suggest import _find_ai_cli @@ -3235,6 +3139,7 @@ def main(): proc.add_argument("-n", "--top", type=int, help="Number of top clips to export (default: 5)") proc.add_argument("-o", "--output", help="Output directory (default: ./clips)") proc.add_argument("-p", "--preset", help="Load a saved preset") + proc.add_argument("--engine", choices=["whisper-py", "whispercpp"], help="Transcription engine (default: whisper-py; whispercpp is the native, PyTorch-free path)") proc.add_argument("--fast", action="store_true", help="Draft mode: tiny Whisper, heuristic selection, center crop, low quality") proc.add_argument("--caption-style", choices=["branded", "hormozi", "karaoke", "subtle"]) proc.add_argument("--crop", choices=["center", "face", "speaker", "speaker-hardcut"]) @@ -3558,20 +3463,47 @@ def interactive_menu(): return elif choice == "webui": import subprocess as sp - repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") - spa = os.path.join(repo, "dist", "ui", "public", "index.html") + import shutil as _shutil + backend_dir = os.path.dirname(os.path.abspath(__file__)) port = os.environ.get("PORT", "3847") - ok = True - # npm is npm.cmd on Windows; subprocess can't run a batch file without a shell. 
- _npm_shell = sys.platform == "win32" - if not os.path.exists(spa): - print(f"\n {gray}Building the studio (first run)…{reset}\n") - ok = sp.run(["npm", "run", "build"], cwd=repo, shell=_npm_shell).returncode == 0 - if not ok: - print(f"\n {yellow}Build failed — run 'npm install' then try again.{reset}\n") - if ok: + node = os.environ.get("PODCLI_NODE") or _shutil.which("node") + studio = os.environ.get("PODCLI_STUDIO") or os.path.join(backend_dir, "..", "studio") + server = os.path.join(studio, "web-server.mjs") + repo = os.path.join(backend_dir, "..") + if node and os.path.exists(server): + # Bundled studio: hermetic Node serves it, rendering delegated to + # this same Python backend + ffmpeg via the env below. + env = { + **os.environ, + "PORT": str(port), + "PODCLI_BACKEND": backend_dir, + "PYTHON_PATH": sys.executable, + "PODCLI_HOME": paths["home"], + # data_dir is the cache's parent — output is now decoupled + # (clips render to the working dir), so don't derive it from output. + "PODCLI_DATA": os.path.dirname(paths["cache"]), + "PODCLI_OUTPUT": paths["output"], + "FFMPEG_PATH": os.environ.get("PODCLI_FFMPEG", "ffmpeg"), + "FFPROBE_PATH": os.environ.get("PODCLI_FFPROBE", "ffprobe"), + } print(f"\n {gray}Studio:{reset} {accent}http://localhost:{port}{reset} {dim}(Ctrl+C to stop){reset}\n") - sp.run(["npm", "run", "ui:prod"], cwd=repo, shell=_npm_shell) + sp.run([node, server], env=env) + elif os.path.exists(os.path.join(repo, "package.json")) and _shutil.which("npm"): + # Source checkout (dev): build + serve via npm. 
+ _npm_shell = sys.platform == "win32" + spa = os.path.join(repo, "dist", "ui", "public", "index.html") + ok = True + if not os.path.exists(spa): + print(f"\n {gray}Building the studio (first run)…{reset}\n") + ok = sp.run(["npm", "run", "build"], cwd=repo, shell=_npm_shell).returncode == 0 + if not ok: + print(f"\n {yellow}Build failed — run 'npm install' then try again.{reset}\n") + if ok: + print(f"\n {gray}Studio:{reset} {accent}http://localhost:{port}{reset} {dim}(Ctrl+C to stop){reset}\n") + sp.run(["npm", "run", "ui:prod"], cwd=repo, shell=_npm_shell) + else: + print(f"\n {yellow}Studio isn't provisioned yet.{reset}") + print(f" {dim}Run{reset} {accent}podcli setup{reset} {dim}to fetch the bundled studio + Node.{reset}\n") elif choice == "assets": _interactive_assets() elif choice == "presets": @@ -4478,6 +4410,7 @@ def _interactive_thumbnails(): video=video or None, logo=None, variations=3, + output="./thumbnails", ) cmd_thumbnails(args_ns) diff --git a/scripts/clip_studio.py b/backend/clip_studio.py similarity index 96% rename from scripts/clip_studio.py rename to backend/clip_studio.py index cac3eb2..bbb9d87 100644 --- a/scripts/clip_studio.py +++ b/backend/clip_studio.py @@ -37,6 +37,9 @@ FFMPEG = os.environ.get("PODCLI_FFMPEG", "ffmpeg") NODE = os.environ.get("PODCLI_NODE", "node") +# Share the prewarmed, project-independent composition bundle with the fragment +# caption render (clip_generator) and the bookend render. +os.environ.setdefault("PODCLI_CACHE_DIR", os.path.join(ROOT, "remotion", ".bundle-cache")) # Brand config: remembered handle / platforms / colors / outro title so they # don't have to be retyped each run. 
CLI flags override these; these override @@ -69,7 +72,7 @@ def _save_brand(data: dict): def _probe_duration(path: str) -> float: out = subprocess.run( - ["ffprobe", "-v", "error", "-show_entries", "format=duration", + [os.environ.get("PODCLI_FFPROBE", "ffprobe"), "-v", "error", "-show_entries", "format=duration", "-of", "default=nw=1:nk=1", path], capture_output=True, text=True, ) @@ -236,7 +239,8 @@ def main(): video = os.path.abspath(video) if not os.path.exists(video): raise SystemExit(f"Video not found: {video}") - out_dir = os.path.join(ROOT, "data", "output") + data_dir = os.environ.get("PODCLI_DATA") or os.path.join(ROOT, "data") + out_dir = os.path.join(data_dir, "output") os.makedirs(out_dir, exist_ok=True) # Need a transcript if cutting by paragraph or if rendering captions. diff --git a/backend/config/paths.py b/backend/config/paths.py index 66dd38a..2a603e3 100644 --- a/backend/config/paths.py +++ b/backend/config/paths.py @@ -44,6 +44,7 @@ def _build_paths() -> dict[str, str]: home = _resolve_home() project_root = _project_root() data_dir = Path(os.environ.get("PODCLI_DATA", str(project_root / "data"))).expanduser().resolve() + output_dir = Path(os.environ.get("PODCLI_OUTPUT", str(data_dir / "output"))).expanduser().resolve() return { "home": str(home), "project_root": str(project_root), @@ -51,7 +52,7 @@ def _build_paths() -> dict[str, str]: "transcripts": str(data_dir / "cache" / "transcripts"), "packed": str(home / "packed"), "working": str(data_dir / "working"), - "output": str(data_dir / "output"), + "output": str(output_dir), "logs": str(data_dir / "logs"), "assets": str(home / "assets"), "assetsRegistry": str(home / "assets" / "registry.json"), diff --git a/backend/config_bundle.py b/backend/config_bundle.py index 612be04..1e1e000 100644 --- a/backend/config_bundle.py +++ b/backend/config_bundle.py @@ -4,6 +4,7 @@ import os import re import shutil +import tempfile import zipfile from datetime import datetime, timezone from pathlib import Path @@ 
-60,12 +61,34 @@ def _migration_marker_path() -> Path: return _data_dir() / MIGRATION_MARKER_NAME +# Migration reads the working directory (the old project-local ./podcli folder) +# and imports it into the global store. PODCLI_CWD is injected by the launcher. +def _legacy_project_dir() -> Path: + return Path(os.environ.get("PODCLI_CWD") or os.getcwd()).expanduser().resolve() + + +def _global_home() -> Path: + return Path(paths["home"]).resolve() + + +def _legacy_home_dir() -> Path: + return _legacy_project_dir() / ".podcli" + + def _legacy_cache_dir() -> Path: - return Path(paths["project_root"]) / ".podcli" / "cache" + # Old layouts stored the transcript cache under /.podcli/cache (original) + # or /data/cache (interim project-local). Prefer whichever holds content. + proj = _legacy_project_dir() + for candidate in (proj / ".podcli" / "cache", proj / "data" / "cache"): + if candidate.is_dir() and any(candidate.iterdir()): + return candidate + return proj / ".podcli" / "cache" def _legacy_presets_dir() -> Path: - return Path(paths["project_root"]) / "presets" + # Ancient layout kept presets at /presets, outside .podcli. Presets + # inside .podcli/presets are covered by the brand-brain import instead. + return _legacy_project_dir() / "presets" def _legacy_presets_has_content() -> bool: @@ -73,8 +96,48 @@ def _legacy_presets_has_content() -> bool: return legacy.is_dir() and any(legacy.glob("*.json")) +def _legacy_env_file() -> Path: + return _legacy_project_dir() / ".env" + + +def _global_env_file() -> Path: + # Match the file the launcher/loader actually reads (PODCLI_ENV_FILE), so the + # migrated secrets land where they'll be loaded and "pending" clears correctly. 
+ return Path(os.environ.get("PODCLI_ENV_FILE") or (_global_home() / ".env")) + + +def _legacy_env_pending() -> bool: + src = _legacy_env_file() + return src.is_file() and src.resolve() != _global_env_file().resolve() and not _global_env_file().exists() + + +def _legacy_home_pending() -> bool: + legacy = _legacy_home_dir() + if legacy.resolve() == _global_home(): + return False + return _has_managed_content(legacy) and not _has_managed_content(_global_home()) + + +def _asset_alias_keys(raw: str, source: Path) -> list[str]: + """All string forms of an asset path. The rewrite matches the literal stored + value, which differs from its realpath through a symlink (macOS /var).""" + keys: list[str] = [] + for k in (raw, str(Path(raw).expanduser()) if raw else "", str(source), str(source.resolve())): + if k and k not in keys: + keys.append(k) + return keys + + def _legacy_migration_pending() -> bool: - return _legacy_cache_has_content() or _legacy_presets_has_content() + # Never treat the global managed dir itself as a legacy project to import. 
+ if _legacy_project_dir().resolve() == _global_home().resolve(): + return False + return ( + _legacy_home_pending() + or _legacy_cache_has_content() + or _legacy_presets_has_content() + or _legacy_env_pending() + ) def _read_json(path: Path) -> Any: @@ -150,16 +213,18 @@ def export_config(bundle_path: str, source_home: str | None = None) -> dict[str, for index, item in enumerate(raw_assets): if not isinstance(item, dict): continue - source = Path(str(item.get("path", ""))).expanduser() + raw_path = str(item.get("path", "")) + source = Path(raw_path).expanduser() if not source.exists(): continue archive_name = _archive_name_for(index, str(item.get("name", "asset")), source) archive_path = f"{ASSET_ARCHIVE_DIR}/{archive_name}" zf.write(source, arcname=archive_path) registry_export.append({**item, "path": archive_path}) - path_map[str(source.resolve())] = archive_path + for key in _asset_alias_keys(raw_path, source): + path_map[key] = archive_path - extra_sources: list[Path] = [] + extra_sources: list[tuple[str, Path]] = [] for rel in ["ui-state.json"]: src = home / rel if src.exists(): @@ -168,7 +233,7 @@ def export_config(bundle_path: str, source_home: str | None = None) -> dict[str, for candidate in _collect_asset_paths(raw): candidate_path = Path(candidate).expanduser() if candidate_path.exists(): - extra_sources.append(candidate_path) + extra_sources.append((candidate, candidate_path)) presets_dir = home / "presets" if presets_dir.exists(): @@ -179,16 +244,19 @@ def export_config(bundle_path: str, source_home: str | None = None) -> dict[str, for candidate in _collect_asset_paths(raw): candidate_path = Path(candidate).expanduser() if candidate_path.exists(): - extra_sources.append(candidate_path) + extra_sources.append((candidate, candidate_path)) - for source in extra_sources: + for raw_candidate, source in extra_sources: resolved = str(source.resolve()) if resolved in path_map: + # Already archived; still register this literal so its JSON gets rewritten. 
+ path_map.setdefault(raw_candidate, path_map[resolved]) continue archive_name = _archive_name_for(len(path_map), source.stem or "asset", source) archive_path = f"{ASSET_ARCHIVE_DIR}/{archive_name}" zf.write(source, arcname=archive_path) - path_map[resolved] = archive_path + for key in _asset_alias_keys(raw_candidate, source): + path_map[key] = archive_path zf.writestr("assets/registry.json", json.dumps({"assets": registry_export}, indent=2) + "\n") manifest["path_map"] = path_map @@ -231,7 +299,7 @@ def migrate_legacy_cache(*, dry_run: bool = False) -> dict[str, Any]: "dry_run": dry_run, } - if not legacy.is_dir(): + if not legacy.is_dir() or legacy.resolve() == target.resolve(): return result target.mkdir(parents=True, exist_ok=True) @@ -299,7 +367,7 @@ def migrate_legacy_presets(*, dry_run: bool = False) -> dict[str, Any]: "skipped": 0, "dry_run": dry_run, } - if not legacy.is_dir(): + if not legacy.is_dir() or legacy.resolve() == target.resolve(): return result target.mkdir(parents=True, exist_ok=True) for src in legacy.glob("*.json"): @@ -319,6 +387,53 @@ def migrate_legacy_presets(*, dry_run: bool = False) -> dict[str, Any]: return result +def migrate_legacy_home(*, dry_run: bool = False) -> dict[str, Any]: + """Import a project-local .podcli into the global home — only when the global + home is empty, so it never clobbers an existing profile.""" + legacy = _legacy_home_dir() + home = _global_home() + result: dict[str, Any] = { + "legacy_home": str(legacy), + "target_home": str(home), + "imported": False, + "dry_run": dry_run, + } + if legacy.resolve() == home or not _has_managed_content(legacy): + return result + if _has_managed_content(home): + result["skipped_existing"] = True + return result + if dry_run: + result["imported"] = True + return result + # Reuse export/import so asset files get archived and absolute path references + # inside presets/config are rewritten to the new global home. 
+ with tempfile.TemporaryDirectory() as tmp: + bundle = os.path.join(tmp, "legacy-home.zip") + export_config(bundle, source_home=str(legacy)) + import_config(bundle, target_home=str(home), activate=False) + result["imported"] = True + return result + + +def migrate_legacy_env(*, dry_run: bool = False) -> dict[str, Any]: + src = _legacy_env_file() + dest = _global_env_file() + result: dict[str, Any] = { + "source": str(src), + "target": str(dest), + "copied": False, + "dry_run": dry_run, + } + if not src.is_file() or src.resolve() == dest.resolve() or dest.exists(): + return result + if not dry_run: + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(str(src), str(dest)) + result["copied"] = True + return result + + def auto_migrate_legacy_if_pending(*, quiet: bool = True) -> dict[str, Any] | None: if not _legacy_migration_pending(): return None @@ -328,28 +443,31 @@ def auto_migrate_legacy_if_pending(*, quiet: bool = True) -> dict[str, Any] | No def ensure_legacy_migrated(*, quiet: bool = True) -> dict[str, Any]: marker = _migration_marker_path() had_marker = marker.exists() + home_summary = migrate_legacy_home(dry_run=False) summary = migrate_legacy_cache(dry_run=False) presets_summary = migrate_legacy_presets(dry_run=False) + env_summary = migrate_legacy_env(dry_run=False) + summary["home_migration"] = home_summary summary["presets_migration"] = presets_summary + summary["env_migration"] = env_summary try: from services.transcript_packer import migrate_transcript_cache_layout layout = migrate_transcript_cache_layout() summary["transcript_layout"] = layout - changed = bool( - summary.get("moved_json") - or summary.get("moved_remotion_bundle") - or summary.get("removed_duplicate_remotion_bundle") - or layout.get("moved_to_transcripts") - or presets_summary.get("moved") - ) + layout_moved = layout.get("moved_to_transcripts") except Exception: - changed = bool( - summary.get("moved_json") - or summary.get("moved_remotion_bundle") - or 
summary.get("removed_duplicate_remotion_bundle") - or presets_summary.get("moved") - ) + summary["transcript_layout"] = {"moved_to_transcripts": 0, "skipped": 0} + layout_moved = 0 + changed = bool( + home_summary.get("imported") + or summary.get("moved_json") + or summary.get("moved_remotion_bundle") + or summary.get("removed_duplicate_remotion_bundle") + or layout_moved + or presets_summary.get("moved") + or env_summary.get("copied") + ) if not had_marker or changed: marker.parent.mkdir(parents=True, exist_ok=True) marker.write_text( @@ -539,8 +657,10 @@ def get_config_status() -> dict[str, Any]: "home": get_active_home(), "cache": paths["cache"], "profile_marker": paths["profileMarker"], + "legacy_home_pending": _legacy_home_pending(), "legacy_cache_pending": _legacy_cache_has_content(), "legacy_presets_pending": _legacy_presets_has_content(), + "legacy_env_pending": _legacy_env_pending(), "migration_marker": str(marker) if marker.exists() else None, "migration": migration_info, } @@ -560,8 +680,9 @@ def run_config_action( if act == "migrate": if dry_run: cache = migrate_legacy_cache(dry_run=True) - presets = migrate_legacy_presets(dry_run=True) - cache["presets_migration"] = presets + cache["home_migration"] = migrate_legacy_home(dry_run=True) + cache["presets_migration"] = migrate_legacy_presets(dry_run=True) + cache["env_migration"] = migrate_legacy_env(dry_run=True) return cache return ensure_legacy_migrated(quiet=True) if act == "export": diff --git a/backend/main.py b/backend/main.py index 079ff5c..5812ad5 100644 --- a/backend/main.py +++ b/backend/main.py @@ -21,7 +21,7 @@ # Load .env file (for HF_TOKEN, etc.) 
try: from dotenv import load_dotenv - load_dotenv(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".env")) + load_dotenv(os.environ.get("PODCLI_ENV_FILE") or os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".env")) except ImportError: pass import traceback @@ -377,6 +377,32 @@ def handle_suggest_clips(task_id: str, params: dict): emit_result(task_id, "success", data={"clips": clips}) +def handle_find_moment(task_id: str, params: dict): + """Locate user-pasted/described moments in the transcript via the AI CLI.""" + from services.claude_suggest import find_moments_from_text + + text = (params.get("text") or params.get("description") or "").strip() + segments = params.get("segments", []) + existing_clips = params.get("existing_clips", []) + max_results = params.get("max_results", 8) + + if not text: + emit_result(task_id, "error", error="text is required") + return + if not segments: + emit_result(task_id, "error", error="segments is required") + return + + clips = find_moments_from_text( + text, + segments, + existing_clips, + progress_callback=lambda pct, msg: emit_progress(task_id, "searching", pct, msg), + max_results=max_results, + ) + emit_result(task_id, "success", data={"clips": clips}) + + def handle_generate_content(task_id: str, params: dict): """Generate titles, descriptions, tags for a clip using PodStack knowledge base.""" from services.content_generator import generate_clip_content @@ -485,6 +511,7 @@ def handle_run_integration_tool(task_id: str, params: dict): "presets": handle_presets, "corrections": handle_corrections, "suggest_clips": handle_suggest_clips, + "find_moment": handle_find_moment, "generate_content": handle_generate_content, "manage_integrations": handle_manage_integrations, "run_integration_tool": handle_run_integration_tool, diff --git a/backend/presets.py b/backend/presets.py index ff74696..70f30f6 100644 --- a/backend/presets.py +++ b/backend/presets.py @@ -40,7 +40,7 @@ "energy_boost": True, "quality": 
"max", "no_speakers": False, - "allow_ass_fallback": False, + "allow_ass_fallback": True, "use_ass_captions": False, "generate_thumbnails": True, "generate_content": True, diff --git a/backend/requirements-runtime.txt b/backend/requirements-runtime.txt new file mode 100644 index 0000000..408f153 --- /dev/null +++ b/backend/requirements-runtime.txt @@ -0,0 +1,11 @@ +# Hermetic native-CLI runtime deps. Transcription is whisper.cpp (a native +# binary), so openai-whisper/torch are intentionally absent — that is the ~2GB +# the native install saves. --engine whisper-py remains a dev/source-only option +# via backend/requirements.txt. +opencv-python-headless>=4.8.1.78 +numpy>=1.24.0 +Pillow>=10.0.0 +questionary>=2.0.0 +python-dotenv>=1.2.2 +google-api-python-client>=2.0.1 +google-auth-oauthlib>=1.0.0 diff --git a/backend/requirements.txt b/backend/requirements.txt index ca445ef..e5fcbfc 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -9,7 +9,7 @@ openai-whisper>=20231117 # speechbrain>=1.0.0 # Face detection (optional, for smart cropping) -opencv-python-headless>=4.8.0 +opencv-python-headless>=4.8.1.78 numpy>=1.24.0 # Thumbnails @@ -19,8 +19,8 @@ Pillow>=10.0.0 questionary>=2.0.0 # Utilities -python-dotenv>=1.0.0 +python-dotenv>=1.2.2 # YouTube analytics (optional — only needed for live OAuth sync; CSV import works without these) -google-api-python-client>=2.0.0 +google-api-python-client>=2.0.1 google-auth-oauthlib>=1.0.0 diff --git a/backend/scripts/remotion_screenshot.cjs b/backend/scripts/remotion_screenshot.cjs index e6f2f9b..0942669 100644 --- a/backend/scripts/remotion_screenshot.cjs +++ b/backend/scripts/remotion_screenshot.cjs @@ -2,7 +2,7 @@ const path = require("node:path"); const rendererRoot = path.resolve(__dirname, "..", "..", "node_modules", "@remotion", "renderer", "dist"); -const {openBrowser} = require("@remotion/renderer"); +const {openBrowser, ensureBrowser} = require("@remotion/renderer"); const {screenshot} = 
require(path.join(rendererRoot, "puppeteer-screenshot.js")); const [htmlPath, outputPath, widthArg, heightArg, waitMsArg] = process.argv.slice(2); @@ -46,6 +46,7 @@ const waitForAssets = async (page) => { }; (async () => { + await ensureBrowser({logLevel: "error"}); const browser = await openBrowser("chrome", {logLevel: "error"}); const closeBrowser = () => { try { browser.close({silent: true}); } catch {} }; process.on("SIGINT", () => { closeBrowser(); process.exit(1); }); diff --git a/backend/services/claude_suggest.py b/backend/services/claude_suggest.py index 2f406c7..e180477 100644 --- a/backend/services/claude_suggest.py +++ b/backend/services/claude_suggest.py @@ -375,6 +375,163 @@ def _dedupe_clips_by_range(clips: list[dict]) -> list[dict]: return deduped +def find_moments_from_text( + description: str, + segments: list[dict], + existing_clips: Optional[list[dict]] = None, + progress_callback: Optional[Callable[[int, str], None]] = None, + max_results: int = 3, +) -> list[dict]: + """Locate the moment(s) the user described/pasted in the transcript via an AI + CLI. Returns clip dicts (same shape as suggest_with_claude). Status goes to + progress_callback; warnings to stderr — never stdout, which is the task + runner's JSON-RPC channel.""" + existing_clips = existing_clips or [] + candidates = _find_ai_cli_candidates() + if not candidates: + print("No AI CLI available for moment search", file=sys.stderr, flush=True) + return [] + + if progress_callback: + progress_callback(15, "Reading transcript...") + transcript_text = _build_transcript_text(segments) + + existing_desc = "" + if existing_clips: + existing_desc = "\n\nALREADY SELECTED (do not re-suggest these):\n" + for c in existing_clips: + existing_desc += f"- {c.get('start_second')}s-{c.get('end_second')}s: {c.get('title', '')}\n" + + upper = max(1, int(max_results)) + prompt = f"""Find the moment(s) the user is describing in this podcast transcript. Return ONLY valid JSON. 
+ +USER WANTS: "{description}" +{existing_desc} +RULES: +- Find the EXACT moment(s) matching what the user pasted/described +- The user may list several moments — return one clip per distinct moment they mention +- Return 1-{upper} matching moments (best match first) +- All timestamps in SECONDS as numbers +- Duration target: {TARGET_CLIP_DURATION_MIN}-{TARGET_CLIP_DURATION_MAX} seconds, max {MAX_CLIP_DURATION} seconds +- Cut tight: start at the hook, end when the point lands +- Use segments to cut filler if needed + +Return this JSON: +{{ + "clips": [ + {{ + "title": "First sentence of the moment", + "start_second": 123.4, + "end_second": 158.4, + "segments": [{{"start": 123.4, "end": 158.4}}], + "duration": 35, + "content_type": "guest_story", + "scores": {{"standalone": 4, "hook": 5, "relevance": 4, "quotability": 3}}, + "total_score": 16, + "quote": "The key quote", + "why": "Why this matches what the user asked for" + }} + ] +}} + +Transcript: +{transcript_text}""" + + # Prompt goes to .podcli/tmp/ (gitignored), not the repo root, so a crash + # mid-run never litters the working tree with transcript dumps. 
+ project_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..") + from utils.prompt_files import write_prompt_file + prompt_file = write_prompt_file(prompt) + + try: + for idx, (cli_path, engine) in enumerate(candidates): + if progress_callback: + label = "Claude" if engine == "claude" else "Codex" + progress_callback(40, f"Searching transcript with {label}...") + try: + result = _run_ai_command( + cli_path=cli_path, + engine=engine, + prompt=prompt, + prompt_file=prompt_file, + project_dir=project_dir, + timeout=300, + ) + except Exception: + continue + + if result.returncode != 0 or not result.stdout.strip(): + continue + + response = result.stdout.strip() + if "```" in response: + import re + + fence_match = re.search(r"```(?:json)?\s*\n?(.*?)\n?\s*```", response, re.DOTALL) + if fence_match: + response = fence_match.group(1).strip() + + try: + json_start = response.find("{") + if json_start >= 0: + data, _ = json.JSONDecoder().raw_decode(response, json_start) + else: + data = json.loads(response) + except Exception: + continue + + found = [] + for c in data.get("clips", []): + scores = c.get("scores", {}) + total = sum(scores.values()) if scores else c.get("total_score", 0) + keep_segments = [] + for seg in c.get("segments", []): + s = round(float(seg.get("start", 0)), 1) + e = round(float(seg.get("end", 0)), 1) + if e > s: + keep_segments.append({"start": s, "end": e}) + + start_sec = round(float(c.get("start_second", 0)), 1) + end_sec = round(float(c.get("end_second", 0)), 1) + if not keep_segments and end_sec > start_sec: + keep_segments = [{"start": start_sec, "end": end_sec}] + + kept_duration = sum(seg["end"] - seg["start"] for seg in keep_segments) + if kept_duration < MIN_CLIP_DURATION or kept_duration > MAX_CLIP_DURATION: + continue + + found.append({ + "title": c.get("title", "Untitled")[:55], + "start_second": keep_segments[0]["start"] if keep_segments else start_sec, + "end_second": keep_segments[-1]["end"] if keep_segments else 
end_sec, + "segments": keep_segments, + "duration": round(kept_duration), + "score": total, + "content_type": c.get("content_type", "unknown"), + "reasoning": c.get("why", ""), + "preview_text": c.get("quote", "")[:120], + "suggested_caption_style": "hormozi", + "quote": c.get("quote", ""), + "why": c.get("why", ""), + "reasons": [c.get("content_type", "")], + "preview": c.get("quote", "")[:120], + }) + + if found: + return _dedupe_clips_by_range(found) + + return [] + + except Exception as e: + print(f"Moment search error: {e}", file=sys.stderr, flush=True) + return [] + finally: + try: + os.unlink(prompt_file) + except Exception: + pass + + def suggest_with_claude( segments: list[dict], top_n: int = 5, diff --git a/backend/services/clip_generator.py b/backend/services/clip_generator.py index 9608bf4..90e2adc 100644 --- a/backend/services/clip_generator.py +++ b/backend/services/clip_generator.py @@ -424,20 +424,25 @@ def _render_with_remotion( return False, None # Check node is available - node_path = shutil.which("node") + node_path = os.environ.get("PODCLI_NODE") or shutil.which("node") if not node_path: _remotion_available = False return False, None - remotion_env = {**os.environ, "PODCLI_CACHE_DIR": paths["cache"]} + # Cache the compiled bundle next to the render script. The compositions are + # project-independent, so a single global bundle (in the managed runtime dir + # for native installs) is reused across every project instead of rebuilt per + # data/cache. 
+ bundle_cache_root = os.path.join(os.path.dirname(render_script), ".bundle-cache") + remotion_env = {**os.environ, "PODCLI_CACHE_DIR": bundle_cache_root} - cache_dir = os.path.join(paths["cache"], "remotion-bundle") + cache_dir = os.path.join(bundle_cache_root, "remotion-bundle") bundle_index = os.path.join(cache_dir, "index.html") if not os.path.exists(bundle_index): try: r = subprocess.run( [node_path, render_script, "--prebundle"], - timeout=30, + timeout=180, cwd=project_root, env=remotion_env, capture_output=True, @@ -545,16 +550,16 @@ def _render_with_remotion( if stdout: lines = [l.strip() for l in stdout.strip().split("\n") if l.strip()] if lines: - print(f" Remotion: {lines[-1][:120]}", flush=True) + print(f" Remotion: {lines[-1][:120]}", file=sys.stderr, flush=True) - print(" Remotion: falling back to ASS for this clip", flush=True) + print(" Remotion: falling back to ASS for this clip", file=sys.stderr, flush=True) return False, None except subprocess.TimeoutExpired: - print(" Remotion: timed out, using ASS for this clip", flush=True) + print(" Remotion: timed out, using ASS for this clip", file=sys.stderr, flush=True) return False, None except Exception: - print(" Remotion: render error, using ASS for this clip", flush=True) + print(" Remotion: render error, using ASS for this clip", file=sys.stderr, flush=True) return False, None finally: try: @@ -695,6 +700,7 @@ def generate_clip( print( f" Boundary revert: post-trim duration {duration:.1f}s < " f"75% of asked {llm_total:.1f}s - using original range", + file=sys.stderr, flush=True, ) start_second = llm_start_second diff --git a/backend/services/encoder.py b/backend/services/encoder.py index 346fcb5..1479ef1 100644 --- a/backend/services/encoder.py +++ b/backend/services/encoder.py @@ -159,7 +159,7 @@ def _encoder_cache_path() -> str: def _ffmpeg_fingerprint() -> str: """Cheap fingerprint (path + mtime) to invalidate cache when ffmpeg changes.""" import shutil - ffbin = shutil.which("ffmpeg") or 
"ffmpeg" + ffbin = os.environ.get("PODCLI_FFMPEG") or shutil.which("ffmpeg") or "ffmpeg" try: st = os.stat(ffbin) return f"{ffbin}:{int(st.st_mtime)}:{st.st_size}" diff --git a/backend/services/thumbnail_html.py b/backend/services/thumbnail_html.py index 3aaaa52..68cf3ea 100644 --- a/backend/services/thumbnail_html.py +++ b/backend/services/thumbnail_html.py @@ -173,7 +173,7 @@ def _build_remotion_screenshot_command( wait_ms: int, ) -> Optional[list[str]]: """Build a Remotion-backed screenshot command if the repo can run it.""" - node_bin = shutil.which("node") + node_bin = os.environ.get("PODCLI_NODE") or shutil.which("node") repo_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..") renderer_pkg = os.path.join(repo_root, "node_modules", "@remotion", "renderer", "package.json") if not node_bin or not os.path.exists(script_path) or not os.path.exists(renderer_pkg): @@ -657,6 +657,8 @@ def generate_thumbnail( if not commands: raise RuntimeError("No browser screenshot command available") + screenshot_cwd = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..")) + for cmd in commands: cmd_label = " ".join(cmd[:3]) if len(cmd) > 3 else " ".join(cmd) try: @@ -665,6 +667,7 @@ def generate_thumbnail( capture_output=True, text=True, timeout=timeout_s, + cwd=screenshot_cwd, # .cmd/npx shims need cmd.exe on Windows; shell=True with a list breaks on POSIX. shell=sys.platform == "win32", ) diff --git a/backend/services/transcript_packer.py b/backend/services/transcript_packer.py index 2d7beec..f311496 100644 --- a/backend/services/transcript_packer.py +++ b/backend/services/transcript_packer.py @@ -62,8 +62,18 @@ def legacy_md5_cache_path(video_path: str) -> str: return os.path.join(_legacy_cache_dir(), hashlib.md5(raw.encode()).hexdigest() + ".json") +def _engine_cache_suffix() -> str: + """Namespace the cache by engine so a whisper.cpp run doesn't reuse a + whisper-py transcript (their timings/word splits differ). 
whisper-py keeps + the bare filename, which the TS transcript cache also writes.""" + engine = os.environ.get("PODCLI_ENGINE", "whisper-py").strip().lower() + if engine in ("whispercpp", "whisper-cpp", "whisper.cpp", "cpp"): + return "-whispercpp" + return "" + + def transcript_json_path(cache_hash: str) -> str: - return os.path.join(_transcripts_cache_dir(), f"{cache_hash}.json") + return os.path.join(_transcripts_cache_dir(), f"{cache_hash}{_engine_cache_suffix()}.json") def load_cached_transcript_for_video(video_path: str) -> dict[str, Any] | None: @@ -72,12 +82,16 @@ def load_cached_transcript_for_video(video_path: str) -> dict[str, Any] | None: if os.path.exists(canonical): with open(canonical, encoding="utf-8") as f: return json.load(f) - legacy = legacy_md5_cache_path(video_path) - if os.path.exists(legacy): - with open(legacy, encoding="utf-8") as f: - data = json.load(f) - save_cached_transcript_for_video(video_path, data) - return data + # The legacy md5 cache predates the engine split and only ever held + # whisper-py output, so don't let a whisper.cpp run adopt it under its own + # namespace. 
+ if _engine_cache_suffix() == "": + legacy = legacy_md5_cache_path(video_path) + if os.path.exists(legacy): + with open(legacy, encoding="utf-8") as f: + data = json.load(f) + save_cached_transcript_for_video(video_path, data) + return data return None diff --git a/backend/services/transcription.py b/backend/services/transcription.py index 69f7493..9ca581e 100644 --- a/backend/services/transcription.py +++ b/backend/services/transcription.py @@ -8,12 +8,77 @@ """ import os +import shutil import subprocess import sys import tempfile from typing import Optional, Callable +def _managed_home() -> str: + h = os.environ.get("PODCLI_HOME") + if h: + return h + home = os.path.expanduser("~") + if sys.platform == "darwin": + return os.path.join(home, "Library", "Application Support", "podcli") + if sys.platform == "win32": + return os.environ.get("LOCALAPPDATA") or os.path.join(home, "AppData", "Local", "podcli") + return os.environ.get("XDG_DATA_HOME") or os.path.join(home, ".local", "share", "podcli") + + +def _whispercpp_cli() -> Optional[str]: + """Resolve the whisper.cpp binary: explicit env, PATH, then the hermetic + runtime location the native installer provisions.""" + cli = os.environ.get("PODCLI_WHISPER_CLI") + if cli and (os.path.exists(cli) or shutil.which(cli)): + return cli + found = shutil.which("whisper-cli") or shutil.which("whisper-cpp") + if found: + return found + exe = "whisper-cli.exe" if sys.platform == "win32" else "whisper-cli" + hermetic = os.path.join(_managed_home(), "runtime", "whisper", exe) + return hermetic if os.path.exists(hermetic) else None + + +def _whispercpp_model(model_size: str) -> str: + return os.environ.get("PODCLI_WHISPERCPP_MODEL") or os.path.join( + _managed_home(), "models", f"ggml-{model_size}.bin" + ) + + +def _whispercpp_ready(model_size: str) -> bool: + return _whispercpp_cli() is not None and os.path.exists(_whispercpp_model(model_size)) + + +def _transcribe_with_whispercpp(file_path, model_size, language, 
progress_callback): + from services import transcription_whispercpp as wcpp + + if progress_callback: + progress_callback(10, "Transcribing with whisper.cpp...") + + cli = _whispercpp_cli() or "whisper-cli" + model = _whispercpp_model(model_size) + if not os.path.exists(model): + raise FileNotFoundError( + f"whisper.cpp model not found: {model}. " + "Set PODCLI_WHISPERCPP_MODEL or run provisioning." + ) + vad = os.environ.get("PODCLI_WHISPERCPP_VAD", "").strip().lower() in ("1", "true", "yes", "on") + result = wcpp.transcribe_file( + file_path, + model_path=model, + whisper_cli=cli, + ffmpeg=os.environ.get("PODCLI_FFMPEG", "ffmpeg"), + language=language, + vad=vad, + vad_model=os.environ.get("PODCLI_WHISPERCPP_VAD_MODEL") or None, + ) + if progress_callback: + progress_callback(100, "Transcription complete") + return result + + def transcribe_file( file_path: str, model_size: str = "base", @@ -39,15 +104,31 @@ def transcribe_file( if not os.path.exists(file_path): raise FileNotFoundError(f"File not found: {file_path}") + requested = os.environ.get("PODCLI_ENGINE", "").strip().lower() + engine = requested or "whisper-py" + if engine in ("whispercpp", "whisper-cpp", "whisper.cpp", "cpp"): + return _transcribe_with_whispercpp(file_path, model_size, language, progress_callback) + # ================================================================ # Step 1: Whisper transcription # ================================================================ if progress_callback: progress_callback(5, "Loading Whisper model...") - import whisper + # Native installs ship whisper.cpp, not openai-whisper. Fall back to it + # automatically — whether whisper is missing OR a broken install fails to + # load/run — unless the user explicitly asked for the whisper-py engine. 
+ try: + import whisper - model = whisper.load_model(model_size) + model = whisper.load_model(model_size) + except Exception as e: + if not requested and _whispercpp_ready(model_size): + return _transcribe_with_whispercpp(file_path, model_size, language, progress_callback) + raise RuntimeError( + "The whisper-py engine needs the full source install (openai-whisper + torch). " + "This native install ships whisper.cpp — rerun with --engine whispercpp." + ) from e if progress_callback: progress_callback(10, f"Transcribing with Whisper ({model_size})...") diff --git a/backend/services/transcription_whispercpp.py b/backend/services/transcription_whispercpp.py new file mode 100644 index 0000000..d62f8be --- /dev/null +++ b/backend/services/transcription_whispercpp.py @@ -0,0 +1,216 @@ +"""whisper.cpp adapter behind the transcribe_file contract. + +Tokens carry a leading-space convention (" and", continuations have none); we +merge on that boundary and strip word text. The strip must match the rest of the +pipeline exactly — apply_corrections() and caption spacing key on stripped text. +""" + +import json +import os +import re +import shutil +import subprocess +import sys +import tempfile +from typing import Optional + +_SPECIAL = re.compile(r"^\[.*\]$") # [_BEG_], [_TT_...], etc. 
+ + +def _extract_wav(media_path: str, wav_path: str, ffmpeg: str = "ffmpeg") -> None: + subprocess.run( + [ffmpeg, "-y", "-loglevel", "error", "-i", media_path, + "-ar", "16000", "-ac", "1", wav_path], + check=True, + timeout=1800, + ) + + +def _tokens_to_words(tokens: list[dict]) -> list[dict]: + """Merge whisper.cpp subword tokens into words with start/end seconds.""" + words: list[dict] = [] + cur_text = "" + cur_start: Optional[float] = None + cur_end: Optional[float] = None + + def flush(): + nonlocal cur_text, cur_start, cur_end + text = cur_text.strip().lstrip("▁").strip() + if text and cur_start is not None: + words.append({ + "word": text, + "start": round(cur_start / 1000.0, 3), + "end": round(cur_end / 1000.0, 3), + "speaker": None, + }) + cur_text, cur_start, cur_end = "", None, None + + for tok in tokens: + raw = tok.get("text", "") + if _SPECIAL.match(raw.strip()): + continue + off = tok.get("offsets") or {} + t0, t1 = off.get("from"), off.get("to") + starts_word = raw.startswith(" ") or raw.startswith("▁") + if starts_word and cur_text: + flush() + if cur_start is None and t0 is not None: + cur_start = t0 + cur_text += raw + if t1 is not None: + cur_end = t1 + flush() + return words + + +def _voiced_intervals(wav_path: str, bridge: float = 0.3, thresh_ratio: float = 0.07): + import wave + + import numpy as np + + try: + w = wave.open(wav_path, "rb") + sr, width, n = w.getframerate(), w.getsampwidth(), w.getnframes() + raw = w.readframes(n) + w.close() + except Exception: + return [] + if width != 2 or sr <= 0 or not raw: + return [] + samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32) + hop = max(1, int(sr * 0.010)) + frame = max(hop, int(sr * 0.025)) + if len(samples) < frame: + return [] + nf = 1 + (len(samples) - frame) // hop + idx = np.arange(nf)[:, None] * hop + np.arange(frame)[None, :] + rms = np.sqrt((samples[idx] ** 2).mean(axis=1)) + peak = float(rms.max()) + if peak <= 0: + return [] + voiced = rms > thresh_ratio * peak + 
intervals, start = [], None + for i, v in enumerate(voiced): + if v and start is None: + start = i * hop / sr + elif not v and start is not None: + intervals.append([start, i * hop / sr]) + start = None + if start is not None: + intervals.append([start, nf * hop / sr]) + merged = [] + for iv in intervals: + if merged and iv[0] - merged[-1][1] <= bridge: + merged[-1][1] = iv[1] + else: + merged.append(iv) + return merged + + +def _snap_words_to_voiced(words: list[dict], wav_path: str) -> list[dict]: + """Pull word timings back into the voiced span. whisper.cpp sometimes + stretches trailing words across trailing silence; clamping to [first voiced, + last voiced] (with a small pad) and re-flowing keeps captions in sync without + disturbing words that already overlap speech.""" + if not words: + return words + intervals = _voiced_intervals(wav_path) + if not intervals: + return words + pad = 0.15 + lo = max(0.0, intervals[0][0] - pad) + hi = intervals[-1][1] + pad + out, prev_end = [], lo + for w in words: + s = min(max(float(w.get("start", 0.0)), lo), hi) + e = min(max(float(w.get("end", 0.0)), lo), hi) + if s < prev_end: + s = prev_end + if e <= s: + # A word clamped to hi would otherwise collapse to zero length; a + # tiny overhang past the voiced pad is harmless, a zero-length + # caption event is not. 
+ e = s + 0.05 + prev_end = e + out.append({**w, "start": round(s, 3), "end": round(e, 3)}) + return out + + +def transcribe_file( + file_path: str, + model_path: str, + whisper_cli: str = "whisper-cli", + ffmpeg: str = "ffmpeg", + language: Optional[str] = "en", + dtw_model: str = "base", + threads: int = 4, + vad: bool = False, + vad_model: Optional[str] = None, + **_ignored, +) -> dict: + if not os.path.exists(file_path): + raise FileNotFoundError(file_path) + if not os.path.exists(model_path): + raise FileNotFoundError(f"ggml model not found: {model_path}") + + tmpdir = tempfile.mkdtemp(prefix="wcpp_") + wav = os.path.join(tmpdir, "audio.wav") + out_base = os.path.join(tmpdir, "out") + try: + _extract_wav(file_path, wav, ffmpeg) + + cmd = [whisper_cli, "-m", model_path, "-f", wav, "-ojf", + "-of", out_base, "-t", str(threads)] + if dtw_model: + cmd += ["-dtw", dtw_model] + if vad and vad_model and os.path.exists(vad_model): + # VAD removes the trailing-words-into-silence failure mode but adds a + # systematic early bias (silence-removal remapping). Off by default; + # the energy-snap below addresses the same defect without the bias. 
+ cmd += ["--vad", "--vad-model", vad_model] + if language: + cmd += ["-l", language] + subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True, timeout=7200) + + with open(out_base + ".json", encoding="utf-8") as f: + data = json.load(f) + + transcription = data.get("transcription", []) + segments, words = [], [] + for i, seg in enumerate(transcription): + off = seg.get("offsets") or {} + segments.append({ + "id": i, + "start": round((off.get("from") or 0) / 1000.0, 3), + "end": round((off.get("to") or 0) / 1000.0, 3), + "text": (seg.get("text") or "").strip(), + "speaker": None, + }) + words.extend(_tokens_to_words(seg.get("tokens", []))) + + if os.environ.get("PODCLI_WHISPERCPP_NO_SNAP", "").strip().lower() not in ("1", "true", "yes", "on"): + words = _snap_words_to_voiced(words, wav) + + return { + "transcript": " ".join(s["text"] for s in segments).strip(), + "segments": segments, + "words": words, + "duration": segments[-1]["end"] if segments else 0.0, + "language": (data.get("params") or {}).get("language") or language or "en", + } + finally: + shutil.rmtree(tmpdir, ignore_errors=True) + + +if __name__ == "__main__": + # Quick CLI: transcribe_whispercpp [out.json] + media, model = sys.argv[1], sys.argv[2] + out = sys.argv[3] if len(sys.argv) > 3 else None + result = transcribe_file(media, model) + payload = json.dumps(result, indent=2, ensure_ascii=False) + if out: + with open(out, "w", encoding="utf-8") as f: + f.write(payload) + print(f"{len(result['words'])} words, {len(result['segments'])} segments -> {out}") + else: + print(payload) diff --git a/backend/utils/proc.py b/backend/utils/proc.py index d118dd9..e48b530 100644 --- a/backend/utils/proc.py +++ b/backend/utils/proc.py @@ -8,12 +8,24 @@ from __future__ import annotations import logging +import os import subprocess import time from typing import Sequence log = logging.getLogger("podcli.proc") +_TOOL_ENV = {"ffmpeg": "PODCLI_FFMPEG", "ffprobe": "PODCLI_FFPROBE"} 
+ + +def _resolve_tool(cmd: Sequence[str]) -> list[str]: + if not cmd: + return list(cmd) + override = os.environ.get(_TOOL_ENV.get(cmd[0], "")) + if override and os.path.exists(override): + return [override, *cmd[1:]] + return list(cmd) + class ProcError(RuntimeError): """Raised when a wrapped subprocess fails or times out.""" @@ -45,6 +57,7 @@ def run( """ if not cmd: raise ValueError("proc.run: cmd must be non-empty") + cmd = _resolve_tool(cmd) tool = cmd[0] t0 = time.monotonic() log.debug("proc.start tool=%s argc=%d timeout=%.0fs", tool, len(cmd), timeout) diff --git a/cli/go.mod b/cli/go.mod new file mode 100644 index 0000000..848df68 --- /dev/null +++ b/cli/go.mod @@ -0,0 +1,3 @@ +module podcli + +go 1.23 diff --git a/cli/internal/backend/embed.go b/cli/internal/backend/embed.go new file mode 100644 index 0000000..be0a363 --- /dev/null +++ b/cli/internal/backend/embed.go @@ -0,0 +1,48 @@ +// Package backend ships the Python processing backend inside the launcher +// binary so an installed podcli runs without the source repo. The files/ tree is +// synced from the repo backend/ at build time (`go generate ./...` or CI) and is +// gitignored — never edit files/ by hand. +package backend + +import ( + "embed" + "io/fs" + "os" + "path/filepath" +) + +//go:generate sh sync.sh +//go:embed all:files +var files embed.FS + +// Extract replaces dest with the embedded backend tree. dest is removed first so +// a stale dev symlink or an older extracted copy never shadows the shipped one. +func Extract(dest string) error { + if err := os.RemoveAll(dest); err != nil { + return err + } + return fs.WalkDir(files, "files", func(p string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + rel, err := filepath.Rel("files", p) + if err != nil { + return err + } + if rel == "." 
{ + return nil + } + target := filepath.Join(dest, rel) + if d.IsDir() { + return os.MkdirAll(target, 0o755) + } + data, err := files.ReadFile(p) + if err != nil { + return err + } + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + return err + } + return os.WriteFile(target, data, 0o644) + }) +} diff --git a/cli/internal/backend/sync.sh b/cli/internal/backend/sync.sh new file mode 100644 index 0000000..51ca2ac --- /dev/null +++ b/cli/internal/backend/sync.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# Sync the repo Python backend into files/ for go:embed. Run via `go generate +# ./...` before building. files/ is gitignored; this is its only writer. +set -e +here="$(cd "$(dirname "$0")" && pwd)" +src="$here/../../../backend" +dest="$here/files" +rm -rf "$dest" +mkdir -p "$dest" +rsync -a \ + --exclude='__pycache__' \ + --exclude='*.pyc' \ + --exclude='venv' \ + --exclude='.venv' \ + --exclude='requirements.txt' \ + --exclude='models/res10_300x300_ssd_iter_140000.caffemodel' \ + --exclude='models/deploy.prototxt' \ + "$src"/ "$dest"/ +echo "synced backend -> $dest" diff --git a/cli/internal/config/config.go b/cli/internal/config/config.go new file mode 100644 index 0000000..ef972fe --- /dev/null +++ b/cli/internal/config/config.go @@ -0,0 +1,83 @@ +// Package config persists launcher settings (currently the auto-update +// off-switch) in the managed dir's config.json. 
+package config + +import ( + "encoding/json" + "fmt" + "os" + "strings" + + "podcli/internal/paths" +) + +type Config struct { + Update struct { + Auto *bool `json:"auto,omitempty"` + } `json:"update"` +} + +func Load() Config { + var c Config + if b, err := os.ReadFile(paths.ConfigPath()); err == nil { + json.Unmarshal(b, &c) + } + return c +} + +func (c Config) Save() error { + if err := os.MkdirAll(paths.Home(), 0o755); err != nil { + return err + } + b, _ := json.MarshalIndent(c, "", " ") + return os.WriteFile(paths.ConfigPath(), b, 0o644) +} + +func truthy(s string) bool { + switch strings.ToLower(strings.TrimSpace(s)) { + case "1", "true", "yes", "on": + return true + } + return false +} + +// AutoUpdate is true unless disabled via config update.auto or PODCLI_NO_UPDATE. +func AutoUpdate() bool { + if truthy(os.Getenv("PODCLI_NO_UPDATE")) { + return false + } + if a := Load().Update.Auto; a != nil { + return *a + } + return true +} + +func Get(key string) (string, error) { + switch key { + case "update.auto": + if a := Load().Update.Auto; a != nil && !*a { + return "off", nil + } + return "on", nil + } + return "", fmt.Errorf("unknown config key %q (known: update.auto)", key) +} + +func Set(key, val string) error { + switch key { + case "update.auto": + on := !offValue(val) + c := Load() + c.Update.Auto = &on + return c.Save() + } + return fmt.Errorf("unknown config key %q (known: update.auto)", key) +} + +func offValue(s string) bool { + switch strings.ToLower(strings.TrimSpace(s)) { + case "off", "false", "no", "0", "disable", "disabled": + return true + } + return false +} diff --git a/cli/internal/engine/engine.go b/cli/internal/engine/engine.go new file mode 100644 index 0000000..82cae60 --- /dev/null +++ b/cli/internal/engine/engine.go @@ -0,0 +1,263 @@ +// Package engine routes podcli subcommands to the Python backend. 
+package engine + +import ( + "bytes" + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + + "podcli/internal/paths" +) + +// stderrFilter drops the macOS/OpenCV/Whisper startup noise the old bash +// launcher stripped with `grep -v`, so native runs aren't louder than before. +// It buffers partial lines and forwards everything else to real stderr. +type stderrFilter struct{ buf []byte } + +func isStderrNoise(line string) bool { + return strings.HasPrefix(line, "objc[") || + strings.Contains(line, "FP16 is not supported") || + strings.HasPrefix(strings.TrimSpace(line), "warnings.warn(") +} + +func (w *stderrFilter) Write(p []byte) (int, error) { + w.buf = append(w.buf, p...) + for { + i := bytes.IndexByte(w.buf, '\n') + if i < 0 { + break + } + line := w.buf[:i] + w.buf = w.buf[i+1:] + if !isStderrNoise(string(line)) { + os.Stderr.Write(append(line, '\n')) + } + } + return len(p), nil +} + +func (w *stderrFilter) flush() { + if len(w.buf) > 0 && !isStderrNoise(string(w.buf)) { + os.Stderr.Write(w.buf) + } + w.buf = nil +} + +// IsHermeticPython reports whether the resolved interpreter is the provisioned +// one (which lacks openai-whisper, so transcription must default to whisper.cpp). 
+func IsHermeticPython() bool { + return strings.HasPrefix(Python(), paths.RuntimeDir()) +} + +func exists(p string) bool { + _, err := os.Stat(p) + return err == nil +} + +func BackendRoot() (string, bool) { + if b := os.Getenv("PODCLI_BACKEND"); b != "" && exists(filepath.Join(b, "cli.py")) { + return b, true + } + if dir, err := os.Getwd(); err == nil { + for { + cand := filepath.Join(dir, "backend") + if exists(filepath.Join(cand, "cli.py")) { + return cand, true + } + parent := filepath.Dir(dir) + if parent == dir { + break + } + dir = parent + } + } + cand := filepath.Join(paths.RuntimeDir(), "backend") + if exists(filepath.Join(cand, "cli.py")) { + return cand, true + } + return "", false +} + +func Python() string { + if p := os.Getenv("PODCLI_PYTHON"); p != "" { + return p + } + hermetic := []string{ + filepath.Join(paths.RuntimeDir(), "python", "bin", "python3"), + filepath.Join(paths.RuntimeDir(), "python", "python.exe"), + } + for _, p := range hermetic { + if exists(p) { + return p + } + } + if root, ok := BackendRoot(); ok { + venv := filepath.Join(filepath.Dir(root), "venv", "bin", "python3") + if runtime.GOOS == "windows" { + venv = filepath.Join(filepath.Dir(root), "venv", "Scripts", "python.exe") + } + if exists(venv) { + return venv + } + } + return "python3" +} + +func runtimeBin(sub, name string) string { + for _, p := range []string{ + filepath.Join(paths.RuntimeDir(), sub, name), + filepath.Join(paths.RuntimeDir(), sub, name+".exe"), + } { + if exists(p) { + return p + } + } + return "" +} + +func FFmpeg() string { return runtimeBin("ffmpeg", "ffmpeg") } +func FFprobe() string { return runtimeBin("ffmpeg", "ffprobe") } +func WhisperCLI() string { return runtimeBin("whisper", "whisper-cli") } + +func Node() string { + for _, p := range []string{ + filepath.Join(paths.RuntimeDir(), "node", "bin", "node"), + filepath.Join(paths.RuntimeDir(), "node", "node.exe"), + } { + if exists(p) { + return p + } + } + return "" +} + +func StudioServer() 
string { + p := filepath.Join(paths.RuntimeDir(), "studio", "web-server.mjs") + if exists(p) { + return p + } + return "" +} + +func MCPServer() string { + p := filepath.Join(paths.RuntimeDir(), "studio", "mcp-server.mjs") + if exists(p) { + return p + } + return "" +} + +// nodeEnv builds the env a bundled Node server (studio/MCP) needs: the TS +// paths.ts reads these names (note PYTHON_PATH/FFMPEG_PATH differ from the +// PODCLI_* names the Python side uses). Project data stays cwd-local. +func nodeEnv() []string { + env := os.Environ() + if root, ok := BackendRoot(); ok { + env = append(env, "PODCLI_BACKEND="+root) + } + env = append(env, "PYTHON_PATH="+Python()) + if ff := FFmpeg(); ff != "" { + env = append(env, "FFMPEG_PATH="+ff) + } + if fp := FFprobe(); fp != "" { + env = append(env, "FFPROBE_PATH="+fp) + } + env = append(env, DataEnv()...) + return env +} + +// RunMCP execs the bundled MCP stdio server under hermetic Node. stdout carries +// JSON-RPC, so nothing here may write to it. +func RunMCP() (int, error) { + node, server := Node(), MCPServer() + if node == "" || server == "" { + return 1, fmt.Errorf("MCP server not provisioned — run `podcli setup`") + } + cmd := exec.Command(node, server) + cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr + cmd.Env = nodeEnv() + if err := cmd.Run(); err != nil { + if ee, ok := err.(*exec.ExitError); ok { + return ee.ExitCode(), nil + } + return 1, err + } + return 0, nil +} + +// DataEnv tells the backend where data lives. The brand brain (presets, +// knowledge, assets, history, config) and the transcript cache live in the +// global managed dir so they follow the user across directories; only rendered +// clips go under the working directory (PODCLI_OUTPUT). Explicit env always wins +// so power users and tests can override any single path. 
+func DataEnv() []string { + var env []string + home := paths.Home() + if os.Getenv("PODCLI_HOME") == "" { + env = append(env, "PODCLI_HOME="+home) + } + if os.Getenv("PODCLI_DATA") == "" { + env = append(env, "PODCLI_DATA="+filepath.Join(home, "data")) + } + if os.Getenv("PODCLI_ENV_FILE") == "" { + env = append(env, "PODCLI_ENV_FILE="+filepath.Join(home, ".env")) + } + if cwd, err := os.Getwd(); err == nil { + if os.Getenv("PODCLI_CWD") == "" { + env = append(env, "PODCLI_CWD="+cwd) + } + if os.Getenv("PODCLI_OUTPUT") == "" { + env = append(env, "PODCLI_OUTPUT="+filepath.Join(cwd, "podcli-clips")) + } + } + return env +} + +func Run(args []string) (int, error) { + root, ok := BackendRoot() + if !ok { + return 1, fmt.Errorf("python backend not found — set PODCLI_BACKEND or run inside the repo") + } + cli := filepath.Join(root, "cli.py") + full := append([]string{"-W", "ignore::UserWarning", cli}, args...) + + cmd := exec.Command(Python(), full...) + sf := &stderrFilter{} + cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, sf + env := append(os.Environ(), + "OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES", + "PYTHONIOENCODING=utf-8", + "PYTHONUTF8=1", + ) + if ff := FFmpeg(); ff != "" { + env = append(env, "PODCLI_FFMPEG="+ff) + } + if fp := FFprobe(); fp != "" { + env = append(env, "PODCLI_FFPROBE="+fp) + } + if wc := WhisperCLI(); wc != "" { + env = append(env, "PODCLI_WHISPER_CLI="+wc) + } + if nd := Node(); nd != "" { + env = append(env, "PODCLI_NODE="+nd) + } + if ss := StudioServer(); ss != "" { + env = append(env, "PODCLI_STUDIO="+filepath.Dir(ss)) + } + env = append(env, DataEnv()...) 
+ cmd.Env = env + + err := cmd.Run() + sf.flush() + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + return ee.ExitCode(), nil + } + return 1, err + } + return 0, nil +} diff --git a/cli/internal/paths/paths.go b/cli/internal/paths/paths.go new file mode 100644 index 0000000..d510e3d --- /dev/null +++ b/cli/internal/paths/paths.go @@ -0,0 +1,45 @@ +// Package paths resolves the global managed directory where podcli keeps its +// hermetic runtimes, models, binaries, and config. Per-project working data +// (.podcli/) stays in the user's current directory and is not handled here. +package paths + +import ( + "os" + "path/filepath" + "runtime" +) + +// Home is the global managed dir. Override with PODCLI_HOME (used by tests and +// power users). Layout matches plans/native-cli.md: +// +// darwin ~/Library/Application Support/podcli +// windows %LOCALAPPDATA%\podcli +// linux $XDG_DATA_HOME/podcli (or ~/.local/share/podcli) +func Home() string { + if h := os.Getenv("PODCLI_HOME"); h != "" { + return h + } + home, err := os.UserHomeDir() + if err != nil { + home = "." 
+ } + switch runtime.GOOS { + case "darwin": + return filepath.Join(home, "Library", "Application Support", "podcli") + case "windows": + if d := os.Getenv("LOCALAPPDATA"); d != "" { + return filepath.Join(d, "podcli") + } + return filepath.Join(home, "AppData", "Local", "podcli") + default: + if d := os.Getenv("XDG_DATA_HOME"); d != "" { + return filepath.Join(d, "podcli") + } + return filepath.Join(home, ".local", "share", "podcli") + } +} + +func RuntimeDir() string { return filepath.Join(Home(), "runtime") } +func ModelsDir() string { return filepath.Join(Home(), "models") } +func BinDir() string { return filepath.Join(Home(), "bin") } +func ConfigPath() string { return filepath.Join(Home(), "config.json") } diff --git a/cli/internal/podstack/podstack.go b/cli/internal/podstack/podstack.go new file mode 100644 index 0000000..9a5d346 --- /dev/null +++ b/cli/internal/podstack/podstack.go @@ -0,0 +1,173 @@ +// Package podstack forwards PodStack workflow commands (auto, generate-titles, +// …) to an AI agent CLI (Claude Code or Codex), porting the old bash launcher. +// The commands are Claude Code slash commands driving the podcli MCP tools, so +// they run inside the agent, not the terminal. The slash-command files are +// embedded and installed into the working project on demand. +package podstack + +import ( + "embed" + "fmt" + "io/fs" + "os" + "os/exec" + "path/filepath" + "sort" + "strings" + "syscall" +) + +//go:generate sh sync.sh +//go:embed all:commands +var commands embed.FS + +const ( + colAccent = "\033[38;2;212;135;74m" + colGreen = "\033[38;2;74;222;128m" + colYellow = "\033[38;2;250;204;21m" + colBold = "\033[1m" + colDim = "\033[2m" + colReset = "\033[0m" +) + +// Names lists the supported PodStack commands, derived from the embedded files. 
+func Names() []string { + entries, err := commands.ReadDir("commands") + if err != nil { + return nil + } + var names []string + for _, e := range entries { + if strings.HasSuffix(e.Name(), ".md") { + names = append(names, strings.TrimSuffix(e.Name(), ".md")) + } + } + sort.Strings(names) + return names +} + +func IsCommand(name string) bool { + name = strings.TrimPrefix(strings.TrimPrefix(name, "--"), "/") + for _, n := range Names() { + if n == name { + return true + } + } + return false +} + +// installCommands writes the embedded slash-command files into +// /.claude/commands so the agent can resolve /. Existing files are +// left untouched, so a user's local edits win. +func installCommands(project string) error { + dest := filepath.Join(project, ".claude", "commands") + if err := os.MkdirAll(dest, 0o755); err != nil { + return err + } + return fs.WalkDir(commands, "commands", func(p string, d fs.DirEntry, err error) error { + if err != nil || d.IsDir() { + return err + } + target := filepath.Join(dest, filepath.Base(p)) + if _, err := os.Stat(target); err == nil { + return nil + } + data, err := commands.ReadFile(p) + if err != nil { + return err + } + return os.WriteFile(target, data, 0o644) + }) +} + +// Run launches the agent for cmd with the remaining args as the slash-command +// arguments. Engine selection: --claude / --codex / --ai , else +// PODCLI_AI, else auto (Claude preferred, Codex fallback). 
+func Run(cmd string, args []string) int { + cmd = strings.TrimPrefix(strings.TrimPrefix(cmd, "--"), "/") + engine := os.Getenv("PODCLI_AI") + if engine == "" { + engine = "auto" + } + var promptArgs []string + for i := 0; i < len(args); i++ { + switch a := args[i]; { + case a == "--codex": + engine = "codex" + case a == "--claude": + engine = "claude" + case a == "--ai": + if i+1 < len(args) { + engine = args[i+1] + i++ + } + case strings.HasPrefix(a, "--ai="): + engine = strings.TrimPrefix(a, "--ai=") + default: + promptArgs = append(promptArgs, a) + } + } + switch engine { + case "auto", "claude", "codex": + default: + fmt.Fprintf(os.Stderr, " %sInvalid AI engine:%s %s — use --claude, --codex, or --ai auto\n", colBold, colReset, engine) + return 1 + } + + project, err := os.Getwd() + if err != nil { + project = "." + } + if err := installCommands(project); err != nil { + fmt.Fprintf(os.Stderr, " %swarning:%s could not install slash commands: %v\n", colYellow, colReset, err) + } + + prompt := "/" + cmd + if len(promptArgs) > 0 { + prompt += " " + strings.Join(promptArgs, " ") + } + codexPrompt := fmt.Sprintf("Run the PodStack workflow from .claude/commands/%s.md with these arguments, then follow that workflow exactly: %s", cmd, prompt) + + claudeBin, _ := exec.LookPath("claude") + codexBin, _ := exec.LookPath("codex") + + if engine == "codex" && codexBin == "" { + fmt.Fprintf(os.Stderr, "\n %sCodex not found in PATH.%s\n Install it, then run:\n %scodex --cd %q %q%s\n\n", colBold, colReset, colAccent, project, codexPrompt, colReset) + return 1 + } + if engine == "claude" && claudeBin == "" { + fmt.Fprintf(os.Stderr, "\n %sClaude Code not found in PATH.%s\n Install it, then run:\n %sclaude %q%s\n\n", colBold, colReset, colAccent, prompt, colReset) + return 1 + } + + if engine != "codex" && claudeBin != "" { + fmt.Fprintf(os.Stderr, "\n %s▶%s Launching Claude Code with: %s%s%s\n %scwd: %s%s\n\n", colGreen, colReset, colAccent, prompt, colReset, colDim, project, 
colReset) + if code := runIn(project, claudeBin, prompt); code == 0 || engine == "claude" || codexBin == "" { + return code + } + fmt.Fprintf(os.Stderr, "\n %s⚠%s Claude exited nonzero; trying Codex...\n", colYellow, colReset) + } + + if codexBin == "" { + fmt.Fprintf(os.Stderr, "\n %sNo AI agent CLI found in PATH.%s\n Install Claude Code or Codex, then run one of:\n %sclaude %q%s\n %scodex --cd %q %q%s\n\n", colBold, colReset, colAccent, prompt, colReset, colAccent, project, codexPrompt, colReset) + return 1 + } + fmt.Fprintf(os.Stderr, "\n %s▶%s Launching Codex with: %s%s%s\n %scwd: %s%s\n\n", colGreen, colReset, colAccent, prompt, colReset, colDim, project, colReset) + return runIn(project, codexBin, "--cd", project, codexPrompt) +} + +func runIn(dir, bin string, args ...string) int { + cmd := exec.Command(bin, args...) + cmd.Dir = dir + cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr + if err := cmd.Run(); err != nil { + if ee, ok := err.(*exec.ExitError); ok { + if ws, ok := ee.Sys().(syscall.WaitStatus); ok { + return ws.ExitStatus() + } + return 1 + } + return 1 + } + return 0 +} diff --git a/cli/internal/podstack/sync.sh b/cli/internal/podstack/sync.sh new file mode 100644 index 0000000..8b20678 --- /dev/null +++ b/cli/internal/podstack/sync.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# Sync the repo PodStack slash commands into commands/ for go:embed. Run via +# `go generate ./...` before building. commands/ is gitignored; this is its +# only writer. +set -e +here="$(cd "$(dirname "$0")" && pwd)" +src="$here/../../../.claude/commands" +dest="$here/commands" +rm -rf "$dest" +mkdir -p "$dest" +cp "$src"/*.md "$dest"/ +echo "synced PodStack commands -> $dest" diff --git a/cli/internal/provision/provision.go b/cli/internal/provision/provision.go new file mode 100644 index 0000000..ce31c5d --- /dev/null +++ b/cli/internal/provision/provision.go @@ -0,0 +1,813 @@ +// Package provision fetches pinned models into the global managed dir. 
+package provision + +import ( + "archive/tar" + "archive/zip" + "compress/gzip" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "time" + + "podcli/internal/paths" +) + +type model struct { + URL string + SHA256 string // empty: verification skipped +} + +var models = map[string]model{ + "base": { + URL: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin", + SHA256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe", + }, + "tiny.en": { + URL: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin", + }, + "small": { + URL: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin", + }, +} + +const vadURL = "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin" +const vadSHA = "29940d98d42b91fbd05ce489f3ecf7c72f0a42f027e4875919a28fb4c04ea2cf" + +func ModelPath(size string) string { + return filepath.Join(paths.ModelsDir(), "ggml-"+size+".bin") +} + +func VADModelPath() string { + return filepath.Join(paths.ModelsDir(), "ggml-silero-v5.1.2.bin") +} + +func have(p string) bool { + if fi, err := os.Stat(p); err == nil && fi.Size() > 0 { + return true + } + return false +} + +func EnsureModel(size string) (string, error) { + dest := ModelPath(size) + if have(dest) { + return dest, nil + } + m, ok := models[size] + if !ok { + return "", fmt.Errorf("unknown model size %q (known: base, tiny.en, small)", size) + } + if err := download(m.URL, dest, m.SHA256, "ggml-"+size); err != nil { + return "", err + } + return dest, nil +} + +func EnsureVADModel() (string, error) { + dest := VADModelPath() + if have(dest) { + return dest, nil + } + if err := download(vadURL, dest, vadSHA, "silero-vad"); err != nil { + return "", err + } + return dest, nil +} + +const maxAttempts = 6 + +// fetch resumes via HTTP Range across transient stalls rather than restarting, +// writing to dest 
atomically. +func fetch(url, dest, label string) error { + if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + return err + } + tmp := dest + ".part" + var lastErr error + for attempt := 1; attempt <= maxAttempts; attempt++ { + done, err := downloadOnce(url, tmp, label) + if err == nil && done { + lastErr = nil + break + } + lastErr = err + fmt.Fprintf(os.Stderr, "\n %s interrupted (attempt %d/%d): %v — resuming\n", label, attempt, maxAttempts, err) + time.Sleep(time.Duration(attempt) * time.Second) + } + if lastErr != nil { + os.Remove(tmp) + return lastErr + } + return os.Rename(tmp, dest) +} + +func download(url, dest, wantSHA, label string) error { + if err := fetch(url, dest, label); err != nil { + return err + } + if wantSHA == "" { + fmt.Fprintf(os.Stderr, " (no pinned checksum for %s — skipped verification)\n", label) + return nil + } + got, err := sha256file(dest) + if err != nil { + return err + } + if got != wantSHA { + os.Remove(dest) + return fmt.Errorf("checksum mismatch for %s: got %s want %s", label, got, wantSHA) + } + return nil +} + +// verifyDownload checks a downloaded archive against an upstream sha256sum-style +// manifest. Fails closed on a mismatch (removes the file); fails open with a +// warning when the manifest or its entry can't be fetched, so a transient +// network issue on the sums file doesn't block provisioning. 
+// verifyDownload checks archive against the SHA-256 listed for name in the
+// sha256sum-style manifest served at sumsURL.
+//
+// Verification fails open: if the manifest cannot be fetched, is not served
+// with HTTP 200, or has no entry for name, a note is printed to stderr and nil
+// is returned. It fails closed only on a real mismatch, in which case archive
+// is deleted so the bad download is not reused. Errors while reading the
+// manifest body or hashing archive are returned unchanged.
+func verifyDownload(archive, sumsURL, name string) error {
+	resp, err := downloadHTTPClient().Get(sumsURL)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, " (could not fetch checksums for %s — skipped verification)\n", name)
+		return nil
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		fmt.Fprintf(os.Stderr, " (no checksums for %s — skipped verification)\n", name)
+		return nil
+	}
+	// The 4 MiB cap only bounds memory against a misbehaving server.
+	data, err := io.ReadAll(io.LimitReader(resp.Body, 4<<20))
+	if err != nil {
+		return err
+	}
+	want := ParseChecksums(data)[name]
+	if want == "" {
+		fmt.Fprintf(os.Stderr, " (no checksum entry for %s — skipped verification)\n", name)
+		return nil
+	}
+	got, err := sha256file(archive)
+	if err != nil {
+		return err
+	}
+	// Both sides are lowercase hex: ParseChecksums lowercases, sha256file
+	// encodes with encoding/hex.
+	if got != want {
+		os.Remove(archive)
+		return fmt.Errorf("checksum mismatch for %s: got %s want %s", name, got, want)
+	}
+	return nil
+}
+
+// downloadOnce makes a single attempt to download url into tmp, resuming from
+// the bytes already on disk with an HTTP Range request.
+//
+// It returns (true, nil) once tmp holds the whole body: either the copy ran to
+// completion, or the server answered 416 because the bytes on disk already
+// cover the resource. Every other outcome is (false, err); tmp is left in
+// place so a later attempt can resume from it.
+func downloadOnce(url, tmp, label string) (bool, error) {
+	var start int64
+	if fi, err := os.Stat(tmp); err == nil {
+		start = fi.Size()
+	}
+
+	req, err := http.NewRequest(http.MethodGet, url, nil)
+	if err != nil {
+		return false, err
+	}
+	if start > 0 {
+		req.Header.Set("Range", fmt.Sprintf("bytes=%d-", start))
+	}
+	resp, err := downloadHTTPClient().Do(req)
+	if err != nil {
+		return false, err
+	}
+	defer resp.Body.Close()
+
+	switch resp.StatusCode {
+	case http.StatusRequestedRangeNotSatisfiable:
+		return true, nil // already complete on disk
+	case http.StatusOK:
+		if start > 0 { // server ignored Range — restart cleanly
+			// tmp is opened with O_APPEND below, so a failed truncate would
+			// glue the full body onto the stale partial data.
+			if err := os.Truncate(tmp, 0); err != nil {
+				return false, fmt.Errorf("resetting partial download %s: %w", tmp, err)
+			}
+			start = 0
+		}
+	case http.StatusPartialContent:
+	default:
+		return false, fmt.Errorf("HTTP %d", resp.StatusCode)
+	}
+
+	out, err := os.OpenFile(tmp, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
+	if err != nil {
+		return false, err
+	}
+	defer out.Close() // safety net for the error paths; the success path closes explicitly
+
+	// ContentLength is -1 when the server does not announce a size. Leave
+	// total at 0 in that case so progress reports bytes instead of a
+	// percentage computed from a bogus total.
+	var total int64
+	if resp.ContentLength >= 0 {
+		total = start + resp.ContentLength
+	}
+	pw := &progress{label: label, total: total, written: start}
+	if _, err := io.Copy(io.MultiWriter(out, pw), resp.Body); err != nil {
+		return false, err
+	}
+	// A write error can surface only at close; do not report success past it.
+	if err := out.Close(); err != nil {
+		return false, err
+	}
+	pw.done()
+	return true, nil
+}
+
+// symlinkTargetInside reports whether a symlink at linkPath pointing to linkname
+// resolves within root (root has a trailing separator). Absolute targets and
+// any path that escapes via .. are rejected — this is the symlink half of the
+// zip-slip defense, since the path guard alone only validates the link's own
+// location, not where it points. The check is purely lexical: it does not
+// consult the filesystem.
+func symlinkTargetInside(linkPath, linkname, root string) bool {
+	if filepath.IsAbs(linkname) {
+		return false
+	}
+	resolved := filepath.Clean(filepath.Join(filepath.Dir(linkPath), linkname))
+	// The separator is appended so a target equal to root itself still matches.
+	return strings.HasPrefix(resolved+string(os.PathSeparator), root)
+}
+
+// ParseChecksums parses sha256sum-style "<hash>  <name>" lines into a name->hash
+// map, keyed by basename so it matches the asset filenames consumers request.
+// Hashes are lowercased. Lines with fewer than two fields are skipped, and the
+// "*" that sha256sum puts before the name in binary mode is dropped.
+func ParseChecksums(data []byte) map[string]string {
+	out := map[string]string{}
+	for _, line := range strings.Split(string(data), "\n") {
+		fields := strings.Fields(line)
+		if len(fields) < 2 {
+			continue
+		}
+		name := strings.TrimPrefix(fields[len(fields)-1], "*")
+		out[filepath.Base(name)] = strings.ToLower(fields[0])
+	}
+	return out
+}
+
+// Sha256File returns the lowercase hex SHA-256 of the file at path.
+func Sha256File(path string) (string, error) { return sha256file(path) }
+
+// sha256file streams the file at path through SHA-256 and returns the digest
+// as lowercase hex.
+func sha256file(path string) (string, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+	h := sha256.New()
+	if _, err := io.Copy(h, f); err != nil {
+		return "", err
+	}
+	return hex.EncodeToString(h.Sum(nil)), nil
+}
+
+// ffArchive is one downloadable ffmpeg archive: where to fetch it and the
+// basenames of the binaries to pull out of it.
+type ffArchive struct {
+	URL  string
+	Bins []string
+}
+
+// Static ffmpeg sources are not yet pinned by checksum (upstream "latest" URLs);
+// they get our own pinned builds once podcli hosts releases.
+var ffmpegSpecs = map[string][]ffArchive{
+	"darwin/amd64": {
+		{URL: "https://evermeet.cx/ffmpeg/getrelease/ffmpeg/zip", Bins: []string{"ffmpeg"}},
+		{URL: "https://evermeet.cx/ffmpeg/getrelease/ffprobe/zip", Bins: []string{"ffprobe"}},
+	},
+	"darwin/arm64": {
+		{URL: "https://evermeet.cx/ffmpeg/getrelease/ffmpeg/zip", Bins: []string{"ffmpeg"}},
+		{URL: "https://evermeet.cx/ffmpeg/getrelease/ffprobe/zip", Bins: []string{"ffprobe"}},
+	},
+	"linux/amd64": {
+		{
+			URL:  "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz",
+			Bins: []string{"ffmpeg", "ffprobe"},
+		},
+	},
+	"linux/arm64": {
+		{
+			URL:  "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-arm64-static.tar.xz",
+			Bins: []string{"ffmpeg", "ffprobe"},
+		},
+	},
+	"windows/amd64": {
+		{
+			URL:  "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip",
+			Bins: []string{"ffmpeg.exe", "ffprobe.exe"},
+		},
+	},
+}
+
+// exeSuffix is ".exe" on Windows and empty on every other GOOS.
+func exeSuffix() string {
+	suffix := ""
+	if runtime.GOOS == "windows" {
+		suffix = ".exe"
+	}
+	return suffix
+}
+
+// podcliRepo is the GitHub "owner/name" whose releases host podcli's assets.
+const podcliRepo = "nmbrthirteen/podcli"
+
+// WhisperCLIBin is the path of the managed whisper-cli binary inside the
+// runtime directory.
+func WhisperCLIBin() string {
+	exe := "whisper-cli" + exeSuffix()
+	return filepath.Join(paths.RuntimeDir(), "whisper", exe)
+}
+
+// latestReleaseAssets asks the GitHub API for the latest podcliRepo release and
+// returns its assets as a name -> browser download URL map. Any status other
+// than 200 is reported as "no published release".
+func latestReleaseAssets() (map[string]string, error) {
+	const endpoint = "https://api.github.com/repos/" + podcliRepo + "/releases/latest"
+	req, _ := http.NewRequest(http.MethodGet, endpoint, nil)
+	req.Header.Set("Accept", "application/vnd.github+json")
+
+	resp, err := releaseHTTPClient().Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if code := resp.StatusCode; code != http.StatusOK {
+		return nil, fmt.Errorf("no published release (HTTP %d)", code)
+	}
+
+	var payload struct {
+		Assets []struct {
+			Name string `json:"name"`
+			URL  string `json:"browser_download_url"`
+		} `json:"assets"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
+		return nil, err
+	}
+
+	byName := make(map[string]string, len(payload.Assets))
+	for i := range payload.Assets {
+		byName[payload.Assets[i].Name] = payload.Assets[i].URL
+	}
+	return byName, nil
+}
+
+// latestReleaseAssetURL returns the download URL of the asset called name in
+// the latest podcli release. It fails if the release lookup fails or the
+// release carries no asset with that name.
+func latestReleaseAssetURL(name string) (string, error) {
+	assets, err := latestReleaseAssets()
+	if err != nil {
+		return "", err
+	}
+	if u, ok := assets[name]; ok {
+		return u, nil
+	}
+	return "", fmt.Errorf("asset %s not in latest release", name)
+}
+
+// allowedReleaseHost pins binary downloads to GitHub's own hosts so a redirect
+// can't divert a download to an attacker-controlled host. Matching is
+// case-insensitive; any subdomain of githubusercontent.com is accepted.
+func allowedReleaseHost(h string) bool {
+	h = strings.ToLower(h)
+	switch h {
+	case "github.com", "api.github.com", "objects.githubusercontent.com", "codeload.github.com":
+		return true
+	}
+	return strings.HasSuffix(h, ".githubusercontent.com")
+}
+
+// releaseTransport is shared by every client releaseHTTPClient hands out. It is
+// cloned from http.DefaultTransport rather than built from a zero
+// http.Transport, so it keeps proxy-from-environment support and the default
+// dial/TLS timeouts; sharing one transport lets connections be reused instead
+// of leaving a fresh idle pool behind on every call.
+var releaseTransport = func() *http.Transport {
+	base, ok := http.DefaultTransport.(*http.Transport)
+	if !ok {
+		// DefaultTransport was replaced by something else; keep at least proxy support.
+		base = &http.Transport{Proxy: http.ProxyFromEnvironment}
+	}
+	t := base.Clone()
+	t.ResponseHeaderTimeout = 30 * time.Second
+	return t
+}()
+
+// releaseHTTPClient returns a client for GitHub API and release-asset requests.
+// It gives up after 10 redirects and refuses any redirect to a host that
+// allowedReleaseHost rejects. Only redirects are checked; the initial request
+// URL is not.
+func releaseHTTPClient() *http.Client {
+	return &http.Client{
+		Transport: releaseTransport,
+		CheckRedirect: func(req *http.Request, via []*http.Request) error {
+			if len(via) >= 10 {
+				return fmt.Errorf("too many redirects")
+			}
+			if !allowedReleaseHost(req.URL.Hostname()) {
+				return fmt.Errorf("refusing redirect to untrusted host %q", req.URL.Hostname())
+			}
+			return nil
+		},
+	}
+}
+
+// allowedDownloadHost pins large-binary/model downloads to their known source
+// hosts and CDNs. Several of these payloads (Node, CPython, ffmpeg) are not
+// checksum-verified, so blocking redirects to unknown hosts is the main defense
+// against a diverted download. Initial request URLs are hardcoded (trusted);
+// this only constrains where a redirect may land.
+func allowedDownloadHost(h string) bool {
+	h = strings.ToLower(h)
+	for _, base := range []string{
+		"github.com",            // release assets
+		"githubusercontent.com", // objects.* / release-assets.* CDN
+		"huggingface.co",        // whisper.cpp models + cdn-lfs*.huggingface.co
+		"nodejs.org",            // hermetic Node
+		"evermeet.cx",           // macOS ffmpeg
+		"johnvansickle.com",     // linux ffmpeg
+	} {
+		// Exact host or a true subdomain; "github.com.evil.com" matches neither.
+		if h == base || strings.HasSuffix(h, "."+base) {
+			return true
+		}
+	}
+	return false
+}
+
+// downloadTransport is shared by every client downloadHTTPClient hands out. It
+// is cloned from http.DefaultTransport rather than built from a zero
+// http.Transport, so it keeps proxy-from-environment support and the default
+// dial/TLS timeouts; sharing one transport lets connections be reused instead
+// of leaving a fresh idle pool behind on every call.
+var downloadTransport = func() *http.Transport {
+	base, ok := http.DefaultTransport.(*http.Transport)
+	if !ok {
+		// DefaultTransport was replaced by something else; keep at least proxy support.
+		base = &http.Transport{Proxy: http.ProxyFromEnvironment}
+	}
+	t := base.Clone()
+	t.ResponseHeaderTimeout = 60 * time.Second
+	return t
+}()
+
+// downloadHTTPClient returns a client for large payload downloads. It gives up
+// after 10 redirects and refuses any redirect to a host that
+// allowedDownloadHost rejects. There is no overall request timeout, only a
+// response-header timeout, so long transfers are not cut off.
+func downloadHTTPClient() *http.Client {
+	return &http.Client{
+		Transport: downloadTransport,
+		CheckRedirect: func(req *http.Request, via []*http.Request) error {
+			if len(via) >= 10 {
+				return fmt.Errorf("too many redirects")
+			}
+			if !allowedDownloadHost(req.URL.Hostname()) {
+				return fmt.Errorf("refusing redirect to untrusted host %q", req.URL.Hostname())
+			}
+			return nil
+		},
+	}
+}
+
+// httpGetBytes GETs url through the release client and returns the body. A
+// status other than 200 is an error, and so is a body larger than 1 MiB: a
+// silently truncated checksum manifest could lose entries and turn a
+// verification into a fail-open skip.
+func httpGetBytes(url string) ([]byte, error) {
+	const maxBody = 1 << 20
+	resp, err := releaseHTTPClient().Get(url)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("HTTP %d for %s", resp.StatusCode, url)
+	}
+	// Read one byte past the cap so an oversized body is detectable.
+	data, err := io.ReadAll(io.LimitReader(resp.Body, maxBody+1))
+	if err != nil {
+		return nil, err
+	}
+	if len(data) > maxBody {
+		return nil, fmt.Errorf("response from %s exceeds %d bytes", url, maxBody)
+	}
+	return data, nil
+}
+
+// verifyReleaseAsset checks the file at path against the release's checksums.txt.
+// It fails closed on a real checksum mismatch, but fails open (with a warning)
+// when checksums.txt or the asset's entry is absent — so a release published
+// before checksum manifests, or a CI hiccup, never bricks an install.
+func verifyReleaseAsset(assets map[string]string, assetName, path string) error {
+	sumsURL, ok := assets["checksums.txt"]
+	if !ok {
+		fmt.Fprintf(os.Stderr, " (no checksums.txt in release — skipped verification of %s)\n", assetName)
+		return nil
+	}
+	data, err := httpGetBytes(sumsURL)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, " (could not fetch checksums.txt: %v — skipped verification of %s)\n", err, assetName)
+		return nil
+	}
+	want, ok := ParseChecksums(data)[assetName]
+	if !ok {
+		fmt.Fprintf(os.Stderr, " (no checksum entry for %s — skipped verification)\n", assetName)
+		return nil
+	}
+	got, err := sha256file(path)
+	if err != nil {
+		return err
+	}
+	if !strings.EqualFold(got, want) {
+		// Delete the bad file so it can never be picked up as installed.
+		os.Remove(path)
+		return fmt.Errorf("checksum mismatch for %s: got %s want %s", assetName, got, want)
+	}
+	return nil
+}
+
+// EnsureWhisperCpp returns the path of the managed whisper-cli binary,
+// downloading this platform's build from the latest podcli release when it is
+// not present yet.
+//
+// The download is fetched, verified and made executable under a staging name
+// and only then renamed to its final path. An interrupted run therefore never
+// leaves an unverified or half-prepared file where the next run's presence
+// check would accept it.
+func EnsureWhisperCpp() (string, error) {
+	bin := WhisperCLIBin()
+	if have(bin) {
+		return bin, nil
+	}
+	name := fmt.Sprintf("whisper-cli-%s-%s%s", runtime.GOOS, runtime.GOARCH, exeSuffix())
+	assets, err := latestReleaseAssets()
+	if err != nil {
+		return "", err
+	}
+	url, ok := assets[name]
+	if !ok {
+		return "", fmt.Errorf("asset %s not in latest release", name)
+	}
+	if err := os.MkdirAll(filepath.Dir(bin), 0o755); err != nil {
+		return "", err
+	}
+	staged := bin + ".download"
+	// Best-effort removal of a leftover from an interrupted run; it is fine
+	// for the file not to exist.
+	os.Remove(staged)
+	if err := fetch(url, staged, "whisper-cli"); err != nil {
+		return "", err
+	}
+	if err := verifyReleaseAsset(assets, name, staged); err != nil {
+		return "", err
+	}
+	if runtime.GOOS != "windows" {
+		if err := os.Chmod(staged, 0o755); err != nil {
+			return "", fmt.Errorf("marking %s executable: %w", staged, err)
+		}
+	}
+	if err := os.Rename(staged, bin); err != nil {
+		return "", err
+	}
+	return bin, nil
+}
+
+// FFmpegBin is the path of the managed ffmpeg binary inside the runtime
+// directory.
+func FFmpegBin() string {
+	return filepath.Join(paths.RuntimeDir(), "ffmpeg", "ffmpeg"+exeSuffix())
+}
+
+// EnsureFFmpeg returns the path of the managed ffmpeg binary. When it is
+// missing, every archive listed in ffmpegSpecs for this GOOS/GOARCH is
+// downloaded to a temp file, its wanted binaries are extracted into the
+// runtime ffmpeg directory, and the temp archive is removed.
+func EnsureFFmpeg() (string, error) {
+	bin := FFmpegBin()
+	if have(bin) {
+		return bin, nil
+	}
+	specs, ok := ffmpegSpecs[runtime.GOOS+"/"+runtime.GOARCH]
+	if !ok {
+		return "", fmt.Errorf("no ffmpeg build for %s/%s", runtime.GOOS, runtime.GOARCH)
+	}
+	dir := filepath.Join(paths.RuntimeDir(), "ffmpeg")
+	if err := os.MkdirAll(dir, 0o755); err != nil {
+		return "", err
+	}
+	for _, a := range specs {
+		// The temp name is derived from the URL so each archive gets its own file.
+		sum := sha256.Sum256([]byte(a.URL))
+		archive := filepath.Join(os.TempDir(), "podcli-ff-"+hex.EncodeToString(sum[:8]))
+		if err := fetch(a.URL, archive, "ffmpeg-archive"); err != nil {
+			return "", err
+		}
+		err := extractBins(archive, a.Bins, dir)
+		os.Remove(archive)
+		if err != nil {
+			return "", err
+		}
+	}
+	if !have(bin) {
+		return "", fmt.Errorf("ffmpeg missing after extraction in %s", dir)
+	}
+	return bin, nil
+}
+
+// extractBins sniffs the format of archive from its first bytes (zip or xz)
+// and extracts the files whose basename is listed in bins into dest.
+func extractBins(archive string, bins []string, dest string) error {
+	f, err := os.Open(archive)
+	if err != nil {
+		return err
+	}
+	magic := make([]byte, 6)
+	_, err = io.ReadFull(f, magic)
+	f.Close()
+	if err != nil {
+		// Too short to carry either signature, or unreadable.
+		return fmt.Errorf("reading archive header of %s: %w", archive, err)
+	}
+	switch {
+	case magic[0] == 'P' && magic[1] == 'K':
+		return extractZip(archive, bins, dest)
+	case magic[0] == 0xFD && string(magic[1:6]) == "7zXZ\x00":
+		return extractTarXz(archive, bins, dest)
+	default:
+		return fmt.Errorf("unrecognized archive format")
+	}
+}
+
+// wantSet turns the list of wanted basenames into a lookup set.
+func wantSet(bins []string) map[string]bool {
+	m := make(map[string]bool, len(bins))
+	for _, b := range bins {
+		m[b] = true
+	}
+	return m
+}
+
+// extractZip writes every non-directory zip member whose basename is in bins
+// to dest, flattened to its basename. The member's own directory path is
+// ignored, so it cannot influence where the file lands.
+func extractZip(archive string, bins []string, dest string) error {
+	zr, err := zip.OpenReader(archive)
+	if err != nil {
+		return err
+	}
+	defer zr.Close()
+	want := wantSet(bins)
+	for _, zf := range zr.File {
+		if zf.FileInfo().IsDir() || !want[filepath.Base(zf.Name)] {
+			continue
+		}
+		rc, err := zf.Open()
+		if err != nil {
+			return err
+		}
+		err = writeBin(rc, filepath.Join(dest, filepath.Base(zf.Name)))
+		rc.Close()
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// extractTarXz unpacks archive with the system tar into a scratch directory
+// and copies the regular files whose basename is in bins to dest. The member
+// list is checked first and the archive is refused if any member name is
+// absolute or climbs out with "..".
+func extractTarXz(archive string, bins []string, dest string) error {
+	tmp, err := os.MkdirTemp("", "podcli-ffx-")
+	if err != nil {
+		return err
+	}
+	defer os.RemoveAll(tmp)
+	listing, err := exec.Command("tar", "-tf", archive).Output()
+	if err != nil {
+		return fmt.Errorf("tar list (is tar installed?): %w", err)
+	}
+	for _, name := range strings.Split(string(listing), "\n") {
+		name = strings.TrimSpace(name)
+		if name == "" {
+			continue
+		}
+		clean := filepath.Clean(name)
+		if filepath.IsAbs(clean) || clean == ".." || strings.HasPrefix(clean, ".."+string(os.PathSeparator)) {
+			return fmt.Errorf("refusing archive with unsafe path: %q", name)
+		}
+	}
+	cmd := exec.Command("tar", "-xf", archive, "-C", tmp)
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		return fmt.Errorf("tar extract (is tar installed?): %w", err)
+	}
+	want := wantSet(bins)
+	return filepath.WalkDir(tmp, func(p string, d os.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		// Regular files only: the name check above does not cover link
+		// targets, and os.Open would follow a symlink named like a wanted
+		// binary to wherever it points.
+		if !d.Type().IsRegular() || !want[filepath.Base(p)] {
+			return nil
+		}
+		in, err := os.Open(p)
+		if err != nil {
+			return err
+		}
+		defer in.Close()
+		return writeBin(in, filepath.Join(dest, filepath.Base(p)))
+	})
+}
+
+// writeBin copies r into an executable file at dest. The data goes to a
+// sibling ".partial" file that is renamed over dest only after a clean copy
+// and close, so a failed or interrupted write never leaves a truncated binary
+// at the final path.
+func writeBin(r io.Reader, dest string) error {
+	partial := dest + ".partial"
+	out, err := os.OpenFile(partial, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o755)
+	if err != nil {
+		return err
+	}
+	_, err = io.Copy(out, r)
+	// A write error can surface only at close; keep the first error seen.
+	if cerr := out.Close(); err == nil {
+		err = cerr
+	}
+	if err == nil {
+		err = os.Rename(partial, dest)
+	}
+	if err != nil {
+		os.Remove(partial)
+	}
+	return err
+}
+
+// pyTriples maps GOOS/GOARCH to the target triple looked for in
+// python-build-standalone asset names.
+var pyTriples = map[string]string{
+	"darwin/amd64":  "x86_64-apple-darwin",
+	"darwin/arm64":  "aarch64-apple-darwin",
+	"linux/amd64":   "x86_64-unknown-linux-gnu",
+	"linux/arm64":   "aarch64-unknown-linux-gnu",
+	"windows/amd64": "x86_64-pc-windows-msvc",
+}
+
+// PythonBin is the path of the managed Python interpreter inside the runtime
+// directory: python/python.exe on Windows, python/bin/python3 elsewhere.
+func PythonBin() string {
+	if runtime.GOOS == "windows" {
+		return filepath.Join(paths.RuntimeDir(), "python", "python.exe")
+	}
+	return filepath.Join(paths.RuntimeDir(), "python", "bin", "python3")
+}
+
+// pythonAssetURL resolves a python-build-standalone install_only tarball for
+// this platform via the GitHub latest-release API, so it tracks upstream
+// without a hardcoded version that rots.
+func pythonAssetURL() (url, name, sumsURL string, err error) {
+	triple, ok := pyTriples[runtime.GOOS+"/"+runtime.GOARCH]
+	if !ok {
+		return "", "", "", fmt.Errorf("no python build for %s/%s", runtime.GOOS, runtime.GOARCH)
+	}
+	req, _ := http.NewRequest(http.MethodGet, "https://api.github.com/repos/astral-sh/python-build-standalone/releases/latest", nil)
+	req.Header.Set("Accept", "application/vnd.github+json")
+	resp, err := releaseHTTPClient().Do(req)
+	if err != nil {
+		return "", "", "", err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return "", "", "", fmt.Errorf("github api: HTTP %d", resp.StatusCode)
+	}
+	var rel struct {
+		Assets []struct {
+			Name string `json:"name"`
+			URL  string `json:"browser_download_url"`
+		} `json:"assets"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&rel); err != nil {
+		return "", "", "", err
+	}
+	// sums stays empty when the release has no SHA256SUMS asset; the caller
+	// then skips verification.
+	sums := ""
+	for _, a := range rel.Assets {
+		if a.Name == "SHA256SUMS" {
+			sums = a.URL
+			break
+		}
+	}
+	// match returns the first install_only tarball for this triple whose name
+	// contains prefer.
+	match := func(prefer string) (string, string) {
+		for _, a := range rel.Assets {
+			if strings.Contains(a.Name, triple) && strings.HasSuffix(a.Name, "install_only.tar.gz") && strings.Contains(a.Name, prefer) {
+				return a.URL, a.Name
+			}
+		}
+		return "", ""
+	}
+	// Prefer CPython 3.12; otherwise take whichever 3.x build is listed first.
+	if u, n := match("cpython-3.12."); u != "" {
+		return u, n, sums, nil
+	}
+	if u, n := match("cpython-3."); u != "" {
+		return u, n, sums, nil
+	}
+	return "", "", "", fmt.Errorf("no install_only python asset for %s", triple)
+}
+
+// EnsurePython returns the path of the managed Python interpreter, downloading
+// and unpacking a standalone CPython into the runtime directory when it is
+// missing. If requirements is non-empty it is passed to pip as a requirements
+// file on every call, whether or not the interpreter was just installed.
+func EnsurePython(requirements string) (string, error) {
+	bin := PythonBin()
+	if !have(bin) {
+		url, name, sumsURL, err := pythonAssetURL()
+		if err != nil {
+			return "", err
+		}
+		sum := sha256.Sum256([]byte(url))
+		archive := filepath.Join(os.TempDir(), "podcli-py-"+hex.EncodeToString(sum[:8])+".tar.gz")
+		if err := fetch(url, archive, "cpython"); err != nil {
+			return "", err
+		}
+		if sumsURL != "" {
+			if err := verifyDownload(archive, sumsURL, name); err != nil {
+				// A mismatch already removed the archive; this covers the
+				// other failure paths so the temp file is not left behind.
+				os.Remove(archive)
+				return "", err
+			}
+		}
+		err = extractTarGz(archive, paths.RuntimeDir())
+		os.Remove(archive)
+		if err != nil {
+			return "", err
+		}
+		if !have(bin) {
+			return "", fmt.Errorf("python missing after extraction")
+		}
+	}
+	if requirements != "" {
+		if err := pipInstall(bin, requirements); err != nil {
+			return "", err
+		}
+	}
+	return bin, nil
+}
+
+// pipInstall runs "pybin -m pip install -r requirements", sending pip's stdout
+// and stderr to this process's stderr.
+func pipInstall(pybin, requirements string) error {
+	fmt.Fprintf(os.Stderr, " installing python deps (%s) — pulls ~80MB, first run takes a minute\n", filepath.Base(requirements))
+	cmd := exec.Command(pybin, "-m", "pip", "install", "--disable-pip-version-check", "--progress-bar=on", "-r", requirements)
+	cmd.Stdout, cmd.Stderr = os.Stderr, os.Stderr
+	cmd.Env = append(os.Environ(), "PYTHONUNBUFFERED=1")
+	return cmd.Run()
+}
+
+// extractTarGz unpacks the gzip-compressed tar at archive into dest.
+//
+// Directories, regular files, symlinks and hardlinks are materialised; other
+// member types are skipped. A member whose path lands outside dest, a symlink
+// that points outside it, or a hardlink whose source is outside it aborts the
+// extraction with an error. The archive's own root member ("./", as written by
+// "tar -C dir .") is accepted.
+//
+// NOTE(review): the containment checks are lexical. A chain of in-tree
+// symlinks (one pointing at ".", a second pointing at "<first>/..") passes them
+// but resolves outside dest on disk — confirm every caller only feeds archives
+// from trusted or checksum-verified sources.
+func extractTarGz(archive, dest string) error {
+	f, err := os.Open(archive)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	gz, err := gzip.NewReader(f)
+	if err != nil {
+		return err
+	}
+	defer gz.Close()
+	tr := tar.NewReader(gz)
+	base := filepath.Clean(dest)
+	root := base + string(os.PathSeparator)
+	for {
+		h, err := tr.Next()
+		if err == io.EOF {
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		target := filepath.Join(dest, h.Name)
+		if target == base {
+			// The member is dest itself. filepath.Join drops the trailing
+			// separator, so the prefix test below would reject it as unsafe;
+			// only a directory entry is legitimate here.
+			if h.Typeflag != tar.TypeDir {
+				return fmt.Errorf("unsafe path in archive: %s", h.Name)
+			}
+			if err := os.MkdirAll(target, 0o755); err != nil {
+				return err
+			}
+			continue
+		}
+		if !strings.HasPrefix(target, root) {
+			return fmt.Errorf("unsafe path in archive: %s", h.Name)
+		}
+		switch h.Typeflag {
+		case tar.TypeDir:
+			if err := os.MkdirAll(target, 0o755); err != nil {
+				return err
+			}
+		case tar.TypeReg:
+			if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
+				return err
+			}
+			out, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.FileMode(h.Mode))
+			if err != nil {
+				return err
+			}
+			_, err = io.Copy(out, tr)
+			// A write error can surface only at close; keep the first error seen.
+			if cerr := out.Close(); err == nil {
+				err = cerr
+			}
+			if err != nil {
+				return err
+			}
+		case tar.TypeSymlink:
+			if !symlinkTargetInside(target, h.Linkname, root) {
+				return fmt.Errorf("unsafe symlink %s -> %s in archive", h.Name, h.Linkname)
+			}
+			if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
+				return err
+			}
+			os.Remove(target) // replace whatever an earlier extraction left here
+			if err := os.Symlink(h.Linkname, target); err != nil {
+				return err
+			}
+		case tar.TypeLink:
+			// Hardlink sources are resolved against dest, not against the
+			// link's own directory.
+			linkTarget := filepath.Join(dest, h.Linkname)
+			if !strings.HasPrefix(linkTarget, root) {
+				return fmt.Errorf("unsafe hardlink %s -> %s in archive", h.Name, h.Linkname)
+			}
+			if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
+				return err
+			}
+			os.Remove(target) // replace whatever an earlier extraction left here
+			if err := os.Link(linkTarget, target); err != nil {
+				return err
+			}
+		}
+	}
+}
+
+// progress is an io.Writer that counts the bytes passing through it and prints
+// a throttled one-line download indicator to stderr.
+type progress struct {
+	label    string
+	total    int64 // expected final size in bytes; <= 0 means unknown
+	written  int64
+	lastPct  int
+	lastTick time.Time
+}
+
+// Write records len(b) bytes and, at most once every 200ms, redraws the
+// indicator: a percentage when total is known, otherwise the running MB count.
+// It always reports the whole buffer as written.
+func (p *progress) Write(b []byte) (int, error) {
+	n := len(b)
+	p.written += int64(n)
+	now := time.Now()
+	if now.Sub(p.lastTick) < 200*time.Millisecond {
+		return n, nil
+	}
+	p.lastTick = now
+	if p.total > 0 {
+		pct := int(p.written * 100 / p.total)
+		if pct != p.lastPct {
+			p.lastPct = pct
+			fmt.Fprintf(os.Stderr, "\r fetching %s ... %d%% (%d/%d MB)", p.label, pct, p.written>>20, p.total>>20)
+		}
+	} else {
+		fmt.Fprintf(os.Stderr, "\r fetching %s ... %d MB", p.label, p.written>>20)
+	}
+	return n, nil
+}
+
+// done overwrites the indicator with the final byte count and ends the line.
+func (p *progress) done() {
+	fmt.Fprintf(os.Stderr, "\r fetching %s ... done (%d MB)%s\n", p.label, p.written>>20, " ")
+}
diff --git a/cli/internal/provision/provision_manual_test.go b/cli/internal/provision/provision_manual_test.go
new file mode 100644
index 0000000..d44c3a9
--- /dev/null
+++ b/cli/internal/provision/provision_manual_test.go
@@ -0,0 +1,47 @@
+package provision
+
+import (
+	"os"
+	"os/exec"
+	"strings"
+	"testing"
+
+	"podcli/internal/paths"
+)
+
+// Guarded network integration check (set PODCLI_MANUAL=1). Validates GitHub-API
+// asset resolution, tar.gz extraction (incl. symlinks), and that the provisioned
+// interpreter runs. PODCLI_PY_ARCHIVE=<path> extracts a local tarball instead of
+// downloading; PODCLI_REQS also tests pip install.
+func TestEnsurePythonManual(t *testing.T) { + if os.Getenv("PODCLI_MANUAL") == "" { + t.Skip("set PODCLI_MANUAL=1 to run") + } + if arc := os.Getenv("PODCLI_PY_ARCHIVE"); arc != "" { + if err := os.MkdirAll(paths.RuntimeDir(), 0o755); err != nil { + t.Fatal(err) + } + if err := extractTarGz(arc, paths.RuntimeDir()); err != nil { + t.Fatal(err) + } + assertRuns(t, PythonBin()) + return + } + bin, err := EnsurePython(os.Getenv("PODCLI_REQS")) + if err != nil { + t.Fatal(err) + } + assertRuns(t, bin) +} + +func assertRuns(t *testing.T, bin string) { + out, err := exec.Command(bin, "--version").CombinedOutput() + if err != nil { + t.Fatal(err) + } + v := strings.TrimSpace(string(out)) + t.Logf("python %s -> %s", bin, v) + if !strings.HasPrefix(v, "Python 3.") { + t.Fatalf("unexpected version: %q", v) + } +} diff --git a/cli/internal/provision/redirect_test.go b/cli/internal/provision/redirect_test.go new file mode 100644 index 0000000..8e03c9e --- /dev/null +++ b/cli/internal/provision/redirect_test.go @@ -0,0 +1,45 @@ +package provision + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestAllowedDownloadHost(t *testing.T) { + allow := []string{ + "github.com", "objects.githubusercontent.com", "release-assets.githubusercontent.com", + "huggingface.co", "cdn-lfs.huggingface.co", "cdn-lfs-us-1.huggingface.co", + "nodejs.org", "evermeet.cx", "johnvansickle.com", "www.johnvansickle.com", + } + for _, h := range allow { + if !allowedDownloadHost(h) { + t.Errorf("expected %q to be allowed", h) + } + } + // Suffix spoofing must not pass: a trusted name as a left-label is not enough. 
+ deny := []string{ + "evil.com", "github.com.evil.com", "huggingface.co.attacker.net", + "githubusercontent.com.evil.com", "127.0.0.1", "", + } + for _, h := range deny { + if allowedDownloadHost(h) { + t.Errorf("expected %q to be denied", h) + } + } +} + +func TestDownloadClientRefusesUntrustedRedirect(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Redirect(w, r, "https://attacker.example/payload", http.StatusFound) + })) + defer srv.Close() + + resp, err := downloadHTTPClient().Get(srv.URL) + if err == nil { + if resp != nil { + resp.Body.Close() + } + t.Fatal("expected redirect to an untrusted host to be refused") + } +} diff --git a/cli/internal/provision/security_test.go b/cli/internal/provision/security_test.go new file mode 100644 index 0000000..7a747c1 --- /dev/null +++ b/cli/internal/provision/security_test.go @@ -0,0 +1,96 @@ +package provision + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "os" + "path/filepath" + "runtime" + "testing" +) + +func TestParseChecksums(t *testing.T) { + in := []byte("abc123 podcli-darwin-arm64\n" + + "DEADBEEF ./dist/whisper-cli-linux-amd64\n" + + "\n" + + "malformed-line\n") + got := ParseChecksums(in) + if got["podcli-darwin-arm64"] != "abc123" { + t.Fatalf("podcli hash = %q", got["podcli-darwin-arm64"]) + } + if got["whisper-cli-linux-amd64"] != "deadbeef" { + t.Fatalf("whisper hash = %q (want lowercased, basename-keyed)", got["whisper-cli-linux-amd64"]) + } + if len(got) != 2 { + t.Fatalf("expected 2 entries, got %d: %v", len(got), got) + } +} + +func TestSymlinkTargetInside(t *testing.T) { + root := filepath.Clean("/tmp/dest") + string(os.PathSeparator) + cases := []struct { + name string + linkPath string + linkname string + ok bool + }{ + {"relative inside", "/tmp/dest/a/link", "../b", true}, + {"to root", "/tmp/dest/link", ".", true}, + {"escape via dotdot", "/tmp/dest/link", "../../etc/passwd", false}, + {"absolute", "/tmp/dest/link", 
"/etc/passwd", false}, + {"deep escape", "/tmp/dest/a/b/link", "../../../outside", false}, + } + for _, c := range cases { + if got := symlinkTargetInside(c.linkPath, c.linkname, root); got != c.ok { + t.Errorf("%s: symlinkTargetInside(%q,%q)=%v want %v", c.name, c.linkPath, c.linkname, got, c.ok) + } + } +} + +// TestExtractTarGzRejectsEscapingSymlink builds an in-memory tarball with a +// symlink that escapes the destination and confirms extraction refuses it. +func TestExtractTarGzRejectsEscapingSymlink(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("symlink semantics differ on Windows") + } + var buf bytes.Buffer + gz := gzip.NewWriter(&buf) + tw := tar.NewWriter(gz) + if err := tw.WriteHeader(&tar.Header{Name: "evil", Typeflag: tar.TypeSymlink, Linkname: "../../../../etc", Mode: 0o777}); err != nil { + t.Fatal(err) + } + tw.Close() + gz.Close() + + dir := t.TempDir() + archive := filepath.Join(dir, "evil.tar.gz") + if err := os.WriteFile(archive, buf.Bytes(), 0o644); err != nil { + t.Fatal(err) + } + dest := filepath.Join(dir, "out") + if err := extractTarGz(archive, dest); err == nil { + t.Fatal("expected extractTarGz to reject escaping symlink, got nil error") + } +} + +func TestExtractTarGzRejectsEscapingHardlink(t *testing.T) { + var buf bytes.Buffer + gz := gzip.NewWriter(&buf) + tw := tar.NewWriter(gz) + if err := tw.WriteHeader(&tar.Header{Name: "evil", Typeflag: tar.TypeLink, Linkname: "../../../../etc/passwd", Mode: 0o644}); err != nil { + t.Fatal(err) + } + tw.Close() + gz.Close() + + dir := t.TempDir() + archive := filepath.Join(dir, "evil.tar.gz") + if err := os.WriteFile(archive, buf.Bytes(), 0o644); err != nil { + t.Fatal(err) + } + dest := filepath.Join(dir, "out") + if err := extractTarGz(archive, dest); err == nil { + t.Fatal("expected extractTarGz to reject escaping hardlink, got nil error") + } +} diff --git a/cli/internal/provision/studio.go b/cli/internal/provision/studio.go new file mode 100644 index 0000000..f11a40e --- 
/dev/null +++ b/cli/internal/provision/studio.go @@ -0,0 +1,312 @@ +package provision + +import ( + "archive/tar" + "archive/zip" + "compress/gzip" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + + "podcli/internal/paths" +) + +// nodeVersion is the hermetic Node.js used to serve the studio web UI. Node only +// runs the bundled server (rendering is delegated to the Python backend), so any +// recent LTS works; pin for reproducibility. +const nodeVersion = "20.18.1" + +var nodeTriples = map[string]string{ + "darwin/amd64": "darwin-x64", + "darwin/arm64": "darwin-arm64", + "linux/amd64": "linux-x64", + "linux/arm64": "linux-arm64", + "windows/amd64": "win-x64", +} + +func NodeDir() string { return filepath.Join(paths.RuntimeDir(), "node") } + +func NodeBin() string { + if runtime.GOOS == "windows" { + return filepath.Join(NodeDir(), "node.exe") + } + return filepath.Join(NodeDir(), "bin", "node") +} + +func StudioDir() string { return filepath.Join(paths.RuntimeDir(), "studio") } +func StudioServer() string { return filepath.Join(StudioDir(), "web-server.mjs") } + +func EnsureNode() (string, error) { + bin := NodeBin() + if have(bin) { + return bin, nil + } + triple, ok := nodeTriples[runtime.GOOS+"/"+runtime.GOARCH] + if !ok { + return "", fmt.Errorf("no node build for %s/%s", runtime.GOOS, runtime.GOARCH) + } + ext := "tar.gz" + if runtime.GOOS == "windows" { + ext = "zip" + } + base := fmt.Sprintf("node-v%s-%s", nodeVersion, triple) + url := fmt.Sprintf("https://nodejs.org/dist/v%s/%s.%s", nodeVersion, base, ext) + archive := filepath.Join(os.TempDir(), "podcli-"+base+"."+ext) + if err := fetch(url, archive, "node"); err != nil { + return "", err + } + defer os.Remove(archive) + if err := verifyDownload(archive, fmt.Sprintf("https://nodejs.org/dist/v%s/SHASUMS256.txt", nodeVersion), base+"."+ext); err != nil { + return "", err + } + if err := os.RemoveAll(NodeDir()); err != nil { + return "", err + } + var err error + if ext == "zip" 
{ + err = extractZipStrip1(archive, NodeDir()) + } else { + err = extractTarGzStrip1(archive, NodeDir()) + } + if err != nil { + return "", err + } + if runtime.GOOS != "windows" { + os.Chmod(bin, 0o755) + } + if !have(bin) { + return "", fmt.Errorf("node missing after extraction in %s", NodeDir()) + } + return bin, nil +} + +func RemotionDir() string { return filepath.Join(paths.RuntimeDir(), "remotion") } +func RemotionScript() string { return filepath.Join(RemotionDir(), "render.mjs") } + +// EnsureRemotion fetches the per-platform Remotion render bundle (remotion/ + +// a production node_modules with native bindings) and extracts it into the +// runtime dir so remotion/ and node_modules/ sit beside backend/. Per-platform +// because @rspack and the Remotion compositor ship native binaries. +func EnsureRemotion() (string, error) { + if have(RemotionScript()) { + return RemotionDir(), nil + } + name := fmt.Sprintf("remotion-bundle-%s-%s.tar.gz", runtime.GOOS, runtime.GOARCH) + assets, err := latestReleaseAssets() + if err != nil { + return "", err + } + url, ok := assets[name] + if !ok { + return "", fmt.Errorf("asset %s not in latest release", name) + } + archive := filepath.Join(os.TempDir(), "podcli-"+name) + if err := fetch(url, archive, "remotion"); err != nil { + return "", err + } + defer os.Remove(archive) + if err := verifyReleaseAsset(assets, name, archive); err != nil { + return "", err + } + if err := os.RemoveAll(RemotionDir()); err != nil { + return "", err + } + if err := os.RemoveAll(filepath.Join(paths.RuntimeDir(), "node_modules")); err != nil { + return "", err + } + if err := extractTarGz(archive, paths.RuntimeDir()); err != nil { + return "", err + } + if !have(RemotionScript()) { + return "", fmt.Errorf("remotion render.mjs missing after extraction") + } + return RemotionDir(), nil +} + +// PrewarmRemotion compiles the project-independent composition bundle once into +// the managed dir so the first caption render skips the ~20s bundling 
step. +func PrewarmRemotion() error { + node := NodeBin() + if !have(node) || !have(RemotionScript()) { + return fmt.Errorf("node/remotion not provisioned") + } + cmd := exec.Command(node, RemotionScript(), "--prebundle") + cmd.Dir = RemotionDir() + cmd.Env = append(os.Environ(), "PODCLI_CACHE_DIR="+filepath.Join(RemotionDir(), ".bundle-cache")) + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// EnsureRemotionBrowser pre-downloads the Chrome Headless Shell the Remotion +// renderer needs, so the first caption render works offline. Best-effort: if it +// fails, the renderer downloads the browser on first use. +func EnsureRemotionBrowser() error { + node := NodeBin() + if !have(node) || !have(RemotionScript()) { + return fmt.Errorf("node/remotion not provisioned") + } + cmd := exec.Command(node, "-e", + "import('@remotion/renderer').then(r=>r.ensureBrowser()).then(()=>process.exit(0)).catch(e=>{console.error(String(e));process.exit(1)})") + cmd.Dir = RemotionDir() + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// EnsureStudio fetches the prebuilt, platform-independent studio bundle (server + +// SPA) from the latest release into StudioDir. 
+func EnsureStudio() (string, error) {
+	const bundle = "studio-bundle.tar.gz"
+
+	entry := StudioServer()
+	if have(entry) {
+		// Already provisioned: the server script is the presence marker.
+		return StudioDir(), nil
+	}
+
+	assets, err := latestReleaseAssets()
+	if err != nil {
+		return "", err
+	}
+	url, found := assets[bundle]
+	if !found {
+		return "", fmt.Errorf("asset studio-bundle.tar.gz not in latest release")
+	}
+
+	archive := filepath.Join(os.TempDir(), "podcli-studio-bundle.tar.gz")
+	if err = fetch(url, archive, "studio"); err != nil {
+		return "", err
+	}
+	defer os.Remove(archive)
+	if err = verifyReleaseAsset(assets, bundle, archive); err != nil {
+		return "", err
+	}
+
+	// Start from an empty directory so files from an older bundle cannot linger.
+	if err = os.RemoveAll(StudioDir()); err != nil {
+		return "", err
+	}
+	if err = os.MkdirAll(StudioDir(), 0o755); err != nil {
+		return "", err
+	}
+	if err = extractTarGz(archive, StudioDir()); err != nil {
+		return "", err
+	}
+
+	if have(entry) {
+		return StudioDir(), nil
+	}
+	return "", fmt.Errorf("studio server missing after extraction in %s", StudioDir())
+}
+
+// strip1 drops the leading path component (node tarballs/zips nest everything
+// under node-vX-os-arch/).
+func strip1(name string) string { + name = filepath.ToSlash(name) + if i := strings.IndexByte(name, '/'); i >= 0 { + return name[i+1:] + } + return "" +} + +func extractTarGzStrip1(archive, dest string) error { + f, err := os.Open(archive) + if err != nil { + return err + } + defer f.Close() + gz, err := gzip.NewReader(f) + if err != nil { + return err + } + defer gz.Close() + tr := tar.NewReader(gz) + root := filepath.Clean(dest) + string(os.PathSeparator) + for { + h, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + rel := strip1(h.Name) + if rel == "" { + continue + } + target := filepath.Join(dest, rel) + if !strings.HasPrefix(target, root) { + return fmt.Errorf("unsafe path in archive: %s", h.Name) + } + switch h.Typeflag { + case tar.TypeDir: + if err := os.MkdirAll(target, 0o755); err != nil { + return err + } + case tar.TypeReg: + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + return err + } + out, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.FileMode(h.Mode)) + if err != nil { + return err + } + _, err = io.Copy(out, tr) + out.Close() + if err != nil { + return err + } + case tar.TypeSymlink: + if !symlinkTargetInside(target, h.Linkname, root) { + return fmt.Errorf("unsafe symlink %s -> %s in archive", h.Name, h.Linkname) + } + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + return err + } + os.Remove(target) + if err := os.Symlink(h.Linkname, target); err != nil { + return err + } + } + } + return nil +} + +func extractZipStrip1(archive, dest string) error { + zr, err := zip.OpenReader(archive) + if err != nil { + return err + } + defer zr.Close() + root := filepath.Clean(dest) + string(os.PathSeparator) + for _, zf := range zr.File { + rel := strip1(zf.Name) + if rel == "" { + continue + } + target := filepath.Join(dest, rel) + if !strings.HasPrefix(target, root) { + return fmt.Errorf("unsafe path in archive: %s", zf.Name) + } + if zf.FileInfo().IsDir() { + 
if err := os.MkdirAll(target, 0o755); err != nil { + return err + } + continue + } + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + return err + } + rc, err := zf.Open() + if err != nil { + return err + } + out, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, zf.Mode()) + if err != nil { + rc.Close() + return err + } + _, err = io.Copy(out, rc) + out.Close() + rc.Close() + if err != nil { + return err + } + } + return nil +} diff --git a/cli/internal/update/update.go b/cli/internal/update/update.go new file mode 100644 index 0000000..4a3fb7f --- /dev/null +++ b/cli/internal/update/update.go @@ -0,0 +1,253 @@ +// Package update checks GitHub Releases for a newer podcli and (once releases +// publish per-platform binaries) applies it. For now a manual update points the +// user at their package manager, matching the npm/bun reinstall fallback. +package update + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "time" + + "podcli/internal/config" + "podcli/internal/paths" + "podcli/internal/provision" +) + +const repo = "nmbrthirteen/podcli" + +func exeExt() string { + if runtime.GOOS == "windows" { + return ".exe" + } + return "" +} + +// managedBin is the binary the npm shim and direct installs both exec, so +// replacing it updates podcli regardless of how it was installed. 
+func managedBin() string { + return filepath.Join(paths.BinDir(), "podcli"+exeExt()) +} + +func assetName() string { + return fmt.Sprintf("podcli-%s-%s%s", runtime.GOOS, runtime.GOARCH, exeExt()) +} + +func assetURL(tag string) string { + return fmt.Sprintf("https://github.com/%s/releases/download/v%s/%s", repo, tag, assetName()) +} + +func checksumsURL(tag string) string { + return fmt.Sprintf("https://github.com/%s/releases/download/v%s/checksums.txt", repo, tag) +} + +func allowedHost(h string) bool { + h = strings.ToLower(h) + switch h { + case "github.com", "api.github.com", "objects.githubusercontent.com", "codeload.github.com": + return true + } + return strings.HasSuffix(h, ".githubusercontent.com") +} + +func guardedClient() *http.Client { + return &http.Client{ + Transport: &http.Transport{ResponseHeaderTimeout: 30 * time.Second}, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 10 { + return fmt.Errorf("too many redirects") + } + if !allowedHost(req.URL.Hostname()) { + return fmt.Errorf("refusing redirect to untrusted host %q", req.URL.Hostname()) + } + return nil + }, + } +} + +func latestTag(timeout time.Duration) (string, error) { + client := &http.Client{Timeout: timeout} + req, _ := http.NewRequest(http.MethodGet, "https://api.github.com/repos/"+repo+"/releases/latest", nil) + req.Header.Set("Accept", "application/vnd.github+json") + resp, err := client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("no published release (HTTP %d)", resp.StatusCode) + } + var rel struct { + Tag string `json:"tag_name"` + } + if err := json.NewDecoder(resp.Body).Decode(&rel); err != nil { + return "", err + } + return strings.TrimPrefix(rel.Tag, "v"), nil +} + +func parseVer(v string) [3]int { + v = strings.TrimPrefix(v, "v") + v = strings.SplitN(v, "-", 2)[0] // drop -dev / pre-release + var out [3]int + for i, p := range strings.SplitN(v, 
".", 3) { + out[i], _ = strconv.Atoi(p) + } + return out +} + +func newer(remote, current string) bool { + r, c := parseVer(remote), parseVer(current) + for i := 0; i < 3; i++ { + if r[i] != c[i] { + return r[i] > c[i] + } + } + return false +} + +// NotifyIfOutdated prints a one-line notice when a newer release exists. Fast, +// silent on any error, and respects the off-switch. +func NotifyIfOutdated(current string) { + if !config.AutoUpdate() { + return + } + tag, err := latestTag(1500 * time.Millisecond) + if err != nil { + return + } + if newer(tag, current) { + fmt.Fprintf(os.Stderr, " ↑ podcli %s available (you have %s) — run `podcli update`\n", tag, current) + } +} + +func Run(current string) int { + tag, err := latestTag(10 * time.Second) + if err != nil { + fmt.Fprintf(os.Stderr, "podcli: update check failed: %v\n", err) + return 1 + } + if !newer(tag, current) { + fmt.Printf("podcli %s is up to date.\n", current) + return 0 + } + fmt.Printf("Updating podcli %s → %s ...\n", current, tag) + if err := apply(tag); err != nil { + fmt.Fprintf(os.Stderr, "podcli: self-update failed (%v).\n", err) + fmt.Fprintln(os.Stderr, "Reinstall via your package manager: npm i -g podcli (or: bun add -g podcli)") + return 1 + } + fmt.Printf("Updated to podcli %s.\n", tag) + return 0 +} + +// apply downloads the release binary for this platform and swaps the managed +// binary atomically. +func apply(tag string) error { + dest := managedBin() + if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + return err + } + staged := dest + ".new" + if err := downloadFile(assetURL(tag), staged); err != nil { + return err + } + if err := verifyStaged(tag, staged); err != nil { + os.Remove(staged) + return err + } + if runtime.GOOS != "windows" { + if err := os.Chmod(staged, 0o755); err != nil { + return err + } + } + return swap(staged, dest) +} + +// verifyStaged checks the downloaded binary against the release's checksums.txt. 
+// Fails closed on a mismatch; fails open (warning) only when checksums.txt is +// absent, so an older release without a manifest still updates. +func verifyStaged(tag, staged string) error { + resp, err := guardedClient().Get(checksumsURL(tag)) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode == http.StatusNotFound { + fmt.Fprintln(os.Stderr, " (no checksums.txt in release — skipped verification)") + return nil + } + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("HTTP %d fetching checksums.txt", resp.StatusCode) + } + data, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) + if err != nil { + return err + } + want, ok := provision.ParseChecksums(data)[assetName()] + if !ok { + fmt.Fprintf(os.Stderr, " (no checksum entry for %s — skipped verification)\n", assetName()) + return nil + } + got, err := provision.Sha256File(staged) + if err != nil { + return err + } + if !strings.EqualFold(got, want) { + return fmt.Errorf("checksum mismatch: got %s want %s", got, want) + } + return nil +} + +// swap replaces dest with staged. On Windows a running .exe can't be overwritten, +// so the old binary is moved aside first; on Unix the rename is atomic. 
+func swap(staged, dest string) error { + if runtime.GOOS == "windows" { + old := dest + ".old" + os.Remove(old) + moved := false + if _, err := os.Stat(dest); err == nil { + if err := os.Rename(dest, old); err != nil { + return err + } + moved = true + } + if err := os.Rename(staged, dest); err != nil { + if moved { + os.Rename(old, dest) // restore the original so the CLI isn't bricked + } + return err + } + return nil + } + return os.Rename(staged, dest) +} + +func downloadFile(url, dest string) error { + resp, err := guardedClient().Get(url) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("HTTP %d for %s", resp.StatusCode, url) + } + tmp := dest + ".part" + f, err := os.Create(tmp) + if err != nil { + return err + } + _, err = io.Copy(f, resp.Body) + f.Close() + if err != nil { + os.Remove(tmp) + return err + } + return os.Rename(tmp, dest) +} diff --git a/cli/internal/update/update_test.go b/cli/internal/update/update_test.go new file mode 100644 index 0000000..7be03c2 --- /dev/null +++ b/cli/internal/update/update_test.go @@ -0,0 +1,45 @@ +package update + +import ( + "os" + "path/filepath" + "testing" +) + +func TestSwapReplacesBinary(t *testing.T) { + dir := t.TempDir() + dest := filepath.Join(dir, "podcli") + if err := os.WriteFile(dest, []byte("OLD"), 0o755); err != nil { + t.Fatal(err) + } + staged := dest + ".new" + if err := os.WriteFile(staged, []byte("NEW"), 0o755); err != nil { + t.Fatal(err) + } + if err := swap(staged, dest); err != nil { + t.Fatal(err) + } + b, _ := os.ReadFile(dest) + if string(b) != "NEW" { + t.Fatalf("swap left %q, want NEW", b) + } +} + +func TestNewer(t *testing.T) { + cases := []struct { + remote, current string + want bool + }{ + {"2.0.1", "2.0.0", true}, + {"2.0.0", "2.0.0", false}, + {"1.9.9", "2.0.0", false}, + {"2.1.0", "2.0.9", true}, + {"v2.0.1", "2.0.0", true}, + {"3.0.0", "2.9.9", true}, + } + for _, c := range cases { + if got := 
newer(c.remote, c.current); got != c.want { + t.Errorf("newer(%q, %q) = %v, want %v", c.remote, c.current, got, c.want) + } + } +} diff --git a/cli/main.go b/cli/main.go new file mode 100644 index 0000000..6d24257 --- /dev/null +++ b/cli/main.go @@ -0,0 +1,415 @@ +// podcli — native launcher. Reserved verbs are handled here; everything else +// routes to the Python engine. +package main + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "podcli/internal/backend" + "podcli/internal/config" + "podcli/internal/engine" + "podcli/internal/paths" + "podcli/internal/podstack" + "podcli/internal/provision" + "podcli/internal/update" +) + +// Version is set at build time via -ldflags "-X main.Version=...". +var Version = "2.0.0-dev" + +func main() { + args := os.Args[1:] + if len(args) == 0 { + os.Exit(runEngine(args)) // backend's branded interactive menu + } + + switch args[0] { + case "version", "--version", "-v": + fmt.Printf("podcli %s\n", Version) + case "doctor": + doctor() + case "update": + os.Exit(update.Run(Version)) + case "setup": + os.Exit(setup(args[1:])) + case "mcp": + if len(args) >= 2 && args[1] == "install" { + os.Exit(mcpInstall()) + } + code, err := engine.RunMCP() + if err != nil { + fmt.Fprintln(os.Stderr, "podcli:", err) + } + os.Exit(code) + case "config": + if len(args) >= 2 && (args[1] == "get" || args[1] == "set") { + os.Exit(configCmd(args[1:])) + } + os.Exit(runEngine(args)) // status/export/import/use → Python + case "help", "--help", "-h": + printHelp() + default: + if podstack.IsCommand(args[0]) { + os.Exit(podstack.Run(args[0], args[1:])) + } + os.Exit(runEngine(args)) + } +} + +// wantsRuntime gates first-run auto-provisioning: only commands that need the +// backend trigger the download, not lightweight ones like config. 
+func wantsRuntime(args []string) bool { + if len(args) == 0 { + return true + } + switch args[0] { + case "process", "transcribe", "studio", "auto": + return true + } + return false +} + +// ensureRuntime self-provisions on first run so `podcli` works without a separate +// `podcli setup`. Not called on the mcp path, whose stdout is the JSON-RPC channel. +func ensureRuntime() { + if _, ok := engine.BackendRoot(); ok { + return + } + fmt.Fprintln(os.Stderr, "First run — setting up podcli (one-time download)…") + setup(nil) +} + +func runEngine(args []string) int { + update.NotifyIfOutdated(Version) + if wantsRuntime(args) { + ensureRuntime() + } + if transcribeEngine(args) == "whispercpp" { + model, err := provision.EnsureModel(transcribeModel(args)) + if err != nil { + fmt.Fprintln(os.Stderr, "podcli: provisioning model:", err) + return 1 + } + os.Setenv("PODCLI_ENGINE", "whispercpp") + os.Setenv("PODCLI_WHISPERCPP_MODEL", model) + } + code, err := engine.Run(args) + if err != nil { + fmt.Fprintln(os.Stderr, "podcli:", err) + return 1 + } + return code +} + +func transcribeModel(args []string) string { + for _, arg := range args { + if arg == "--fast" { + return "tiny.en" + } + } + return "base" +} + +func configCmd(args []string) int { + switch { + case args[0] == "get" && len(args) == 2: + v, err := config.Get(args[1]) + if err != nil { + fmt.Fprintln(os.Stderr, "podcli:", err) + return 1 + } + fmt.Println(v) + case args[0] == "set" && len(args) == 3: + if err := config.Set(args[1], args[2]); err != nil { + fmt.Fprintln(os.Stderr, "podcli:", err) + return 1 + } + fmt.Printf("%s = %s\n", args[1], args[2]) + default: + fmt.Fprintln(os.Stderr, "usage: podcli config get | config set ") + return 2 + } + return 0 +} + +// transcribeEngine resolves which engine a run will use, honoring --engine, +// PODCLI_ENGINE, then defaulting to whisper.cpp on a hermetic Python (which has +// no openai-whisper). 
Covers every entry point that can transcribe: the no-arg +// interactive menu, process, studio, and transcribe. +func transcribeEngine(args []string) string { + cmd := "" + if len(args) > 0 { + cmd = args[0] + } + switch cmd { + case "", "process", "studio", "transcribe": + default: + return "" + } + sel := strings.ToLower(os.Getenv("PODCLI_ENGINE")) + for i, a := range args { + if a == "--engine" && i+1 < len(args) { + sel = strings.ToLower(args[i+1]) + } else if strings.HasPrefix(a, "--engine=") { + sel = strings.ToLower(strings.TrimPrefix(a, "--engine=")) + } + } + if sel == "" && engine.IsHermeticPython() { + sel = "whispercpp" + } + switch sel { + case "whispercpp", "whisper-cpp", "whisper.cpp", "cpp": + return "whispercpp" + } + return sel +} + +func setup(args []string) int { + size := "base" + vad := false + for i := 0; i < len(args); i++ { + switch args[i] { + case "--model": + if i+1 < len(args) { + size = args[i+1] + i++ + } + case "--vad": + vad = true + } + } + fmt.Printf("Provisioning into %s\n", paths.Home()) + p, err := provision.EnsureModel(size) + if err != nil { + fmt.Fprintln(os.Stderr, "podcli: setup:", err) + return 1 + } + fmt.Printf(" model: %s\n", p) + if vad { + vp, err := provision.EnsureVADModel() + if err != nil { + fmt.Fprintln(os.Stderr, "podcli: setup:", err) + return 1 + } + fmt.Printf(" vad: %s\n", vp) + } + if fp, err := provision.EnsureFFmpeg(); err != nil { + fmt.Fprintf(os.Stderr, " ffmpeg: skipped (%v) — backend will use PATH ffmpeg\n", err) + } else { + fmt.Printf(" ffmpeg: %s\n", fp) + } + backendDir := filepath.Join(paths.RuntimeDir(), "backend") + if err := backend.Extract(backendDir); err != nil { + fmt.Fprintf(os.Stderr, " backend: skipped (%v) — falling back to repo/PODCLI_BACKEND\n", err) + backendDir, _ = engine.BackendRoot() + } else { + fmt.Printf(" backend: %s\n", backendDir) + } + if backendDir != "" { + reqs := filepath.Join(backendDir, "requirements-runtime.txt") + if pb, err := provision.EnsurePython(reqs); err 
!= nil { + fmt.Fprintf(os.Stderr, " python: skipped (%v) — using dev venv / system python\n", err) + } else { + fmt.Printf(" python: %s\n", pb) + } + } + if wc, err := provision.EnsureWhisperCpp(); err != nil { + fmt.Fprintf(os.Stderr, " whisper: skipped (%v) — backend will use PATH whisper-cli\n", err) + } else { + fmt.Printf(" whisper: %s\n", wc) + } + if nb, err := provision.EnsureNode(); err != nil { + fmt.Fprintf(os.Stderr, " node: skipped (%v) — Web UI will use system Node if present\n", err) + } else { + fmt.Printf(" node: %s\n", nb) + } + if sd, err := provision.EnsureStudio(); err != nil { + fmt.Fprintf(os.Stderr, " studio: skipped (%v) — Web UI needs a published release\n", err) + } else { + fmt.Printf(" studio: %s\n", sd) + } + if rd, err := provision.EnsureRemotion(); err != nil { + fmt.Fprintf(os.Stderr, " remotion: skipped (%v) — captions/thumbnails need a published release\n", err) + } else { + fmt.Printf(" remotion: %s\n", rd) + if err := provision.PrewarmRemotion(); err != nil { + fmt.Fprintf(os.Stderr, " bundle: deferred to first render (%v)\n", err) + } else { + fmt.Printf(" bundle: prebuilt\n") + } + if err := provision.EnsureRemotionBrowser(); err != nil { + fmt.Fprintf(os.Stderr, " browser: deferred to first render (%v)\n", err) + } else { + fmt.Printf(" browser: ready\n") + } + } + if engine.MCPServer() != "" { + if mcpRegisteredToSelf() { + fmt.Printf(" mcp: already registered\n") + } else if err := registerMCPServer(); err != nil { + fmt.Fprintf(os.Stderr, " mcp: not registered (%v) — run `podcli mcp install`\n", err) + } else { + fmt.Printf(" mcp: registered with Claude Code\n") + } + } + fmt.Println("Done.") + return 0 +} + +// registerMCPServer points Claude Code at this binary's `mcp` command. Remove +// first so re-runs refresh a stale path and stay idempotent. 
+func registerMCPServer() error { + claude, err := exec.LookPath("claude") + if err != nil { + return fmt.Errorf("Claude Code CLI not found on PATH") + } + self, err := os.Executable() + if err != nil { + return err + } + exec.Command(claude, "mcp", "remove", "podcli").Run() + if out, err := exec.Command(claude, "mcp", "add", "podcli", "--", self, "mcp").CombinedOutput(); err != nil { + return fmt.Errorf("%v: %s", err, strings.TrimSpace(string(out))) + } + return nil +} + +func mcpRegisteredToSelf() bool { + claude, err := exec.LookPath("claude") + if err != nil { + return false + } + self, err := os.Executable() + if err != nil { + return false + } + out, err := exec.Command(claude, "mcp", "get", "podcli").CombinedOutput() + return err == nil && strings.Contains(string(out), self) +} + +func mcpInstall() int { + if err := registerMCPServer(); err != nil { + self, _ := os.Executable() + fmt.Fprintf(os.Stderr, "podcli: %v\n", err) + fmt.Fprintf(os.Stderr, "Register manually: claude mcp add podcli -- %s mcp\n", self) + return 1 + } + fmt.Println("Registered podcli MCP server with Claude Code.") + return 0 +} + +func doctor() { + fmt.Printf("podcli %s\n\n", Version) + fmt.Println("Paths") + fmt.Printf(" home: %s\n", paths.Home()) + fmt.Printf(" runtime: %s\n", paths.RuntimeDir()) + fmt.Printf(" models: %s\n", paths.ModelsDir()) + fmt.Printf(" presets/knowledge/assets/history/cache: %s (global — follow you everywhere)\n", paths.Home()) + if cwd, err := os.Getwd(); err == nil { + fmt.Printf(" clips: %s (rendered into your working directory)\n", filepath.Join(cwd, "podcli-clips")) + } + fmt.Println("\nEngine resolution") + if root, ok := engine.BackendRoot(); ok { + fmt.Printf(" backend: %s\n", root) + } else { + fmt.Printf(" backend: NOT FOUND (set PODCLI_BACKEND or run inside the repo)\n") + } + fmt.Printf(" python: %s\n", engine.Python()) + if ff := engine.FFmpeg(); ff != "" { + fmt.Printf(" ffmpeg: %s (hermetic)\n", ff) + } else { + fmt.Printf(" ffmpeg: PATH fallback 
(not yet hermetic)\n") + } + if fp := engine.FFprobe(); fp != "" { + fmt.Printf(" ffprobe: %s (hermetic)\n", fp) + } + if wc := engine.WhisperCLI(); wc != "" { + fmt.Printf(" whisper: %s (hermetic)\n", wc) + } else { + fmt.Printf(" whisper: PATH fallback (install whisper-cli, or provisioned once hosted)\n") + } + if nd := engine.Node(); nd != "" { + fmt.Printf(" node: %s (hermetic)\n", nd) + } else { + fmt.Printf(" node: PATH fallback (Web UI uses system Node, or run `podcli setup`)\n") + } + if ss := engine.StudioServer(); ss != "" { + fmt.Printf(" studio: %s\n", ss) + } else { + fmt.Printf(" studio: not provisioned (Web UI needs a published release)\n") + } + if ms := engine.MCPServer(); ms != "" { + fmt.Printf(" mcp: %s\n", ms) + } else { + fmt.Printf(" mcp: not provisioned (needs a published release)\n") + } + if rs := provision.RemotionScript(); fileExists(rs) { + fmt.Printf(" remotion: %s\n", rs) + } else { + fmt.Printf(" remotion: not provisioned (captions/thumbnails need a published release)\n") + } + fmt.Println("\nModels") + fmt.Printf(" base: %s\n", presence(provision.ModelPath("base"))) + fmt.Printf(" vad: %s\n", presence(provision.VADModelPath())) +} + +func presence(p string) string { + if fi, err := os.Stat(p); err == nil && fi.Size() > 0 { + return fmt.Sprintf("%s (%s)", p, humanBytes(fi.Size())) + } + return "not provisioned — run `podcli setup`" +} + +func fileExists(p string) bool { + _, err := os.Stat(p) + return err == nil +} + +func humanBytes(n int64) string { + switch { + case n >= 1<<20: + return fmt.Sprintf("%d MB", n>>20) + case n >= 1<<10: + return fmt.Sprintf("%d KB", n>>10) + default: + return fmt.Sprintf("%d B", n) + } +} + +func printHelp() { + fmt.Printf(`podcli %s — AI podcast clip generator + +Usage: + podcli [args] + +Engine commands (routed to the processing backend): + process