diff --git a/.agents/skills/video-editor-development/SKILL.md b/.agents/skills/video-editor-development/SKILL.md new file mode 100644 index 000000000..4a960e9c6 --- /dev/null +++ b/.agents/skills/video-editor-development/SKILL.md @@ -0,0 +1,6 @@ +--- +name: video-editor-development +description: Architecture guidance for the LTX video editor — editor store, selectors, actions, undo/redo, persistence, keyboard behavior, timeline semantics, and hot-path performance. +--- + +@../../docs/skills/video-editor-development.md diff --git a/.gitignore b/.gitignore index 4f9ea8e73..dee7410d1 100644 --- a/.gitignore +++ b/.gitignore @@ -54,6 +54,9 @@ yarn-error.log* .env .env.local .env.*.local +secret.txt +secrets.txt +runpod-private-token.txt # Testing coverage/ diff --git a/CLAUDE.md b/CLAUDE.md index 47dc3e3d8..e69de29bb 120000 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1 +0,0 @@ -AGENTS.md \ No newline at end of file diff --git a/README.md b/README.md index c0469cb8a..208aaa0af 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,64 @@ LTX Desktop is an open-source desktop app for generating videos with LTX models > **Status: Beta.** Expect breaking changes. > Frontend architecture is under active refactor; large UI PRs may be declined for now (see [`CONTRIBUTING.md`](docs/CONTRIBUTING.md)). +## Start here: choose your setup + +LTX Desktop can run generation in three ways. Pick one before setup: + +| Mode | Best for | Where generation runs | Sends media to third parties? | +| --- | --- | --- | --- | +| Local GPU | A workstation with enough VRAM | Your own computer | No | +| Private RunPod API | A low-VRAM computer with a private cloud GPU | Your RunPod Pod | No, only your own Pod | +| Official LTX API | Quick managed cloud setup | LTX official servers | Yes | + +### Option A: Private RunPod API setup + +Use this when your desktop computer does not have enough GPU memory, but you do +not want prompts, images, audio, or videos sent to the official LTX API. + +1. 
Create a RunPod Pod with a CUDA GPU. Recommended for LTX 2.3: **A100 80 GB**, **112 GB+ RAM**, and **150 GB+ disk** for ephemeral testing. +2. Expose HTTP port `8000` on the Pod. +3. Build the RunPod image from this repo, then push it to a container registry your Pod can pull from: + + ```bash + docker build -f runpod/Dockerfile -t ltx-desktop-runpod:latest . + ``` + +4. Set these Pod environment variables: + + ```bash + RUNPOD_PRIVATE_API_TOKEN=change-this-long-random-token + LTX_APP_DATA_DIR=/workspace/ltx-data + LTX_PORT=8000 + ``` + +5. Start the Pod with the image. The server listens on (replace `<pod-id>` with your Pod's ID): + + ```text + https://<pod-id>-8000.proxy.runpod.net + ``` + +6. Open LTX Desktop, then go to **Settings > General** and choose **Private RunPod API**. +7. Go to **Settings > API Keys**, paste the RunPod URL, and save the same token from `RUNPOD_PRIVATE_API_TOKEN`. +8. Generate normally. The desktop app uploads inputs to your Pod, the Pod downloads any selected model variation if missing, runs generation, then returns the output video to your local outputs folder. + +Full RunPod details: [`docs/runpod-private-api.md`](docs/runpod-private-api.md). + +### Option B: Local GPU setup + +1. Use Windows or Linux with an NVIDIA CUDA GPU and **16 GB+ VRAM**. +2. Install the app or run from source. +3. Complete first-run setup. +4. Download the required local models when prompted. +5. Optional: enable **Local Text Encoder** for fully local prompt encoding and prompt enhancement. + +### Option C: Official LTX API setup + +1. Create an LTX API key from the [LTX Console](https://console.ltx.video/). +2. Open **Settings > General** and choose the official LTX API mode. +3. Open **Settings > API Keys** and save the LTX API key. +4. Generate normally. API-backed features send prompts and input media to the official LTX service. +

Gen Space

@@ -31,12 +89,13 @@ LTX Desktop is an open-source desktop app for generating videos with LTX models | Platform / hardware | Generation mode | Notes | | --- | --- | --- | | Windows + CUDA GPU with **≥16GB VRAM** | Local generation | Downloads model weights locally | -| Windows (no CUDA, <16GB VRAM, or unknown VRAM) | API-only | **LTX API key required** | +| Windows (no CUDA, <16GB VRAM, or unknown VRAM) | Private RunPod API or official LTX API | RunPod keeps media on your own Pod; official LTX API requires an LTX API key | | Linux + CUDA GPU with **≥16GB VRAM** | Local generation | Downloads model weights locally | -| Linux (no CUDA, <16GB VRAM, or unknown VRAM) | API-only | **LTX API key required** | -| macOS (Apple Silicon builds) | API-only | **LTX API key required** | +| Linux (no CUDA, <16GB VRAM, or unknown VRAM) | Private RunPod API or official LTX API | RunPod keeps media on your own Pod; official LTX API requires an LTX API key | +| macOS (Apple Silicon builds) | Private RunPod API or official LTX API | RunPod keeps media on your own Pod; official LTX API requires an LTX API key | -In API-only mode, available resolutions/durations may be limited to what the API supports. +In official API mode, available resolutions/durations may be limited to what the API supports. +In Private RunPod API mode, available options follow the local LTX pipeline exposed by the Pod. 
## System requirements @@ -55,7 +114,7 @@ In API-only mode, available resolutions/durations may be limited to what the API - 16GB+ RAM (32GB recommended) - Plenty of free disk space for model weights and outputs -### macOS (API-only) +### macOS (remote generation) - Apple Silicon (arm64) - macOS 13+ (Ventura) @@ -91,15 +150,24 @@ Text encoding: to generate videos you must configure text encoding: The LTX API is used for: - **Cloud text encoding and prompt enhancement** — **FREE**; text encoding is highly recommended to speed up inference and save memory -- API-based video generations (required on macOS and on unsupported Windows hardware) — paid +- Official API-based video generations — paid - Retake — paid -An LTX API key is required in API-only mode, but optional on Windows/Linux local mode if you enable the Local Text Encoder. +An LTX API key is required when using the official LTX API mode, and for cloud text encoding in local mode unless you enable the Local Text Encoder. It is not required for Private RunPod API mode. Generate a FREE API key at the [LTX Console](https://console.ltx.video/). Text encoding is free; video generation API usage is paid. [Read more](https://ltx.io/model/model-blog/ltx-2-better-control-for-real-workflows). When you use API-backed features, prompts and media inputs are sent to the API service. Your API key is stored locally in your app data folder — treat it like a secret. +### Private RunPod API token + +Used when **Settings > General > Video Generation Provider** is set to **Private RunPod API**. + +The desktop app sends prompts and media only to the RunPod URL you configure. +The Pod runs the same local backend pipeline, downloads selected model variations +when missing, and returns generated video bytes to your desktop app. Store the +RunPod bearer token securely; anyone with the URL and token can use your Pod. + ### fal API key (optional) Used for Z Image Turbo text-to-image generation in API mode. When enabled, image generation requests are sent to fal.ai. 
@@ -185,6 +253,7 @@ LTX Desktop collects minimal, anonymous usage analytics (app version, platform, ## Docs +- [`runpod-private-api.md`](docs/runpod-private-api.md) — private RunPod setup - [`INSTALLER.md`](docs/INSTALLER.md) — building installers - [`TELEMETRY.md`](docs/TELEMETRY.md) — telemetry and privacy - [`backend/architecture.md`](backend/architecture.md) — backend architecture diff --git a/backend/_routes/private_ltx_api.py b/backend/_routes/private_ltx_api.py new file mode 100644 index 000000000..9af3f8d09 --- /dev/null +++ b/backend/_routes/private_ltx_api.py @@ -0,0 +1,372 @@ +"""Private LTX-compatible API used by self-hosted RunPod backends.""" + +from __future__ import annotations + +import mimetypes +import os +import uuid +from pathlib import Path +from typing import Literal, cast + +from fastapi import APIRouter, Depends, Request +from fastapi.responses import Response +from pydantic import BaseModel, ConfigDict + +from _routes._errors import HTTPError +from api_types import ( + GenerateVideoRequest, + LTXLocalModelId, + LTXVideoGenPipeline, + LTXVideoGenDuration, + LTXVideoGenFps, + LTXVideoGenResolution, + ModelCheckpointID, + RetakeRequest, + RetakeVideoResponse, + VideoCameraMotion, +) +from app_handler import AppHandler +from runtime_config.model_download_specs import get_ltx_model_id_for_pipeline, get_ltx_model_spec, is_cp_downloaded +from state import get_state_service + +router = APIRouter(prefix="/v1", tags=["private-ltx-api"]) + + +class PrivateUploadInitResponse(BaseModel): + upload_url: str + storage_uri: str + required_headers: dict[str, str] = {} + + +class PrivateTextToVideoRequest(BaseModel): + model_config = ConfigDict(extra="ignore") + + prompt: str + model: str = "fast" + resolution: str = "1080p" + duration: float = 5 + fps: float = 24 + generate_audio: bool = False + camera_motion: str = "none" + aspect_ratio: str = "16:9" + enhance_prompt: bool = False + + +class PrivateImageToVideoRequest(PrivateTextToVideoRequest): + 
image_uri: str + + +class PrivateAudioToVideoRequest(BaseModel): + model_config = ConfigDict(extra="ignore") + + prompt: str + audio_uri: str + image_uri: str | None = None + model: str = "fast" + resolution: str = "1080p" + duration: float = 5 + fps: float = 24 + aspect_ratio: str = "16:9" + enhance_prompt: bool = False + + +class PrivateRetakeRequest(BaseModel): + model_config = ConfigDict(extra="ignore") + + video_uri: str + start_time: float + duration: float + prompt: str = "" + mode: Literal["replace_audio_and_video", "replace_video", "replace_audio"] = "replace_audio_and_video" + + +def _uploads_dir(handler: AppHandler) -> Path: + path = handler.config.app_data_dir / "private-api-uploads" + path.mkdir(parents=True, exist_ok=True) + return path + + +def _storage_uri(upload_id: str) -> str: + return f"private-upload://{upload_id}" + + +def _public_base_url(request: Request) -> str: + configured = os.environ.get("RUNPOD_PUBLIC_BASE_URL", "").strip().rstrip("/") + if configured: + return configured + + proto = request.headers.get("x-forwarded-proto") + host = request.headers.get("x-forwarded-host") or request.headers.get("host") + if proto and host: + return f"{proto}://{host}".rstrip("/") + + return str(request.base_url).rstrip("/") + + +def _public_upload_url(request: Request, upload_id: str) -> str: + return f"{_public_base_url(request)}/v1/upload/{upload_id}" + + +def _upload_id_from_uri(storage_uri: str) -> str: + prefix = "private-upload://" + if not storage_uri.startswith(prefix): + raise HTTPError(400, "INVALID_PRIVATE_STORAGE_URI") + upload_id = storage_uri[len(prefix):] + if not upload_id or "/" in upload_id or "\\" in upload_id: + raise HTTPError(400, "INVALID_PRIVATE_STORAGE_URI") + return upload_id + + +def _resolve_uploaded_path(handler: AppHandler, storage_uri: str) -> Path: + upload_id = _upload_id_from_uri(storage_uri) + path = _uploads_dir(handler) / upload_id + if not path.exists(): + raise HTTPError(400, "PRIVATE_UPLOAD_NOT_FOUND") + return 
path + + +def _extension_for_content_type(content_type: str) -> str: + media_type = content_type.split(";", 1)[0].strip().lower() + if media_type == "video/mp4": + return ".mp4" + if media_type in ("image/jpeg", "image/jpg"): + return ".jpg" + if media_type == "image/png": + return ".png" + if media_type in ("audio/wav", "audio/x-wav"): + return ".wav" + if media_type == "audio/mpeg": + return ".mp3" + return mimetypes.guess_extension(media_type) or ".bin" + + +def _safe_int(value: float, allowed: set[int], default: int) -> int: + candidate = int(round(value)) + return candidate if candidate in allowed else default + + +def _local_model(model: str) -> LTXVideoGenPipeline: + if model in ("fast", "ltx-2-3-fast", "ltx-2.3-22b-distilled-1.1"): + return "fast" + if model in ("fast_legacy", "ltx-2.3-22b-distilled"): + return "fast_legacy" + if model in ("pro", "ltx-2-3-pro"): + raise HTTPError(422, "RUNPOD_PRIVATE_API_PRO_MODEL_NOT_AVAILABLE") + raise HTTPError(422, "RUNPOD_PRIVATE_API_MODEL_NOT_AVAILABLE") + + +def _local_model_id(pipeline: LTXVideoGenPipeline) -> LTXLocalModelId: + model_id = get_ltx_model_id_for_pipeline(pipeline) + if model_id is None: + raise HTTPError(422, "RUNPOD_PRIVATE_API_MODEL_NOT_AVAILABLE") + return model_id + + +def _required_private_generation_cp_ids(model_id: LTXLocalModelId) -> set[ModelCheckpointID]: + spec = get_ltx_model_spec(model_id) + return {spec.model_cp, spec.upscale_cp, spec.text_encoder_cp} + + +def _ensure_private_model_downloaded(handler: AppHandler, model_id: LTXLocalModelId) -> None: + missing: set[ModelCheckpointID] = { + cp_id + for cp_id in _required_private_generation_cp_ids(model_id) + if not is_cp_downloaded(handler.config.default_models_dir, cp_id) + } + if not missing: + return + try: + handler.downloads.download_missing_sync(missing) + except HTTPError: + raise + except Exception as exc: + raise HTTPError(500, f"RUNPOD_PRIVATE_MODEL_DOWNLOAD_FAILED: {exc}") from exc + + +def _duration(value: float) -> 
LTXVideoGenDuration: + return cast(LTXVideoGenDuration, _safe_int(value, {5, 6, 8, 10, 12, 14, 16, 18, 20}, 5)) + + +def _fps(value: float) -> LTXVideoGenFps: + return cast(LTXVideoGenFps, _safe_int(value, {24, 25, 48, 50}, 24)) + + +def _camera_motion(value: str) -> VideoCameraMotion: + allowed: set[str] = { + "none", + "dolly_in", + "dolly_out", + "dolly_left", + "dolly_right", + "jib_up", + "jib_down", + "static", + "focus_shift", + } + if value not in allowed: + raise HTTPError(422, "RUNPOD_PRIVATE_API_CAMERA_MOTION_NOT_AVAILABLE") + return cast(VideoCameraMotion, value) + + +def _local_resolution(resolution: str) -> LTXVideoGenResolution: + if resolution in ("540p", "720p", "1080p"): + return cast(LTXVideoGenResolution, resolution) + pixel_map = { + "960x544": "540p", + "544x960": "540p", + "1280x704": "720p", + "704x1280": "720p", + "1920x1088": "1080p", + "1088x1920": "1080p", + "1920x1080": "1080p", + "1080x1920": "1080p", + } + mapped = pixel_map.get(resolution) + if mapped is None: + raise HTTPError(422, "RUNPOD_PRIVATE_API_RESOLUTION_NOT_AVAILABLE") + return cast(LTXVideoGenResolution, mapped) + + +def _aspect_ratio(value: str) -> Literal["16:9", "9:16"]: + if value in ("16:9", "9:16"): + return value # type: ignore[return-value] + raise HTTPError(422, "RUNPOD_PRIVATE_API_ASPECT_RATIO_NOT_AVAILABLE") + + +def _video_response(video_path: str) -> Response: + data = Path(video_path).read_bytes() + return Response(content=data, media_type="video/mp4") + + +def _generate_video(handler: AppHandler, req: GenerateVideoRequest) -> Response: + result = handler.video_generation.generate(req) + if result.status == "cancelled": + raise HTTPError(499, "Generation was cancelled") + return _video_response(result.video_path) + + +@router.post("/upload", response_model=PrivateUploadInitResponse) +def route_private_upload_init( + request: Request, + handler: AppHandler = Depends(get_state_service), +) -> PrivateUploadInitResponse: + upload_id = f"{uuid.uuid4().hex}.bin" + 
upload_url = _public_upload_url(request, upload_id) + required_headers: dict[str, str] = {} + auth_header = request.headers.get("authorization") + if auth_header: + required_headers["Authorization"] = auth_header + return PrivateUploadInitResponse( + upload_url=upload_url, + storage_uri=_storage_uri(upload_id), + required_headers=required_headers, + ) + + +@router.put("/upload/{upload_id}", name="route_private_upload_put") +async def route_private_upload_put( + upload_id: str, + request: Request, + handler: AppHandler = Depends(get_state_service), +) -> dict[str, str]: + if not upload_id.endswith(".bin") or "/" in upload_id or "\\" in upload_id: + raise HTTPError(400, "INVALID_PRIVATE_UPLOAD_ID") + + content_type = request.headers.get("content-type", "application/octet-stream") + final_id = f"{upload_id[:-4]}{_extension_for_content_type(content_type)}" + body = await request.body() + if not body: + raise HTTPError(400, "EMPTY_PRIVATE_UPLOAD") + + path = _uploads_dir(handler) / final_id + path.write_bytes(body) + return {"storage_uri": _storage_uri(final_id)} + + +@router.post("/text-to-video") +def route_private_text_to_video( + payload: PrivateTextToVideoRequest, + handler: AppHandler = Depends(get_state_service), +) -> Response: + model = _local_model(payload.model) + _ensure_private_model_downloaded(handler, _local_model_id(model)) + req = GenerateVideoRequest( + prompt=payload.prompt, + model=model, + resolution=_local_resolution(payload.resolution), + duration=_duration(payload.duration), + fps=_fps(payload.fps), + audio=payload.generate_audio, + cameraMotion=_camera_motion(payload.camera_motion), + aspectRatio=_aspect_ratio(payload.aspect_ratio), + enhancePrompt=payload.enhance_prompt, + ) + return _generate_video(handler, req) + + +@router.post("/image-to-video") +def route_private_image_to_video( + payload: PrivateImageToVideoRequest, + handler: AppHandler = Depends(get_state_service), +) -> Response: + model = _local_model(payload.model) + 
_ensure_private_model_downloaded(handler, _local_model_id(model)) + image_path = _resolve_uploaded_path(handler, payload.image_uri) + req = GenerateVideoRequest( + prompt=payload.prompt, + model=model, + resolution=_local_resolution(payload.resolution), + duration=_duration(payload.duration), + fps=_fps(payload.fps), + audio=payload.generate_audio, + imagePath=str(image_path), + cameraMotion=_camera_motion(payload.camera_motion), + aspectRatio=_aspect_ratio(payload.aspect_ratio), + enhancePrompt=payload.enhance_prompt, + ) + return _generate_video(handler, req) + + +@router.post("/audio-to-video") +def route_private_audio_to_video( + payload: PrivateAudioToVideoRequest, + handler: AppHandler = Depends(get_state_service), +) -> Response: + model = _local_model(payload.model) + _ensure_private_model_downloaded(handler, _local_model_id(model)) + audio_path = _resolve_uploaded_path(handler, payload.audio_uri) + image_path = _resolve_uploaded_path(handler, payload.image_uri) if payload.image_uri else None + req = GenerateVideoRequest( + prompt=payload.prompt, + model=model, + resolution=_local_resolution(payload.resolution), + duration=_duration(payload.duration), + fps=_fps(payload.fps), + audioPath=str(audio_path), + imagePath=str(image_path) if image_path else None, + aspectRatio=_aspect_ratio(payload.aspect_ratio), + enhancePrompt=payload.enhance_prompt, + ) + return _generate_video(handler, req) + + +@router.post("/retake") +def route_private_retake( + payload: PrivateRetakeRequest, + handler: AppHandler = Depends(get_state_service), +) -> Response: + video_path = _resolve_uploaded_path(handler, payload.video_uri) + result = handler.retake.run( + RetakeRequest( + video_path=str(video_path), + start_time=payload.start_time, + duration=payload.duration, + prompt=payload.prompt, + mode=payload.mode, + ) + ) + if result.status == "cancelled": + raise HTTPError(499, "Retake was cancelled") + if isinstance(result, RetakeVideoResponse): + return 
_video_response(result.video_path) + raise HTTPError(500, "Private retake returned no video output") diff --git a/backend/api_model_specs.py b/backend/api_model_specs.py index a7408cf8c..07e84d295 100644 --- a/backend/api_model_specs.py +++ b/backend/api_model_specs.py @@ -13,7 +13,7 @@ LTXVideoGenPipeline, LTXVideoGenResolution, ) -from runtime_config.model_download_specs import get_latest_ltx_model_id, get_ltx_model_spec +from runtime_config.model_download_specs import ALL_LTX_LOCAL_MODEL_IDS, LTXLocalModelRelevant, get_ltx_model_spec def _resolution_spec( *, @@ -125,8 +125,14 @@ def _pairs_to_items( def get_local_video_generation_model_specs() -> list[LTXVideoGenerationModelSpecItem]: - local_model_spec = get_ltx_model_spec(get_latest_ltx_model_id()) - return _pairs_to_items(local_model_spec.supported_pipelines) + items: list[LTXVideoGenerationModelSpecItem] = [] + ordered_model_ids = sorted( + ALL_LTX_LOCAL_MODEL_IDS, + key=lambda model_id: 0 if isinstance(get_ltx_model_spec(model_id).relevance, LTXLocalModelRelevant) else 1, + ) + for model_id in ordered_model_ids: + items.extend(_pairs_to_items(get_ltx_model_spec(model_id).supported_pipelines)) + return items def get_api_video_generation_model_specs() -> list[LTXVideoGenerationModelSpecItem]: diff --git a/backend/api_types.py b/backend/api_types.py index 31bff4666..0129fe957 100644 --- a/backend/api_types.py +++ b/backend/api_types.py @@ -10,7 +10,9 @@ NonEmptyPrompt = Annotated[str, StringConstraints(strip_whitespace=True, min_length=1)] ModelCheckpointID = Literal[ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -18,7 +20,7 @@ "gemma-3-12b-it-qat-q4_0-unquantized", "z-image-turbo", ] -LTXLocalModelId = Literal["ltx-2.3-22b-distilled"] +LTXLocalModelId = Literal["ltx-2.3-22b-distilled", "ltx-2.3-22b-distilled-1.1"] class 
ImageConditioningInput(NamedTuple): @@ -281,7 +283,7 @@ class LtxInsufficientFundsErrorResponse(BaseModel): LTXVideoGenResolution: TypeAlias = Literal["540p", "720p", "1080p", "1440p", "2160p"] LTXVideoGenDuration: TypeAlias = Literal[5, 6, 8, 10, 12, 14, 16, 18, 20] LTXVideoGenFps: TypeAlias = Literal[24, 25, 48, 50] -LTXVideoGenPipeline: TypeAlias = Literal["fast", "pro"] +LTXVideoGenPipeline: TypeAlias = Literal["fast", "fast_legacy", "pro"] class LTXVideoGenerationResolutionSpec(BaseModel): @@ -318,6 +320,7 @@ class GenerateVideoRequest(BaseModel): imagePath: str | None = None audioPath: str | None = None aspectRatio: Literal["16:9", "9:16"] = "16:9" + enhancePrompt: bool | None = None class GenerateImageRequest(BaseModel): diff --git a/backend/app_factory.py b/backend/app_factory.py index e1bdfa212..388593aa2 100644 --- a/backend/app_factory.py +++ b/backend/app_factory.py @@ -21,6 +21,7 @@ from _routes.ic_lora import router as ic_lora_router from _routes.image_gen import router as image_gen_router from _routes.models import router as models_router +from _routes.private_ltx_api import router as private_ltx_api_router from _routes.suggest_gap_prompt import router as suggest_gap_prompt_router from _routes.retake import router as retake_router from _routes.runtime_policy import router as runtime_policy_router @@ -161,5 +162,6 @@ async def _route_generic_error_handler(request: Request, exc: Exception) -> JSON app.include_router(ic_lora_router) app.include_router(runtime_policy_router) app.include_router(hf_auth_router) + app.include_router(private_ltx_api_router) return app diff --git a/backend/app_handler.py b/backend/app_handler.py index a845e737f..4a82245ea 100644 --- a/backend/app_handler.py +++ b/backend/app_handler.py @@ -2,8 +2,10 @@ from __future__ import annotations +import os import threading from dataclasses import dataclass +from typing import Any from state.app_settings import AppSettings from handlers import ( @@ -248,7 +250,6 @@ def 
build_default_service_bundle(config: RuntimeConfig) -> ServiceBundle: from services.a2v_pipeline.ltx_a2v_pipeline import LTXa2vPipeline from services.depth_processor_pipeline.midas_dpt_pipeline import MidasDPTPipeline from services.ic_lora_pipeline.ltx_ic_lora_pipeline import LTXIcLoraPipeline - from services.image_generation_pipeline.zit_image_generation_pipeline import ZitImageGenerationPipeline from services.ltx_api_client.ltx_api_client_impl import LTXAPIClientImpl from services.model_downloader.hugging_face_downloader import HuggingFaceDownloader from services.retake_pipeline.ltx_retake_pipeline import LTXRetakePipeline @@ -257,6 +258,25 @@ def build_default_service_bundle(config: RuntimeConfig) -> ServiceBundle: from services.text_encoder.ltx_text_encoder import LTXTextEncoder from services.video_processor.video_processor_impl import VideoProcessorImpl + if os.environ.get("LTX_VIDEO_ONLY_SERVER") == "1": + class VideoOnlyImageGenerationPipeline: + @staticmethod + def create(model_path: str, device: str | None = None) -> "VideoOnlyImageGenerationPipeline": + del model_path, device + return VideoOnlyImageGenerationPipeline() + + def generate(self, *args: object, **kwargs: object) -> Any: + del args, kwargs + raise RuntimeError("Image generation is disabled on this video-only private server.") + + def to(self, device: str) -> None: + del device + + image_generation_pipeline_class: type[ImageGenerationPipeline] = VideoOnlyImageGenerationPipeline + else: + from services.image_generation_pipeline.zit_image_generation_pipeline import ZitImageGenerationPipeline + image_generation_pipeline_class = ZitImageGenerationPipeline + http = HTTPClientImpl() return ServiceBundle( @@ -274,7 +294,7 @@ def build_default_service_bundle(config: RuntimeConfig) -> ServiceBundle: ltx_api_client=LTXAPIClientImpl(http=http, ltx_api_base_url=config.ltx_api_base_url), zit_api_client=ZitAPIClientImpl(http=http), fast_video_pipeline_class=LTXFastVideoPipeline, - 
image_generation_pipeline_class=ZitImageGenerationPipeline, + image_generation_pipeline_class=image_generation_pipeline_class, ic_lora_pipeline_class=LTXIcLoraPipeline, depth_processor_pipeline_class=MidasDPTPipeline, pose_processor_pipeline_class=DWPosePipeline, diff --git a/backend/handlers/download_handler.py b/backend/handlers/download_handler.py index 814b3dccd..e86d73367 100644 --- a/backend/handlers/download_handler.py +++ b/backend/handlers/download_handler.py @@ -328,6 +328,23 @@ def start_model_download(self, *, download_type: str, cp_ids: set[ModelCheckpoin ) return session_id + def download_missing_sync(self, cp_ids: set[ModelCheckpointID]) -> DownloadSessionId: + if self.config.force_api_generations: + raise HTTPError(409, "LOCAL_MODEL_DOWNLOADS_DISABLED_IN_FORCE_API_MODE") + + with self._lock: + if self.state.downloading_session is not None: + raise HTTPError(409, "DOWNLOAD_ALREADY_RUNNING") + + ordered_cp_ids = self._discover_download_cp_ids(cp_ids) + session_id = self.start_download(set(ordered_cp_ids)) + try: + self._download_worker(ordered_cp_ids, atomic_commit=False) + except Exception as exc: + self.fail_download(str(exc)) + raise + return session_id + def check_model_access(self, cp_ids: set[ModelCheckpointID]) -> CheckModelAccessResponse: repo_ids = {get_model_cp_spec(cp_id).repo_id for cp_id in cp_ids} diff --git a/backend/handlers/pipelines_handler.py b/backend/handlers/pipelines_handler.py index 2e2aa2669..2bdfec226 100644 --- a/backend/handlers/pipelines_handler.py +++ b/backend/handlers/pipelines_handler.py @@ -14,6 +14,7 @@ IMG_GEN_MODEL_CP_ID, get_downloaded_ltx_model_id, get_existing_cp_path, + get_ltx_model_id_for_pipeline, get_ltx_model_spec, ) from runtime_config.runtime_policy import streaming_prefetch_count_for_mode @@ -82,10 +83,10 @@ def _ensure_no_running_generation(self) -> None: case _: return - def _pipeline_matches_model_type(self, model_type: VideoPipelineModelType) -> bool: + def _pipeline_matches_model_type(self, 
model_type: VideoPipelineModelType, model_id: LTXLocalModelId) -> bool: match self.state.gpu_slot: - case GpuSlot(active_pipeline=VideoPipelineState(pipeline=pipeline)): - return pipeline.pipeline_kind == model_type + case GpuSlot(active_pipeline=VideoPipelineState(pipeline=pipeline, model_id=active_model_id)): + return pipeline.pipeline_kind == "fast" and active_model_id == model_id case _: return False @@ -105,12 +106,23 @@ def _install_text_patches_if_needed(self) -> None: return te.service.install_patches(lambda: self.state) - def _require_downloaded_ltx_model_id(self) -> LTXLocalModelId: - model_id = get_downloaded_ltx_model_id(self.models_dir) + def _model_id_for_video_pipeline(self, model_type: VideoPipelineModelType) -> LTXLocalModelId: + model_id = get_ltx_model_id_for_pipeline(model_type) if model_id is None: - raise HTTPError(409, "NO_DOWNLOADED_LTX_MODEL") + raise HTTPError(409, f"UNSUPPORTED_LOCAL_VIDEO_PIPELINE:{model_type}") return model_id + def _require_downloaded_ltx_model_id(self, model_id: LTXLocalModelId | None = None) -> LTXLocalModelId: + resolved_model_id = model_id or get_downloaded_ltx_model_id(self.models_dir) + if resolved_model_id is None: + raise HTTPError(409, "NO_DOWNLOADED_LTX_MODEL") + model_cp = get_ltx_model_spec(resolved_model_id).model_cp + try: + get_existing_cp_path(self.models_dir, model_cp) + except FileNotFoundError as exc: + raise HTTPError(409, "NO_DOWNLOADED_LTX_MODEL") from exc + return resolved_model_id + def _compile_if_enabled(self, state: VideoPipelineState) -> VideoPipelineState: if not self.state.app_settings.use_torch_compile: return state @@ -129,7 +141,7 @@ def _compile_if_enabled(self, state: VideoPipelineState) -> VideoPipelineState: def _create_video_pipeline(self, model_type: VideoPipelineModelType) -> VideoPipelineState: gemma_root = self._text_handler.resolve_gemma_root() - model_id = self._require_downloaded_ltx_model_id() + model_id = 
self._require_downloaded_ltx_model_id(self._model_id_for_video_pipeline(model_type)) spec = get_ltx_model_spec(model_id) checkpoint_path = str(get_existing_cp_path(self.models_dir, spec.model_cp)) upsampler_path = str(get_existing_cp_path(self.models_dir, spec.upscale_cp)) @@ -144,6 +156,7 @@ def _create_video_pipeline(self, model_type: VideoPipelineModelType) -> VideoPip state = VideoPipelineState( pipeline=pipeline, + model_id=model_id, is_compiled=False, ) return self._compile_if_enabled(state) @@ -238,10 +251,11 @@ def _evict_gpu_pipeline_for_swap(self) -> None: def load_gpu_pipeline(self, model_type: VideoPipelineModelType) -> VideoPipelineState: self._install_text_patches_if_needed() + model_id = self._model_id_for_video_pipeline(model_type) state: VideoPipelineState | None = None with self._lock: - if self._pipeline_matches_model_type(model_type): + if self._pipeline_matches_model_type(model_type, model_id): match self.state.gpu_slot: case GpuSlot(active_pipeline=VideoPipelineState() as existing_state): state = existing_state @@ -304,18 +318,19 @@ def load_ic_lora( self._assert_invariants() return state - def load_a2v_pipeline(self) -> A2VPipelineState: + def load_a2v_pipeline(self, model_type: VideoPipelineModelType) -> A2VPipelineState: self._install_text_patches_if_needed() + model_id = self._model_id_for_video_pipeline(model_type) with self._lock: match self.state.gpu_slot: - case GpuSlot(active_pipeline=A2VPipelineState() as state): + case GpuSlot(active_pipeline=A2VPipelineState(model_id=current_model_id) as state) if current_model_id == model_id: return state case _: pass self._evict_gpu_pipeline_for_swap() - model_id = self._require_downloaded_ltx_model_id() + model_id = self._require_downloaded_ltx_model_id(model_id) model_spec = get_ltx_model_spec(model_id) pipeline = self._a2v_pipeline_class.create( @@ -325,7 +340,7 @@ def load_a2v_pipeline(self) -> A2VPipelineState: self.config.device, 
streaming_prefetch_count_for_mode(self.config.local_generations_mode), ) - state = A2VPipelineState(pipeline=pipeline) + state = A2VPipelineState(pipeline=pipeline, model_id=model_id) with self._lock: self.state.gpu_slot = GpuSlot(active_pipeline=state) diff --git a/backend/handlers/retake_handler.py b/backend/handlers/retake_handler.py index f3edc5c9f..3960ba0b3 100644 --- a/backend/handlers/retake_handler.py +++ b/backend/handlers/retake_handler.py @@ -25,7 +25,7 @@ from services.ltx_api_client.ltx_api_client import LTXAPIClientError from services.interfaces import LTXAPIClient from state.app_state_types import AppState -from state.app_settings import should_video_generate_with_ltx_api +from state.app_settings import VideoGenerationProvider, resolve_video_generation_provider class RetakeHandler(StateHandlerBase): @@ -61,16 +61,18 @@ def run(self, req: RetakeRequest) -> RetakeResponse: if not video_file.exists(): raise HTTPError(400, f"Video file not found: {video_path}") - if should_video_generate_with_ltx_api( + provider = resolve_video_generation_provider( force_api_generations=self.config.force_api_generations, settings=self.state.app_settings, - ): + ) + if provider != "local": return self._run_api_retake( video_file=video_file, start_time=start_time, duration=duration, prompt=prompt, mode=mode, + provider=provider, ) return self._run_local_retake( @@ -89,10 +91,9 @@ def _run_api_retake( duration: float, prompt: str, mode: RetakeMode, + provider: VideoGenerationProvider, ) -> RetakeResponse: - api_key = self.state.app_settings.ltx_api_key - if not api_key: - raise HTTPError(400, "LTX API key not configured. 
Set it in Settings.") + api_key, base_url = self._resolve_remote_api_credentials(provider) try: result = self._ltx_api_client.retake( @@ -102,6 +103,7 @@ def _run_api_retake( duration=duration, prompt=prompt, mode=mode, + base_url=base_url, ) except LTXAPIClientError as exc: raise HTTPError(exc.status_code, exc.detail) from exc @@ -117,6 +119,25 @@ def _run_api_retake( raise HTTPError(500, "Retake API returned no result") + def _resolve_remote_api_credentials(self, provider: VideoGenerationProvider) -> tuple[str, str | None]: + settings = self.state.app_settings + if provider == "ltx_api": + api_key = settings.ltx_api_key.strip() + if not api_key: + raise HTTPError(400, "LTX API key not configured. Set it in Settings.") + return api_key, None + + if provider == "runpod": + api_key = settings.runpod_api_token.strip() + base_url = settings.runpod_api_url.strip().rstrip("/") + if not base_url: + raise HTTPError(400, "RUNPOD_API_URL_REQUIRED") + if not api_key: + raise HTTPError(400, "RUNPOD_API_TOKEN_REQUIRED") + return api_key, base_url + + raise HTTPError(500, "INVALID_REMOTE_PROVIDER_CONFIG") + def _run_local_retake( self, *, diff --git a/backend/handlers/settings_handler.py b/backend/handlers/settings_handler.py index 6da5c1557..7e7c04f55 100644 --- a/backend/handlers/settings_handler.py +++ b/backend/handlers/settings_handler.py @@ -66,7 +66,7 @@ def get_settings_snapshot(self) -> AppSettings: def update_settings(self, patch: UpdateSettingsRequest) -> tuple[AppSettings, AppSettings, set[str]]: patch_payload = strip_none_values(ensure_json_object(patch.model_dump(by_alias=False, exclude_unset=True))) - for key_field in ("ltx_api_key", "gemini_api_key", "fal_api_key"): + for key_field in ("ltx_api_key", "gemini_api_key", "fal_api_key", "runpod_api_token"): if key_field in patch_payload and patch_payload[key_field] == "": del patch_payload[key_field] diff --git a/backend/handlers/video_generation_handler.py b/backend/handlers/video_generation_handler.py index 
d8faff5c9..ba059531f 100644 --- a/backend/handlers/video_generation_handler.py +++ b/backend/handlers/video_generation_handler.py @@ -21,6 +21,8 @@ GenerateVideoRequest, GenerateVideoResponse, ImageConditioningInput, + ModelCheckpointID, + LTXVideoGenPipeline, VideoCameraMotion, ) from _routes._errors import HTTPError @@ -32,6 +34,7 @@ from handlers.generation_handler import GenerationHandler from handlers.pipelines_handler import PipelinesHandler from handlers.text_handler import TextHandler +from runtime_config.model_download_specs import get_ltx_model_id_for_pipeline, get_ltx_model_spec from server_utils.media_validation import ( normalize_optional_path, validate_audio_file, @@ -40,7 +43,7 @@ from services.interfaces import LTXAPIClient from services.ltx_api_client.ltx_api_client import LTXAPIClientError from state.app_state_types import AppState -from state.app_settings import should_video_generate_with_ltx_api +from state.app_settings import VideoGenerationProvider, resolve_video_generation_provider if TYPE_CHECKING: from runtime_config.runtime_config import RuntimeConfig @@ -81,16 +84,17 @@ def get_model_specs(self) -> GenerateVideoModelsSpecsResponse: return build_generate_video_model_specs_response() def generate(self, req: GenerateVideoRequest) -> GenerateVideoResponse: - use_api_specs = should_video_generate_with_ltx_api( + provider = resolve_video_generation_provider( force_api_generations=self.config.force_api_generations, settings=self.state.app_settings, ) + use_api_specs = provider == "ltx_api" validation_error = validate_generate_video_request(req, use_api_specs=use_api_specs) if validation_error is not None: raise HTTPError(422, validation_error, code="INVALID_VIDEO_GENERATION_SPEC") - if use_api_specs: - return self._generate_forced_api(req) + if provider != "local": + return self._generate_remote_api(req, provider=provider) if self._generation.is_generation_running(): raise HTTPError(409, "Generation already in progress") @@ -139,11 +143,12 @@ 
def get_9_16_size(res: str) -> tuple[int, int]: seed = self._resolve_seed() try: - self._pipelines.load_gpu_pipeline("fast") + self._pipelines.load_gpu_pipeline(req.model) self._generation.start_generation(generation_id) output_path = self.generate_video( prompt=req.prompt, + model=req.model, image=image, height=height, width=width, @@ -152,6 +157,7 @@ def get_9_16_size(res: str) -> tuple[int, int]: seed=seed, camera_motion=req.cameraMotion, negative_prompt=req.negativePrompt, + requested_enhance_prompt=req.enhancePrompt, ) self._generation.complete_generation(output_path) @@ -171,6 +177,7 @@ def get_9_16_size(res: str) -> tuple[int, int]: def generate_video( self, prompt: str, + model: LTXVideoGenPipeline, image: Image.Image | None, height: int, width: int, @@ -179,10 +186,11 @@ def generate_video( seed: int, camera_motion: VideoCameraMotion, negative_prompt: str, + requested_enhance_prompt: bool | None, ) -> str: t_total_start = time.perf_counter() gen_mode = "i2v" if image is not None else "t2v" - logger.info("[%s] Generation started (model=fast, %dx%d, %d frames, %d fps)", gen_mode, width, height, num_frames, int(fps)) + logger.info("[%s] Generation started (model=%s, %dx%d, %d frames, %d fps)", gen_mode, model, width, height, num_frames, int(fps)) if self._generation.is_generation_cancelled(): raise RuntimeError("Generation was cancelled") @@ -191,7 +199,7 @@ def generate_video( self._generation.update_progress("loading_model", 5, 0, total_steps) t_load_start = time.perf_counter() - pipeline_state = self._pipelines.load_gpu_pipeline("fast") + pipeline_state = self._pipelines.load_gpu_pipeline(model) t_load_end = time.perf_counter() logger.info("[%s] Pipeline load: %.2fs", gen_mode, t_load_end - t_load_start) @@ -211,10 +219,12 @@ def generate_video( try: settings = self.state.app_settings use_api_encoding = not self._text.should_use_local_encoding() - if image is not None: - enhance = use_api_encoding and settings.prompt_enhancer_enabled_i2v + if 
requested_enhance_prompt is not None: + enhance = requested_enhance_prompt + elif image is not None: + enhance = settings.prompt_enhancer_enabled_i2v else: - enhance = use_api_encoding and settings.prompt_enhancer_enabled_t2v + enhance = settings.prompt_enhancer_enabled_t2v encoding_method = "api" if use_api_encoding else "local" t_text_start = time.perf_counter() @@ -237,6 +247,7 @@ def generate_video( frame_rate=fps, images=images, output_path=str(output_path), + enhance_prompt=enhance, ) t_inference_end = time.perf_counter() logger.info("[%s] Inference: %.2fs", gen_mode, t_inference_end - t_inference_start) @@ -289,7 +300,7 @@ def _generate_a2v( generation_id = self._make_generation_id() try: - a2v_state = self._pipelines.load_a2v_pipeline() + a2v_state = self._pipelines.load_a2v_pipeline(req.model) self._generation.start_generation(generation_id) enhanced_prompt = req.prompt + self.config.camera_motion_prompts.get(req.cameraMotion, "") @@ -306,11 +317,12 @@ def _generate_a2v( total_steps = 11 # distilled: 8 steps (stage 1) + 3 steps (stage 2) a2v_settings = self.state.app_settings - a2v_use_api = not self._text.should_use_local_encoding() - if image is not None: - a2v_enhance = a2v_use_api and a2v_settings.prompt_enhancer_enabled_i2v + if req.enhancePrompt is not None: + a2v_enhance = req.enhancePrompt + elif image is not None: + a2v_enhance = a2v_settings.prompt_enhancer_enabled_i2v else: - a2v_enhance = a2v_use_api and a2v_settings.prompt_enhancer_enabled_t2v + a2v_enhance = a2v_settings.prompt_enhancer_enabled_t2v self._generation.update_progress("loading_model", 5, 0, total_steps) self._generation.update_progress("encoding_text", 10, 0, total_steps) @@ -331,6 +343,7 @@ def _generate_a2v( audio_start_time=0.0, audio_max_duration=None, output_path=str(output_path), + enhance_prompt=a2v_enhance, ) if self._generation.is_generation_cancelled(): @@ -398,7 +411,7 @@ def _make_output_path(self) -> Path: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") return 
self.config.outputs_dir / f"ltx2_video_{timestamp}_{self._make_generation_id()}.mp4" - def _generate_forced_api(self, req: GenerateVideoRequest) -> GenerateVideoResponse: + def _generate_remote_api(self, req: GenerateVideoRequest, *, provider: VideoGenerationProvider) -> GenerateVideoResponse: if self._generation.is_generation_running(): raise HTTPError(409, "Generation already in progress") @@ -413,28 +426,22 @@ def _generate_forced_api(self, req: GenerateVideoRequest) -> GenerateVideoRespon try: self._generation.update_progress("validating_request", 5, None, None) - api_key = self.state.app_settings.ltx_api_key.strip() - logger.info("Forced API generation route selected (key_present=%s)", bool(api_key)) - if not api_key: - raise HTTPError(400, "PRO_API_KEY_REQUIRED") + api_key, base_url = self._resolve_remote_api_credentials(provider) + logger.info("Remote API generation route selected (provider=%s base_url=%s)", provider, base_url or self.config.ltx_api_base_url) requested_model = req.model - api_model_id = FORCED_API_MODEL_MAP.get(requested_model) - if api_model_id is None: - raise HTTPError(500, "INVALID_FORCED_API_MODEL_CONFIG") - - resolution_label = req.resolution - resolution_by_aspect = FORCED_API_RESOLUTION_MAP.get(resolution_label) - if resolution_by_aspect is None: - raise HTTPError(500, "INVALID_FORCED_API_RESOLUTION_CONFIG") - - aspect_ratio = req.aspectRatio - if aspect_ratio not in FORCED_API_ALLOWED_ASPECT_RATIOS: - raise HTTPError(400, "INVALID_FORCED_API_ASPECT_RATIO") - - api_resolution = resolution_by_aspect[aspect_ratio] + api_model_id = self._map_remote_api_model(provider, requested_model) + api_resolution = self._map_remote_api_resolution(provider, req.resolution, req.aspectRatio) prompt = req.prompt + runpod_enhance_prompt = self._resolve_runpod_enhance_prompt(req) if provider == "runpod" else None + if provider == "runpod": + self._generation.update_progress("downloading_model", 10, None, None) + 
self._ltx_api_client.ensure_remote_model_downloaded( + api_key=api_key, + base_url=base_url or "", + cp_ids=self._runpod_required_cp_ids(requested_model), + ) if self._generation.is_generation_cancelled(): raise RuntimeError("Generation was cancelled") @@ -449,6 +456,7 @@ def _generate_forced_api(self, req: GenerateVideoRequest) -> GenerateVideoRespon audio_uri = self._ltx_api_client.upload_file( api_key=api_key, file_path=str(validated_audio_path), + base_url=base_url, ) image_uri: str | None = None if validated_image_path is not None: @@ -456,6 +464,7 @@ def _generate_forced_api(self, req: GenerateVideoRequest) -> GenerateVideoRespon image_uri = self._ltx_api_client.upload_file( api_key=api_key, file_path=str(validated_image_path), + base_url=base_url, ) self._generation.update_progress("inference", 55, None, None) video_bytes = self._ltx_api_client.generate_audio_to_video( @@ -465,6 +474,11 @@ def _generate_forced_api(self, req: GenerateVideoRequest) -> GenerateVideoRespon image_uri=image_uri, model=api_model_id, resolution=api_resolution, + duration=float(req.duration) if provider == "runpod" else None, + fps=float(req.fps) if provider == "runpod" else None, + aspect_ratio=req.aspectRatio if provider == "runpod" else None, + enhance_prompt=runpod_enhance_prompt, + base_url=base_url, ) self._generation.update_progress("downloading_output", 85, None, None) elif has_input_image: @@ -478,6 +492,7 @@ def _generate_forced_api(self, req: GenerateVideoRequest) -> GenerateVideoRespon image_uri = self._ltx_api_client.upload_file( api_key=api_key, file_path=str(validated_image_path), + base_url=base_url, ) self._generation.update_progress("inference", 55, None, None) video_bytes = self._ltx_api_client.generate_image_to_video( @@ -490,6 +505,9 @@ def _generate_forced_api(self, req: GenerateVideoRequest) -> GenerateVideoRespon fps=float(fps), generate_audio=generate_audio, camera_motion=req.cameraMotion, + aspect_ratio=req.aspectRatio if provider == "runpod" else None, + 
enhance_prompt=runpod_enhance_prompt, + base_url=base_url, ) self._generation.update_progress("downloading_output", 85, None, None) else: @@ -507,6 +525,9 @@ def _generate_forced_api(self, req: GenerateVideoRequest) -> GenerateVideoRespon fps=float(fps), generate_audio=generate_audio, camera_motion=req.cameraMotion, + aspect_ratio=req.aspectRatio if provider == "runpod" else None, + enhance_prompt=runpod_enhance_prompt, + base_url=base_url, ) self._generation.update_progress("downloading_output", 85, None, None) @@ -540,6 +561,61 @@ def _write_forced_api_video(self, video_bytes: bytes) -> Path: output_path.write_bytes(video_bytes) return output_path + def _resolve_remote_api_credentials(self, provider: VideoGenerationProvider) -> tuple[str, str | None]: + settings = self.state.app_settings + if provider == "ltx_api": + api_key = settings.ltx_api_key.strip() + if not api_key: + raise HTTPError(400, "PRO_API_KEY_REQUIRED") + return api_key, None + + if provider == "runpod": + api_key = settings.runpod_api_token.strip() + base_url = settings.runpod_api_url.strip().rstrip("/") + if not base_url: + raise HTTPError(400, "RUNPOD_API_URL_REQUIRED") + if not api_key: + raise HTTPError(400, "RUNPOD_API_TOKEN_REQUIRED") + return api_key, base_url + + raise HTTPError(500, "INVALID_REMOTE_PROVIDER_CONFIG") + + @staticmethod + def _map_remote_api_model(provider: VideoGenerationProvider, requested_model: str) -> str: + if provider == "runpod": + return requested_model + + api_model_id = FORCED_API_MODEL_MAP.get(requested_model) + if api_model_id is None: + raise HTTPError(500, "INVALID_FORCED_API_MODEL_CONFIG") + return api_model_id + + @staticmethod + def _map_remote_api_resolution(provider: VideoGenerationProvider, resolution_label: str, aspect_ratio: str) -> str: + if provider == "runpod": + return resolution_label + + resolution_by_aspect = FORCED_API_RESOLUTION_MAP.get(resolution_label) + if resolution_by_aspect is None: + raise HTTPError(500, 
"INVALID_FORCED_API_RESOLUTION_CONFIG") + if aspect_ratio not in FORCED_API_ALLOWED_ASPECT_RATIOS: + raise HTTPError(400, "INVALID_FORCED_API_ASPECT_RATIO") + return resolution_by_aspect[aspect_ratio] + + def _resolve_runpod_enhance_prompt(self, req: GenerateVideoRequest) -> bool: + if req.enhancePrompt is not None: + return req.enhancePrompt + settings = self.state.app_settings + return settings.prompt_enhancer_enabled_i2v if req.imagePath else settings.prompt_enhancer_enabled_t2v + + @staticmethod + def _runpod_required_cp_ids(requested_model: LTXVideoGenPipeline) -> set[ModelCheckpointID]: + model_id = get_ltx_model_id_for_pipeline(requested_model) + if model_id is None: + raise HTTPError(422, "RUNPOD_PRIVATE_API_MODEL_NOT_AVAILABLE") + spec = get_ltx_model_spec(model_id) + return {spec.model_cp, spec.upscale_cp, spec.text_encoder_cp} + @staticmethod def _map_ltx_api_generation_error(exc: LTXAPIClientError) -> HTTPError: if exc.status_code == 402 and exc.provider_error_type == "insufficient_funds_error": diff --git a/backend/runpod_server.py b/backend/runpod_server.py new file mode 100644 index 000000000..8cef04df5 --- /dev/null +++ b/backend/runpod_server.py @@ -0,0 +1,49 @@ +"""RunPod-facing entrypoint for the private LTX Desktop backend.""" + +from __future__ import annotations + +import asyncio +import logging +import os +from pathlib import Path + +import uvicorn + +DEFAULT_DATA_DIR = "/workspace/ltx-data" +DEFAULT_PORT = "8000" + +os.environ.setdefault("LTX_APP_DATA_DIR", DEFAULT_DATA_DIR) +os.environ.setdefault("LTX_PORT", DEFAULT_PORT) +os.environ.setdefault("LTX_VIDEO_ONLY_SERVER", "1") + +private_token = os.environ.get("RUNPOD_PRIVATE_API_TOKEN") or os.environ.get("LTX_AUTH_TOKEN") +if not private_token: + raise RuntimeError( + "RUNPOD_PRIVATE_API_TOKEN must be set. The desktop app uses this as the bearer token for your private server." 
+ ) +os.environ["LTX_AUTH_TOKEN"] = private_token + +from ltx2_server import app, log_hardware_info # noqa: E402 + +logger = logging.getLogger(__name__) + + +if __name__ == "__main__": + data_dir = Path(os.environ["LTX_APP_DATA_DIR"]) + data_dir.mkdir(parents=True, exist_ok=True) + port = int(os.environ.get("LTX_PORT", DEFAULT_PORT)) + + logger.info("=" * 60) + logger.info("LTX Desktop Private RunPod Server") + logger.info("Data directory: %s", data_dir) + log_hardware_info() + logger.info("=" * 60) + + config = uvicorn.Config( + app, + host="0.0.0.0", + port=port, + log_level="info", + access_log=True, + ) + asyncio.run(uvicorn.Server(config).serve()) diff --git a/backend/runtime_config/model_download_specs.py b/backend/runtime_config/model_download_specs.py index 7eb924431..a6b3060cf 100644 --- a/backend/runtime_config/model_download_specs.py +++ b/backend/runtime_config/model_download_specs.py @@ -67,6 +67,29 @@ class LTXLocalModelSpec: supported_pipelines: tuple[tuple[LTXVideoGenPipeline, LTXVideoGenerationSpec], ...] 
+def _fast_spec(display_name: str) -> LTXVideoGenerationSpec: + return LTXVideoGenerationSpec( + display_name=display_name, + supported_resolutions_durations={ + "540p": _local_resolution_spec( + fps_to_durations={ + 24: (5, 6, 8, 10, 20), + }, + ), + "720p": _local_resolution_spec( + fps_to_durations={ + 24: (5, 6, 8, 10), + }, + ), + "1080p": _local_resolution_spec( + fps_to_durations={ + 24: (5,), + }, + ), + }, + ) + + def _local_resolution_spec( *, fps_to_durations: dict[LTXVideoGenFps, tuple[LTXVideoGenDuration, ...]], @@ -93,7 +116,15 @@ def get_model_cp_spec(cp_id: ModelCheckpointID) -> ModelCheckpointSpec: expected_size_bytes=43_000_000_000, is_folder=False, repo_id="Lightricks/LTX-2.3", - description="Main transformer model", + description="LTX 2.3 Distilled 1.0 transformer model", + ) + case "ltx-2.3-22b-distilled-1.1": + return ModelCheckpointSpec( + relative_path=Path("ltx-2.3-22b-distilled-1.1.safetensors"), + expected_size_bytes=43_000_000_000, + is_folder=False, + repo_id="Lightricks/LTX-2.3", + description="LTX 2.3 Distilled 1.1 transformer model", ) case "ltx-2.3-spatial-upscaler-x2-1.0": return ModelCheckpointSpec( @@ -101,7 +132,15 @@ def get_model_cp_spec(cp_id: ModelCheckpointID) -> ModelCheckpointSpec: expected_size_bytes=1_900_000_000, is_folder=False, repo_id="Lightricks/LTX-2.3", - description="2x upscaler", + description="2x spatial upscaler 1.0", + ) + case "ltx-2.3-spatial-upscaler-x2-1.1": + return ModelCheckpointSpec( + relative_path=Path("ltx-2.3-spatial-upscaler-x2-1.1.safetensors"), + expected_size_bytes=1_900_000_000, + is_folder=False, + repo_id="Lightricks/LTX-2.3", + description="2x spatial upscaler 1.1", ) case "ltx-2.3-22b-ic-lora-union-control-ref0.5": return ModelCheckpointSpec( @@ -167,30 +206,33 @@ def get_ltx_model_spec(model_id: LTXLocalModelId) -> LTXLocalModelSpec: canny_cp="ltx-2.3-22b-ic-lora-union-control-ref0.5", pose_cp="ltx-2.3-22b-ic-lora-union-control-ref0.5", ), - 
relevance=LTXLocalModelRelevant(upgrade_messages={}), + relevance=LTXLocalModelDeprecated(), + supported_pipelines=( + ( + "fast_legacy", + _fast_spec("LTX 2.3 Fast (Distilled 1.0)"), + ), + ), + ) + case "ltx-2.3-22b-distilled-1.1": + return LTXLocalModelSpec( + model_cp="ltx-2.3-22b-distilled-1.1", + upscale_cp="ltx-2.3-spatial-upscaler-x2-1.1", + text_encoder_cp="gemma-3-12b-it-qat-q4_0-unquantized", + ic_loras_spec=LtxIcLorasSpec( + depth_cp="ltx-2.3-22b-ic-lora-union-control-ref0.5", + canny_cp="ltx-2.3-22b-ic-lora-union-control-ref0.5", + pose_cp="ltx-2.3-22b-ic-lora-union-control-ref0.5", + ), + relevance=LTXLocalModelRelevant( + upgrade_messages={ + "ltx-2.3-22b-distilled": "LTX 2.3 Distilled 1.1 is available with matching updated upscaler weights.", + }, + ), supported_pipelines=( ( "fast", - LTXVideoGenerationSpec( - display_name="LTX 2.3 Fast", - supported_resolutions_durations={ - "540p": _local_resolution_spec( - fps_to_durations={ - 24: (5, 6, 8, 10, 20), - }, - ), - "720p": _local_resolution_spec( - fps_to_durations={ - 24: (5, 6, 8, 10), - }, - ), - "1080p": _local_resolution_spec( - fps_to_durations={ - 24: (5,), - }, - ), - }, - ), + _fast_spec("LTX 2.3 Fast (Distilled 1.1)"), ), ), ) @@ -222,6 +264,16 @@ def get_ltx_model_id_for_cp(cp_id: ModelCheckpointID) -> LTXLocalModelId | None: return None +def get_ltx_model_id_for_pipeline(pipeline: LTXVideoGenPipeline) -> LTXLocalModelId | None: + if pipeline == "pro": + return None + for model_id in ALL_LTX_LOCAL_MODEL_IDS: + spec = get_ltx_model_spec(model_id) + if any(candidate_pipeline == pipeline for candidate_pipeline, _ in spec.supported_pipelines): + return model_id + return None + + def get_ic_loras_cp_ids(ic_loras_spec: LtxIcLorasSpec) -> tuple[ModelCheckpointID, ...]: return tuple(dict.fromkeys((ic_loras_spec.depth_cp, ic_loras_spec.canny_cp, ic_loras_spec.pose_cp))) diff --git a/backend/services/a2v_pipeline/a2v_pipeline.py b/backend/services/a2v_pipeline/a2v_pipeline.py index 
6c6ce3447..4b4244a0e 100644 --- a/backend/services/a2v_pipeline/a2v_pipeline.py +++ b/backend/services/a2v_pipeline/a2v_pipeline.py @@ -35,4 +35,5 @@ def generate( audio_start_time: float, audio_max_duration: float | None, output_path: str, + enhance_prompt: bool = False, ) -> None: ... diff --git a/backend/services/a2v_pipeline/distilled_a2v_pipeline.py b/backend/services/a2v_pipeline/distilled_a2v_pipeline.py index 44fb2066f..820e6000b 100644 --- a/backend/services/a2v_pipeline/distilled_a2v_pipeline.py +++ b/backend/services/a2v_pipeline/distilled_a2v_pipeline.py @@ -88,6 +88,7 @@ def __call__( audio_start_time: float = 0.0, audio_max_duration: float | None = None, tiling_config: TilingConfigType | None = None, + enhance_prompt: bool = False, streaming_prefetch_count: int | None = None, ) -> tuple[Iterator[torch.Tensor], AudioOrNone]: from ltx_core.components.noisers import GaussianNoiser @@ -111,7 +112,13 @@ def __call__( dtype = torch.bfloat16 # Text encode (positive only). - (ctx_p,) = self.prompt_encoder([prompt], streaming_prefetch_count=streaming_prefetch_count) + (ctx_p,) = self.prompt_encoder( + [prompt], + enhance_first_prompt=enhance_prompt, + enhance_prompt_image=ltx_images[0][0] if ltx_images else None, + enhance_prompt_seed=seed, + streaming_prefetch_count=streaming_prefetch_count, + ) video_context = ctx_p.video_encoding audio_context = ctx_p.audio_encoding assert audio_context is not None, "A2V pipeline requires audio context from text encoder" diff --git a/backend/services/a2v_pipeline/ltx_a2v_pipeline.py b/backend/services/a2v_pipeline/ltx_a2v_pipeline.py index 1077f3eb9..73d6753ec 100644 --- a/backend/services/a2v_pipeline/ltx_a2v_pipeline.py +++ b/backend/services/a2v_pipeline/ltx_a2v_pipeline.py @@ -65,6 +65,7 @@ def _run_inference( audio_start_time: float, audio_max_duration: float | None, tiling_config: TilingConfigType, + enhance_prompt: bool, ) -> tuple[torch.Tensor | Iterator[torch.Tensor], AudioOrNone]: return self.pipeline( 
prompt=prompt, @@ -78,6 +79,7 @@ def _run_inference( audio_start_time=audio_start_time, audio_max_duration=audio_max_duration, tiling_config=tiling_config, + enhance_prompt=enhance_prompt, streaming_prefetch_count=self._streaming_prefetch_count, ) @@ -97,6 +99,7 @@ def generate( audio_start_time: float, audio_max_duration: float | None, output_path: str, + enhance_prompt: bool = False, ) -> None: tiling_config = default_tiling_config() video, audio = self._run_inference( @@ -113,6 +116,7 @@ def generate( audio_start_time=audio_start_time, audio_max_duration=audio_max_duration, tiling_config=tiling_config, + enhance_prompt=enhance_prompt, ) chunks = video_chunks_number(num_frames, tiling_config) encode_video_output(video=video, audio=audio, fps=int(frame_rate), output_path=output_path, video_chunks_number_value=chunks) diff --git a/backend/services/fast_video_pipeline/fast_video_pipeline.py b/backend/services/fast_video_pipeline/fast_video_pipeline.py index 13a3697ad..7a800b448 100644 --- a/backend/services/fast_video_pipeline/fast_video_pipeline.py +++ b/backend/services/fast_video_pipeline/fast_video_pipeline.py @@ -33,6 +33,7 @@ def generate( frame_rate: float, images: list[ImageConditioningInput], output_path: str, + enhance_prompt: bool = False, ) -> None: ... 
diff --git a/backend/services/fast_video_pipeline/ltx_fast_video_pipeline.py b/backend/services/fast_video_pipeline/ltx_fast_video_pipeline.py index 50d47292c..1a93b8b11 100644 --- a/backend/services/fast_video_pipeline/ltx_fast_video_pipeline.py +++ b/backend/services/fast_video_pipeline/ltx_fast_video_pipeline.py @@ -69,6 +69,7 @@ def _run_inference( frame_rate: float, images: list[ImageConditioningInput], tiling_config: TilingConfigType, + enhance_prompt: bool, ) -> tuple[torch.Tensor | Iterator[torch.Tensor], AudioOrNone]: from ltx_pipelines.utils.args import ImageConditioningInput as _LtxImageInput @@ -81,6 +82,7 @@ def _run_inference( frame_rate=frame_rate, images=[_LtxImageInput(img.path, img.frame_idx, img.strength) for img in images], tiling_config=tiling_config, + enhance_prompt=enhance_prompt, streaming_prefetch_count=self._streaming_prefetch_count, ) @@ -95,6 +97,7 @@ def generate( frame_rate: float, images: list[ImageConditioningInput], output_path: str, + enhance_prompt: bool = False, ) -> None: tiling_config = default_tiling_config() video, audio = self._run_inference( @@ -106,6 +109,7 @@ def generate( frame_rate=frame_rate, images=images, tiling_config=tiling_config, + enhance_prompt=enhance_prompt, ) chunks = video_chunks_number(num_frames, tiling_config) encode_video_output(video=video, audio=audio, fps=int(frame_rate), output_path=output_path, video_chunks_number_value=chunks) @@ -125,6 +129,7 @@ def warmup(self, output_path: str) -> None: frame_rate=8, images=[], tiling_config=tiling_config, + enhance_prompt=False, ) chunks = video_chunks_number(warmup_frames, tiling_config) encode_video_output(video=video, audio=audio, fps=8, output_path=output_path, video_chunks_number_value=chunks) diff --git a/backend/services/interfaces.py b/backend/services/interfaces.py index 125e2c9c2..25c4c3a56 100644 --- a/backend/services/interfaces.py +++ b/backend/services/interfaces.py @@ -2,8 +2,6 @@ from __future__ import annotations -from typing import Literal - 
from services.a2v_pipeline.a2v_pipeline import A2VPipeline from services.depth_processor_pipeline.depth_processor_pipeline import DepthProcessorPipeline from services.fast_video_pipeline.fast_video_pipeline import FastVideoPipeline @@ -22,7 +20,9 @@ from services.text_encoder.text_encoder import TextEncoder from services.video_processor.video_processor import VideoInfoPayload, VideoProcessor -VideoPipelineModelType = Literal["fast"] +from api_types import LTXVideoGenPipeline + +VideoPipelineModelType = LTXVideoGenPipeline __all__ = [ "A2VPipeline", diff --git a/backend/services/ltx_api_client/ltx_api_client.py b/backend/services/ltx_api_client/ltx_api_client.py index 27867ff1e..1d6232a20 100644 --- a/backend/services/ltx_api_client/ltx_api_client.py +++ b/backend/services/ltx_api_client/ltx_api_client.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from typing import Any, Protocol -from api_types import RetakeMode, VideoCameraMotion +from api_types import ModelCheckpointID, RetakeMode, VideoCameraMotion @dataclass(frozen=True) @@ -35,11 +35,22 @@ def __init__( class LTXAPIClient(Protocol): + def ensure_remote_model_downloaded( + self, + *, + api_key: str, + base_url: str, + cp_ids: set[ModelCheckpointID], + poll_interval_seconds: float = 5.0, + ) -> None: + ... + def upload_file( self, *, api_key: str, file_path: str, + base_url: str | None = None, ) -> str: ... @@ -54,6 +65,9 @@ def generate_text_to_video( fps: float, generate_audio: bool, camera_motion: VideoCameraMotion = "none", + aspect_ratio: str | None = None, + enhance_prompt: bool | None = None, + base_url: str | None = None, ) -> bytes: ... @@ -69,6 +83,9 @@ def generate_image_to_video( fps: float, generate_audio: bool, camera_motion: VideoCameraMotion = "none", + aspect_ratio: str | None = None, + enhance_prompt: bool | None = None, + base_url: str | None = None, ) -> bytes: ... 
@@ -81,6 +98,11 @@ def generate_audio_to_video( image_uri: str | None, model: str, resolution: str, + duration: float | None = None, + fps: float | None = None, + aspect_ratio: str | None = None, + enhance_prompt: bool | None = None, + base_url: str | None = None, ) -> bytes: ... @@ -93,5 +115,6 @@ def retake( duration: float, prompt: str, mode: RetakeMode, + base_url: str | None = None, ) -> LTXRetakeResult: ... diff --git a/backend/services/ltx_api_client/ltx_api_client_impl.py b/backend/services/ltx_api_client/ltx_api_client_impl.py index 489429ee7..ada20920e 100644 --- a/backend/services/ltx_api_client/ltx_api_client_impl.py +++ b/backend/services/ltx_api_client/ltx_api_client_impl.py @@ -4,10 +4,12 @@ import json import mimetypes +import time from pathlib import Path from typing import Any, Literal, cast +from urllib.parse import urlsplit, urlunsplit -from api_types import RetakeMode, VideoCameraMotion +from api_types import ModelCheckpointID, RetakeMode, VideoCameraMotion from pydantic import BaseModel, ConfigDict, ValidationError from services.ltx_api_client.ltx_api_client import LTXAPIClientError, LTXRetakeResult from services.http_client.http_client import HTTPClient @@ -71,6 +73,87 @@ def __init__(self, http: HTTPClient, ltx_api_base_url: str) -> None: self._http = http self._base_url = ltx_api_base_url.rstrip("/") + def _resolve_base_url(self, base_url: str | None) -> str: + return (base_url or self._base_url).rstrip("/") + + def ensure_remote_model_downloaded( + self, + *, + api_key: str, + base_url: str, + cp_ids: set[ModelCheckpointID], + poll_interval_seconds: float = 5.0, + ) -> None: + if not cp_ids: + return + + resolved_base_url = self._resolve_base_url(base_url) + deadline = time.monotonic() + 1800 + while True: + cp_id_payload: list[JSONValue] = [str(cp_id) for cp_id in sorted(cp_ids)] + download_payload: dict[str, JSONValue] = {"type": "download", "cp_ids": cp_id_payload} + response = self._http.post( + 
f"{resolved_base_url}/api/models/download", + headers=self._json_headers(api_key), + json_payload=download_payload, + timeout=30, + ) + if response.status_code == 200: + break + if response.status_code == 409 and "DOWNLOAD_ALREADY_RUNNING" in response.text and time.monotonic() < deadline: + time.sleep(poll_interval_seconds) + continue + err = response.text[:500] if response.text else "Unknown error" + raise LTXAPIClientError( + response.status_code, + f"RunPod model download start failed ({response.status_code}): {err}{self._fmt_request_id(response)}", + stage="model_download", + ) + + try: + payload = cast(dict[str, Any], response.json()) + session_id = str(payload["sessionId"]) + except Exception as exc: + raise LTXAPIClientError( + 500, + f"Unexpected RunPod model download response format{self._fmt_request_id(response)}", + stage="model_download", + ) from exc + + while True: + progress_response = self._http.get( + f"{resolved_base_url}/api/models/download/progress?sessionId={session_id}", + headers={"Authorization": f"Bearer {api_key}"}, + timeout=30, + ) + if progress_response.status_code != 200: + err = progress_response.text[:500] if progress_response.text else "Unknown error" + raise LTXAPIClientError( + progress_response.status_code, + f"RunPod model download progress failed ({progress_response.status_code}): {err}{self._fmt_request_id(progress_response)}", + stage="model_download", + ) + + try: + progress_payload = cast(dict[str, Any], progress_response.json()) + except Exception as exc: + raise LTXAPIClientError( + 500, + f"Unexpected RunPod model download progress format{self._fmt_request_id(progress_response)}", + stage="model_download", + ) from exc + + status = progress_payload.get("status") + if status == "complete": + return + if status == "error": + error = progress_payload.get("error") + detail = error if isinstance(error, str) and error else "Unknown error" + raise LTXAPIClientError(500, f"RunPod model download failed: {detail}", 
stage="model_download") + if time.monotonic() >= deadline: + raise LTXAPIClientError(504, "RunPod model download timed out", stage="model_download") + time.sleep(poll_interval_seconds) + def generate_text_to_video( self, *, @@ -82,6 +165,9 @@ def generate_text_to_video( fps: float, generate_audio: bool, camera_motion: VideoCameraMotion = "none", + aspect_ratio: str | None = None, + enhance_prompt: bool | None = None, + base_url: str | None = None, ) -> bytes: payload: dict[str, JSONValue] = { "prompt": prompt, @@ -91,11 +177,15 @@ def generate_text_to_video( "fps": fps, "generate_audio": generate_audio, } + if aspect_ratio is not None: + payload["aspect_ratio"] = aspect_ratio + if enhance_prompt is not None: + payload["enhance_prompt"] = enhance_prompt mapped_camera_motion = self._map_camera_motion(camera_motion) if mapped_camera_motion is not None: payload["camera_motion"] = mapped_camera_motion response = self._http.post( - f"{self._base_url}/v1/text-to-video", + f"{self._resolve_base_url(base_url)}/v1/text-to-video", headers=self._json_headers(api_key), json_payload=payload, timeout=1200, @@ -114,6 +204,9 @@ def generate_image_to_video( fps: float, generate_audio: bool, camera_motion: VideoCameraMotion = "none", + aspect_ratio: str | None = None, + enhance_prompt: bool | None = None, + base_url: str | None = None, ) -> bytes: payload: dict[str, JSONValue] = { "prompt": prompt, @@ -124,11 +217,15 @@ def generate_image_to_video( "fps": fps, "generate_audio": generate_audio, } + if aspect_ratio is not None: + payload["aspect_ratio"] = aspect_ratio + if enhance_prompt is not None: + payload["enhance_prompt"] = enhance_prompt mapped_camera_motion = self._map_camera_motion(camera_motion) if mapped_camera_motion is not None: payload["camera_motion"] = mapped_camera_motion response = self._http.post( - f"{self._base_url}/v1/image-to-video", + f"{self._resolve_base_url(base_url)}/v1/image-to-video", headers=self._json_headers(api_key), json_payload=payload, timeout=1200, 
@@ -144,6 +241,11 @@ def generate_audio_to_video( image_uri: str | None, model: str, resolution: str, + duration: float | None = None, + fps: float | None = None, + aspect_ratio: str | None = None, + enhance_prompt: bool | None = None, + base_url: str | None = None, ) -> bytes: payload: dict[str, JSONValue] = { "prompt": prompt, @@ -151,10 +253,18 @@ def generate_audio_to_video( "model": model, "resolution": resolution, } + if aspect_ratio is not None: + payload["aspect_ratio"] = aspect_ratio + if enhance_prompt is not None: + payload["enhance_prompt"] = enhance_prompt + if duration is not None: + payload["duration"] = duration + if fps is not None: + payload["fps"] = fps if image_uri is not None: payload["image_uri"] = image_uri response = self._http.post( - f"{self._base_url}/v1/audio-to-video", + f"{self._resolve_base_url(base_url)}/v1/audio-to-video", headers=self._json_headers(api_key), json_payload=payload, timeout=1200, @@ -170,9 +280,10 @@ def retake( duration: float, prompt: str, mode: RetakeMode, + base_url: str | None = None, ) -> LTXRetakeResult: try: - storage_uri = self.upload_file(api_key=api_key, file_path=video_path) + storage_uri = self.upload_file(api_key=api_key, file_path=video_path, base_url=base_url) except LTXAPIClientError as exc: if exc.stage == "upload_init": err_text = self._extract_error_detail(exc.detail) @@ -194,7 +305,7 @@ def retake( payload["prompt"] = prompt response = self._http.post( - f"{self._base_url}/v1/retake", + f"{self._resolve_base_url(base_url)}/v1/retake", headers=self._json_headers(api_key), json_payload=payload, timeout=600, @@ -232,9 +343,9 @@ def retake( error_text = response.text[:500] if response.text else "Unknown error" raise LTXAPIClientError(response.status_code, f"Retake API error: {error_text}{rid}") - def upload_file(self, *, file_path: str, api_key: str) -> str: + def upload_file(self, *, file_path: str, api_key: str, base_url: str | None = None) -> str: upload_resp = self._http.post( - 
f"{self._base_url}/v1/upload", + f"{self._resolve_base_url(base_url)}/v1/upload", headers={"Authorization": f"Bearer {api_key}"}, timeout=30, ) @@ -260,7 +371,7 @@ def upload_file(self, *, file_path: str, api_key: str) -> str: mime = mimetypes.guess_type(path_obj.name)[0] or "application/octet-stream" with open(path_obj, "rb") as media_file: put_resp = self._http.put( - upload_url, + self._normalize_upload_url(upload_url, base_url), data=media_file, headers={"Content-Type": mime, **required_headers}, timeout=300, @@ -270,8 +381,33 @@ def upload_file(self, *, file_path: str, api_key: str) -> str: rid = self._fmt_request_id(upload_resp) raise LTXAPIClientError(500, f"LTX upload failed ({put_resp.status_code}): {err}{rid}", stage="upload_put") + try: + put_payload = cast(dict[str, Any], put_resp.json()) + final_storage_uri = put_payload.get("storage_uri") + if isinstance(final_storage_uri, str) and final_storage_uri: + return final_storage_uri + except Exception: + pass + return storage_uri + def _normalize_upload_url(self, upload_url: str, base_url: str | None) -> str: + if base_url is None: + return upload_url + + parsed_upload = urlsplit(upload_url) + if parsed_upload.path.startswith("/v1/upload/"): + parsed_base = urlsplit(self._resolve_base_url(base_url)) + return urlunsplit(( + parsed_base.scheme, + parsed_base.netloc, + parsed_upload.path, + parsed_upload.query, + parsed_upload.fragment, + )) + + return upload_url + def _extract_video_bytes(self, response: Any, api_key: str) -> bytes: rid = self._fmt_request_id(response) if response.status_code != 200: diff --git a/backend/services/ltx_pipeline_common.py b/backend/services/ltx_pipeline_common.py index 76273b3c9..62981ef4d 100644 --- a/backend/services/ltx_pipeline_common.py +++ b/backend/services/ltx_pipeline_common.py @@ -102,6 +102,7 @@ def __call__( frame_rate: float, images: list[ImageConditioningInput], tiling_config: TilingConfigType | None = None, + enhance_prompt: bool = False, ) -> tuple[torch.Tensor | 
Iterator[torch.Tensor], AudioOrNone]: from ltx_core.components.noisers import GaussianNoiser from ltx_pipelines.utils.args import ImageConditioningInput as _LtxImageInput @@ -114,7 +115,12 @@ def __call__( noiser = GaussianNoiser(generator=generator) dtype = torch.bfloat16 - (ctx_p,) = self.prompt_encoder([prompt]) + (ctx_p,) = self.prompt_encoder( + [prompt], + enhance_first_prompt=enhance_prompt, + enhance_prompt_image=images[0].path if images else None, + enhance_prompt_seed=seed, + ) video_context, audio_context = ctx_p.video_encoding, ctx_p.audio_encoding sigmas = torch.Tensor(DISTILLED_SIGMA_VALUES).to(self.device) diff --git a/backend/state/app_settings.py b/backend/state/app_settings.py index 9e9eb0b21..2b27a38c1 100644 --- a/backend/state/app_settings.py +++ b/backend/state/app_settings.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, TypeGuard, TypeVar, cast, get_args +from typing import Any, Literal, TypeGuard, TypeVar, cast, get_args from pydantic import BaseModel, ConfigDict, Field, create_model, field_validator @@ -49,6 +49,9 @@ class AppSettings(SettingsBaseModel): use_torch_compile: bool = False ltx_api_key: str = "" user_prefers_ltx_api_video_generations: bool = False + video_generation_provider: Literal["local", "ltx_api", "runpod"] = "local" + runpod_api_url: str = "" + runpod_api_token: str = "" fal_api_key: str = "" use_local_text_encoder: bool = False prompt_cache_size: int = 100 @@ -118,6 +121,9 @@ class SettingsResponse(SettingsBaseModel): use_torch_compile: bool = False has_ltx_api_key: bool = False user_prefers_ltx_api_video_generations: bool = False + video_generation_provider: Literal["local", "ltx_api", "runpod"] = "local" + runpod_api_url: str = "" + has_runpod_api_token: bool = False has_fal_api_key: bool = False use_local_text_encoder: bool = False prompt_cache_size: int = 100 @@ -132,17 +138,44 @@ class SettingsResponse(SettingsBaseModel): def to_settings_response(settings: AppSettings) -> 
SettingsResponse: data = settings.model_dump(by_alias=False) ltx_key = data.pop("ltx_api_key", "") + runpod_token = data.pop("runpod_api_token", "") fal_key = data.pop("fal_api_key", "") gemini_key = data.pop("gemini_api_key", "") data["has_ltx_api_key"] = bool(ltx_key) + data["has_runpod_api_token"] = bool(runpod_token) data["has_fal_api_key"] = bool(fal_key) data["has_gemini_api_key"] = bool(gemini_key) # models_dir passes through as-is (not secret) return SettingsResponse.model_validate(data) -def should_video_generate_with_ltx_api(*, force_api_generations: bool, settings: AppSettings) -> bool: +VideoGenerationProvider = Literal["local", "ltx_api", "runpod"] + + +def resolve_video_generation_provider(*, force_api_generations: bool, settings: AppSettings) -> VideoGenerationProvider: + configured_provider = settings.video_generation_provider + if configured_provider == "runpod": + return "runpod" + if configured_provider == "ltx_api": + return "ltx_api" + has_ltx_api_key = bool(settings.ltx_api_key.strip()) - return force_api_generations or ( - settings.user_prefers_ltx_api_video_generations and has_ltx_api_key - ) + if force_api_generations: + return "ltx_api" + if settings.user_prefers_ltx_api_video_generations and has_ltx_api_key: + return "ltx_api" + return "local" + + +def should_video_generate_with_ltx_api(*, force_api_generations: bool, settings: AppSettings) -> bool: + return resolve_video_generation_provider( + force_api_generations=force_api_generations, + settings=settings, + ) == "ltx_api" + + +def should_video_generate_with_remote_api(*, force_api_generations: bool, settings: AppSettings) -> bool: + return resolve_video_generation_provider( + force_api_generations=force_api_generations, + settings=settings, + ) in ("ltx_api", "runpod") diff --git a/backend/state/app_state_types.py b/backend/state/app_state_types.py index 408bafd98..7fabdfd9f 100644 --- a/backend/state/app_state_types.py +++ b/backend/state/app_state_types.py @@ -5,7 +5,7 @@ from 
dataclasses import dataclass, field from typing import TYPE_CHECKING, NewType, Protocol -from api_types import ModelCheckpointID +from api_types import LTXLocalModelId, ModelCheckpointID from state.conditioning_cache import ConditioningCache if TYPE_CHECKING: @@ -101,6 +101,7 @@ class TextEncoderState: @dataclass class VideoPipelineState: pipeline: FastVideoPipeline + model_id: LTXLocalModelId is_compiled: bool @@ -124,6 +125,7 @@ class ICLoraState: @dataclass class A2VPipelineState: pipeline: A2VPipeline + model_id: LTXLocalModelId @dataclass diff --git a/backend/tests/fakes/services.py b/backend/tests/fakes/services.py index 85cb065de..04060a8fe 100644 --- a/backend/tests/fakes/services.py +++ b/backend/tests/fakes/services.py @@ -8,7 +8,7 @@ from typing import Any, ClassVar from PIL import Image -from api_types import ImageConditioningInput, VideoCameraMotion +from api_types import ImageConditioningInput, ModelCheckpointID, VideoCameraMotion from services.interfaces import VideoInfoPayload from services.ltx_api_client.ltx_api_client import LTXRetakeResult from tests.fakes.fake_gpu_info import FakeGpuInfo @@ -114,12 +114,14 @@ def run_background( class FakeLTXAPIClient: def __init__(self) -> None: + self.ensure_remote_model_downloaded_calls: list[dict[str, Any]] = [] self.upload_file_calls: list[dict[str, Any]] = [] self.text_to_video_calls: list[dict[str, Any]] = [] self.image_to_video_calls: list[dict[str, Any]] = [] self.audio_to_video_calls: list[dict[str, Any]] = [] self.retake_calls: list[dict[str, Any]] = [] self.raise_on_upload_file: Exception | None = None + self.raise_on_ensure_remote_model_downloaded: Exception | None = None self.raise_on_text_to_video: Exception | None = None self.raise_on_image_to_video: Exception | None = None self.raise_on_audio_to_video: Exception | None = None @@ -130,16 +132,37 @@ def __init__(self) -> None: self.retake_result = LTXRetakeResult(video_bytes=b"fake-ltx-api-retake-video", result_payload=None) 
self.upload_file_results: dict[str, str] = {} + def ensure_remote_model_downloaded( + self, + *, + api_key: str, + base_url: str, + cp_ids: set[ModelCheckpointID], + poll_interval_seconds: float = 5.0, + ) -> None: + self.ensure_remote_model_downloaded_calls.append( + { + "api_key": api_key, + "base_url": base_url, + "cp_ids": set(cp_ids), + "poll_interval_seconds": poll_interval_seconds, + } + ) + if self.raise_on_ensure_remote_model_downloaded is not None: + raise self.raise_on_ensure_remote_model_downloaded + def upload_file( self, *, api_key: str, file_path: str, + base_url: str | None = None, ) -> str: self.upload_file_calls.append( { "api_key": api_key, "file_path": file_path, + "base_url": base_url, } ) if self.raise_on_upload_file is not None: @@ -158,6 +181,9 @@ def generate_text_to_video( fps: float, generate_audio: bool, camera_motion: VideoCameraMotion = "none", + aspect_ratio: str | None = None, + enhance_prompt: bool | None = None, + base_url: str | None = None, ) -> bytes: self.text_to_video_calls.append( { @@ -169,6 +195,9 @@ def generate_text_to_video( "fps": fps, "generate_audio": generate_audio, "camera_motion": camera_motion, + "aspect_ratio": aspect_ratio, + "enhance_prompt": enhance_prompt, + "base_url": base_url, } ) if self.raise_on_text_to_video is not None: @@ -187,6 +216,9 @@ def generate_image_to_video( fps: float, generate_audio: bool, camera_motion: VideoCameraMotion = "none", + aspect_ratio: str | None = None, + enhance_prompt: bool | None = None, + base_url: str | None = None, ) -> bytes: self.image_to_video_calls.append( { @@ -199,6 +231,9 @@ def generate_image_to_video( "fps": fps, "generate_audio": generate_audio, "camera_motion": camera_motion, + "aspect_ratio": aspect_ratio, + "enhance_prompt": enhance_prompt, + "base_url": base_url, } ) if self.raise_on_image_to_video is not None: @@ -214,6 +249,11 @@ def generate_audio_to_video( image_uri: str | None, model: str, resolution: str, + duration: float | None = None, + fps: float | 
None = None, + aspect_ratio: str | None = None, + enhance_prompt: bool | None = None, + base_url: str | None = None, ) -> bytes: self.audio_to_video_calls.append( { @@ -223,6 +263,11 @@ def generate_audio_to_video( "image_uri": image_uri, "model": model, "resolution": resolution, + "duration": duration, + "fps": fps, + "aspect_ratio": aspect_ratio, + "enhance_prompt": enhance_prompt, + "base_url": base_url, } ) if self.raise_on_audio_to_video is not None: @@ -238,6 +283,7 @@ def retake( duration: float, prompt: str, mode: str, + base_url: str | None = None, ) -> LTXRetakeResult: self.retake_calls.append( { @@ -247,6 +293,7 @@ def retake( "duration": duration, "prompt": prompt, "mode": mode, + "base_url": base_url, } ) if self.raise_on_retake is not None: @@ -510,6 +557,7 @@ def generate( frame_rate: float, images: list[ImageConditioningInput], output_path: str, + enhance_prompt: bool = False, ) -> None: self._record_generate( { @@ -521,6 +569,7 @@ def generate( "frame_rate": frame_rate, "images": images, "output_path": output_path, + "enhance_prompt": enhance_prompt, } ) diff --git a/backend/tests/test_api_calls.py b/backend/tests/test_api_calls.py index 2d1eea9ae..69f64ed0f 100644 --- a/backend/tests/test_api_calls.py +++ b/backend/tests/test_api_calls.py @@ -251,6 +251,26 @@ def test_prefers_api_video_routes_retake_to_api(self, client, test_state, fake_s assert len(test_state.ltx_api_client.retake_calls) == 1 assert len(fake_services.retake_pipeline.generate_calls) == 0 + def test_runpod_provider_routes_retake_to_private_api(self, client, test_state, fake_services): + test_state.config.local_generations_mode = "full_models_loading" + test_state.state.app_settings.video_generation_provider = "runpod" + test_state.state.app_settings.runpod_api_url = "https://pod.example" + test_state.state.app_settings.runpod_api_token = "runpod-token" + video_path = self._make_video(test_state) + test_state.ltx_api_client.retake_result = LTXRetakeResult( + 
video_bytes=b"\x00\x00\x00\x1cftypisom" + b"\x00" * 500, + result_payload=None, + ) + + r = client.post("/api/retake", json=self._base_payload(video_path)) + + assert r.status_code == 200 + assert r.json()["status"] == "complete" + retake_call = test_state.ltx_api_client.retake_calls[-1] + assert retake_call["api_key"] == "runpod-token" + assert retake_call["base_url"] == "https://pod.example" + assert len(fake_services.retake_pipeline.generate_calls) == 0 + def test_prefers_api_video_without_key_falls_back_to_local_retake( self, client, diff --git a/backend/tests/test_generation.py b/backend/tests/test_generation.py index a7f770232..c3bfb93fa 100644 --- a/backend/tests/test_generation.py +++ b/backend/tests/test_generation.py @@ -47,6 +47,7 @@ def _fake_running_generation_state(test_state) -> None: test_state.state.gpu_slot = GpuSlot( active_pipeline=VideoPipelineState( pipeline=pipeline, + model_id="ltx-2.3-22b-distilled-1.1", is_compiled=False, ), ) @@ -523,6 +524,66 @@ def test_t2v_routes_to_ltx_api(self, client, test_state, fake_services): assert call["generate_audio"] is True assert call["camera_motion"] == "dolly_in" + def test_t2v_routes_to_private_runpod_api(self, client, test_state, fake_services): + test_state.config.local_generations_mode = "unsupported" + test_state.state.app_settings.video_generation_provider = "runpod" + test_state.state.app_settings.runpod_api_url = "https://pod.example" + test_state.state.app_settings.runpod_api_token = "runpod-token" + test_state.state.app_settings.prompt_enhancer_enabled_t2v = True + + r = client.post( + "/api/generate", + json={ + "prompt": "A mountain lake", + "resolution": "720p", + "model": "fast", + "duration": 10, + "fps": 24, + "audio": True, + "cameraMotion": "dolly_in", + "aspectRatio": "9:16", + }, + ) + + assert r.status_code == 200 + assert r.json()["status"] == "complete" + assert len(fake_services.ltx_api_client.ensure_remote_model_downloaded_calls) == 1 + download_call = 
fake_services.ltx_api_client.ensure_remote_model_downloaded_calls[0] + assert download_call["api_key"] == "runpod-token" + assert download_call["base_url"] == "https://pod.example" + assert download_call["cp_ids"] == { + "ltx-2.3-22b-distilled-1.1", + "ltx-2.3-spatial-upscaler-x2-1.1", + "gemma-3-12b-it-qat-q4_0-unquantized", + } + assert len(fake_services.ltx_api_client.text_to_video_calls) == 1 + call = fake_services.ltx_api_client.text_to_video_calls[0] + assert call["api_key"] == "runpod-token" + assert call["base_url"] == "https://pod.example" + assert call["model"] == "fast" + assert call["resolution"] == "720p" + assert call["aspect_ratio"] == "9:16" + assert call["enhance_prompt"] is True + + def test_runpod_mode_uses_local_model_specs(self, client, test_state): + test_state.config.local_generations_mode = "unsupported" + test_state.state.app_settings.video_generation_provider = "runpod" + test_state.state.app_settings.runpod_api_url = "https://pod.example" + test_state.state.app_settings.runpod_api_token = "runpod-token" + + r = client.post( + "/api/generate", + json={ + "prompt": "A mountain lake", + "resolution": "2160p", + "model": "pro", + "duration": 8, + "fps": 24, + }, + ) + + assert r.status_code == 422 + def test_i2v_routes_to_ltx_api(self, client, test_state, fake_services, make_test_image, tmp_path): test_state.config.local_generations_mode = "unsupported" test_state.state.app_settings.ltx_api_key = "api-key" @@ -1105,8 +1166,9 @@ def test_models_specs_endpoint_returns_ordered_backend_specs(self, client): assert r.status_code == 200 data = r.json() - assert [item["pipeline"] for item in data["local_models"]] == ["fast"] - assert data["local_models"][0]["spec"]["display_name"] == "LTX 2.3 Fast" + assert [item["pipeline"] for item in data["local_models"]] == ["fast", "fast_legacy"] + assert data["local_models"][0]["spec"]["display_name"] == "LTX 2.3 Fast (Distilled 1.1)" + assert data["local_models"][1]["spec"]["display_name"] == "LTX 2.3 Fast 
(Distilled 1.0)" assert list(data["local_models"][0]["spec"]["supported_resolutions_durations"]["540p"]["fps_to_durations"].keys()) == ["24"] assert [item["pipeline"] for item in data["api_models"]] == ["fast", "pro"] assert list(data["api_models"][0]["spec"]["a2v_supported_resolutions_durations"].keys()) == ["1080p"] @@ -1355,3 +1417,4 @@ def test_local_encoding_skips_api(self, client, test_state, fake_services, creat assert r.status_code == 200 assert len(fake_services.text_encoder.encode_calls) == 0 + assert fake_services.fast_video_pipeline.generate_calls[0]["enhance_prompt"] is True diff --git a/backend/tests/test_health.py b/backend/tests/test_health.py index da6363bc4..02138349d 100644 --- a/backend/tests/test_health.py +++ b/backend/tests/test_health.py @@ -8,6 +8,7 @@ def _set_video_pipeline(state): state.state.gpu_slot = GpuSlot( active_pipeline=VideoPipelineState( pipeline=FakeFastVideoPipeline(), + model_id="ltx-2.3-22b-distilled-1.1", is_compiled=False, ), ) diff --git a/backend/tests/test_ltx_api_client.py b/backend/tests/test_ltx_api_client.py index f3ad3f3e4..b2038ae59 100644 --- a/backend/tests/test_ltx_api_client.py +++ b/backend/tests/test_ltx_api_client.py @@ -205,6 +205,75 @@ def test_upload_file_returns_storage_uri(tmp_path) -> None: assert http.calls[1].method == "put" +def test_upload_file_rewrites_private_runpod_upload_url(tmp_path) -> None: + image_path = tmp_path / "input.png" + image_path.write_bytes(b"fake-image") + + http = FakeHTTPClient() + http.queue( + "post", + FakeResponse( + status_code=200, + json_payload={ + "upload_url": "http://100.65.29.114:60780/v1/upload/abc123.bin", + "storage_uri": "private-upload://abc123.bin", + }, + ), + ) + http.queue("put", FakeResponse(status_code=200, json_payload={"storage_uri": "private-upload://abc123.png"})) + + client = LTXAPIClientImpl(http=http, ltx_api_base_url="https://api.ltx.video") + out = client.upload_file( + api_key="test-key", + file_path=str(image_path), + 
base_url="https://ae5of3dwp6xlyx-8000.proxy.runpod.net", + ) + + assert out == "private-upload://abc123.png" + assert http.calls[0].url == "https://ae5of3dwp6xlyx-8000.proxy.runpod.net/v1/upload" + assert http.calls[1].url == "https://ae5of3dwp6xlyx-8000.proxy.runpod.net/v1/upload/abc123.bin" + + +def test_ensure_remote_model_downloaded_starts_and_polls_download() -> None: + http = FakeHTTPClient() + http.queue( + "post", + FakeResponse(status_code=200, json_payload={"sessionId": "download-session"}), + ) + http.queue( + "get", + FakeResponse(status_code=200, json_payload={"status": "downloading"}), + FakeResponse(status_code=200, json_payload={"status": "complete"}), + ) + + client = LTXAPIClientImpl(http=http, ltx_api_base_url="https://api.ltx.video") + client.ensure_remote_model_downloaded( + api_key="runpod-token", + base_url="https://pod.example", + cp_ids={ + "ltx-2.3-22b-distilled-1.1", + "ltx-2.3-spatial-upscaler-x2-1.1", + }, + poll_interval_seconds=0, + ) + + assert len(http.calls) == 3 + assert http.calls[0].url == "https://pod.example/api/models/download" + assert http.calls[0].headers == { + "Authorization": "Bearer runpod-token", + "Content-Type": "application/json", + } + assert http.calls[0].json_payload == { + "type": "download", + "cp_ids": [ + "ltx-2.3-22b-distilled-1.1", + "ltx-2.3-spatial-upscaler-x2-1.1", + ], + } + assert http.calls[1].url == "https://pod.example/api/models/download/progress?sessionId=download-session" + assert http.calls[2].url == "https://pod.example/api/models/download/progress?sessionId=download-session" + + def test_generate_audio_to_video_with_audio_uri_downloads_video() -> None: http = FakeHTTPClient() http.queue( diff --git a/backend/tests/test_response_models.py b/backend/tests/test_response_models.py index 16230c3f3..0a6d008a6 100644 --- a/backend/tests/test_response_models.py +++ b/backend/tests/test_response_models.py @@ -12,6 +12,7 @@ def test_camelcase_keys(self, client, test_state): test_state.state.gpu_slot = 
GpuSlot( active_pipeline=VideoPipelineState( pipeline=pipeline, + model_id="ltx-2.3-22b-distilled-1.1", is_compiled=False, ), ) diff --git a/backend/tests/test_settings.py b/backend/tests/test_settings.py index 44a310e07..461b94edc 100644 --- a/backend/tests/test_settings.py +++ b/backend/tests/test_settings.py @@ -20,6 +20,9 @@ def test_default_settings(self, client, default_app_settings, test_state): assert data["useTorchCompile"] is False assert data["hasLtxApiKey"] is False assert data["userPrefersLtxApiVideoGenerations"] is False + assert data["videoGenerationProvider"] == "local" + assert data["runpodApiUrl"] == "" + assert data["hasRunpodApiToken"] is False assert data["hasFalApiKey"] is False assert data["useLocalTextEncoder"] is False assert data["promptCacheSize"] == 100 @@ -34,6 +37,7 @@ def test_default_settings(self, client, default_app_settings, test_state): assert "fastModel" not in data assert "proModel" not in data assert "ltxApiKey" not in data + assert "runpodApiToken" not in data assert "falApiKey" not in data assert "geminiApiKey" not in data @@ -92,15 +96,26 @@ def test_update_api_keys(self, client, test_state): "/api/settings", json={ "ltxApiKey": "ltx-key-abc", + "runpodApiToken": "runpod-token-abc", "geminiApiKey": "gemini-key-xyz", "falApiKey": "fal-key-123", }, ) assert r.status_code == 200 assert test_state.state.app_settings.ltx_api_key == "ltx-key-abc" + assert test_state.state.app_settings.runpod_api_token == "runpod-token-abc" assert test_state.state.app_settings.gemini_api_key == "gemini-key-xyz" assert test_state.state.app_settings.fal_api_key == "fal-key-123" + def test_update_runpod_provider_and_url(self, client, test_state): + r = client.post( + "/api/settings", + json={"videoGenerationProvider": "runpod", "runpodApiUrl": "https://pod.example"}, + ) + assert r.status_code == 200 + assert test_state.state.app_settings.video_generation_provider == "runpod" + assert test_state.state.app_settings.runpod_api_url == 
"https://pod.example" + def test_update_user_prefers_api_video_generations(self, client, test_state): r = client.post("/api/settings", json={"userPrefersLtxApiVideoGenerations": True}) assert r.status_code == 200 @@ -108,10 +123,12 @@ def test_update_user_prefers_api_video_generations(self, client, test_state): def test_empty_string_does_not_erase_key(self, client, test_state): test_state.state.app_settings.ltx_api_key = "real-key" + test_state.state.app_settings.runpod_api_token = "real-runpod-token" test_state.state.app_settings.fal_api_key = "fal-key" - r = client.post("/api/settings", json={"ltxApiKey": "", "falApiKey": ""}) + r = client.post("/api/settings", json={"ltxApiKey": "", "runpodApiToken": "", "falApiKey": ""}) assert r.status_code == 200 assert test_state.state.app_settings.ltx_api_key == "real-key" + assert test_state.state.app_settings.runpod_api_token == "real-runpod-token" assert test_state.state.app_settings.fal_api_key == "fal-key" def test_omitted_key_does_not_erase_key(self, client, test_state): diff --git a/docs/runpod-private-api.md b/docs/runpod-private-api.md new file mode 100644 index 000000000..ef00b892d --- /dev/null +++ b/docs/runpod-private-api.md @@ -0,0 +1,112 @@ +# Private RunPod API + +This mode runs the LTX Desktop Python backend on a RunPod Pod and lets the +desktop app use it as a private remote GPU provider. + +## Step-by-step setup + +### 1. Choose a Pod + +Recommended for LTX 2.3: + +- GPU: A100 80 GB +- RAM: 112 GB or more +- Disk: 150 GB or more for ephemeral testing +- Exposed port: HTTP `8000` + +A 100 GB ephemeral disk can work for quick tests, but model caches and generated +outputs may fill it quickly. When budget allows, move `LTX_APP_DATA_DIR` to +persistent storage. + +### 2. Build the container image + +From the repository root, build the RunPod image: + +```bash +docker build -f runpod/Dockerfile -t ltx-desktop-runpod:latest . +``` + +Push that image to a registry your RunPod account can pull. + +### 3. 
 Configure Pod environment variables + +Set these variables on the Pod: + +```bash +RUNPOD_PRIVATE_API_TOKEN=change-this-long-random-token +LTX_APP_DATA_DIR=/workspace/ltx-data +LTX_PORT=8000 +``` + +Optional: + +```bash +USE_SAGE_ATTENTION=1 +``` + +### 4. Start the Pod + +Expose HTTP port `8000` through RunPod's HTTP proxy. After the Pod starts, the +private API URL is: + +```text +https://YOUR_POD_ID-8000.proxy.runpod.net +``` + +Check health: + +```bash +curl -H "Authorization: Bearer YOUR_RUNPOD_PRIVATE_API_TOKEN" \ + https://YOUR_POD_ID-8000.proxy.runpod.net/health +``` + +Expected response includes `status: "ok"` and GPU information. + +### 5. Configure LTX Desktop + +In LTX Desktop: + +1. Open Settings. +2. In General, choose `Private RunPod API` for video generation. +3. In API Keys, set the RunPod URL, for example: + +```text +https://YOUR_POD_ID-8000.proxy.runpod.net +``` + +4. Save the same token you set as `RUNPOD_PRIVATE_API_TOKEN`. + +The desktop backend sends video and retake jobs to `/v1/*` on the private server. +Input media is uploaded to the Pod before generation; the generated video is +downloaded back into the desktop app's normal outputs folder. + +### 6. Generate + +Use Gen Space normally. When you select a model variation, the desktop backend +asks the Pod to download any missing required checkpoint files before inference. +This keeps first-use model downloads out of the long generation request, which +avoids RunPod HTTP proxy timeouts during model warm-up. + +## Model variations + +Private RunPod API mode exposes the local LTX model variations: + +- `fast` — LTX 2.3 Fast (Distilled 1.1) +- `fast_legacy` — LTX 2.3 Fast (Distilled 1.0) + +The selected model's transformer, upscaler, and text encoder checkpoints are +downloaded to `LTX_APP_DATA_DIR/models` on the Pod if they are missing. 
+ +## Current scope + +Implemented private remote operations: + +- text-to-video +- image-to-video +- audio-to-video +- retake +- prompt enhancement + +The private server uses the local LTX pipeline, so it currently exposes local +model limits: `fast` model at `540p`, `720p`, and `1080p`. Official LTX API mode +remains available separately for the existing higher-resolution API specs. diff --git a/frontend/App.tsx b/frontend/App.tsx index 23e4de263..074dc93af 100644 --- a/frontend/App.tsx +++ b/frontend/App.tsx @@ -180,9 +180,18 @@ function AppContent() { const isForcedFirstRun = setupState !== 'loading' && setupState.needsSetup && !setupState.needsLicense && forceApiGenerations + const isOfficialLtxProvider = settings.videoGenerationProvider === 'ltx_api' + || (settings.videoGenerationProvider === 'local' && settings.userPrefersLtxApiVideoGenerations) + const isRunpodProviderConfigured = settings.videoGenerationProvider === 'runpod' + && Boolean(settings.runpodApiUrl) + && settings.hasRunpodApiToken const shouldAutoFinalizeForcedFirstRun = - isForcedFirstRun && isLoaded && settings.hasLtxApiKey && !isFinalizingFirstRun && !firstRunFinalizeError + isForcedFirstRun + && isLoaded + && (settings.hasLtxApiKey || isRunpodProviderConfigured) + && !isFinalizingFirstRun + && !firstRunFinalizeError const areRequiredModelsDownloaded = useCallback(async () => { const [ltxResult, imgGenResult] = await Promise.all([ @@ -344,8 +353,8 @@ function AppContent() { const showGlobalControls = currentView !== 'home' && connected && setupState !== 'loading' && !setupState.needsSetup const shouldBlockUntilSettingsLoaded = forceApiGenerations && !isLoaded - const shouldShowForcedFirstRunUpsell = isForcedFirstRun && isLoaded && !settings.hasLtxApiKey - const shouldShowGlobalForcedUpsell = forceApiGenerations && setupState !== 'loading' && !setupState.needsSetup && isLoaded && !settings.hasLtxApiKey + const shouldShowForcedFirstRunUpsell = isForcedFirstRun && isLoaded && 
isOfficialLtxProvider && !settings.hasLtxApiKey + const shouldShowGlobalForcedUpsell = forceApiGenerations && setupState !== 'loading' && !setupState.needsSetup && isLoaded && isOfficialLtxProvider && !settings.hasLtxApiKey const shouldBlockForLtxKey = shouldShowForcedFirstRunUpsell || shouldShowGlobalForcedUpsell useEffect(() => { @@ -413,6 +422,10 @@ function AppContent() { saveLtxApiKey, settings.hasFalApiKey, settings.hasLtxApiKey, + settings.hasRunpodApiToken, + settings.runpodApiUrl, + settings.userPrefersLtxApiVideoGenerations, + settings.videoGenerationProvider, ]) if (pythonReady === null) { diff --git a/frontend/components/SettingsModal.tsx b/frontend/components/SettingsModal.tsx index 9c353d0e3..54419aec6 100644 --- a/frontend/components/SettingsModal.tsx +++ b/frontend/components/SettingsModal.tsx @@ -17,12 +17,14 @@ interface SettingsModalProps { type TabId = 'general' | 'apiKeys' | 'promptEnhancer' | 'about' export function SettingsModal({ isOpen, onClose, initialTab }: SettingsModalProps) { - const { settings, updateSettings, saveLtxApiKey, saveFalApiKey, saveGeminiApiKey, forceApiGenerations } = useAppSettings() + const { settings, updateSettings, saveLtxApiKey, saveRunpodApiToken, saveFalApiKey, saveGeminiApiKey, forceApiGenerations } = useAppSettings() const onSettingsChange = (next: AppSettings) => updateSettings(next) const [activeTab, setActiveTab] = useState('general') const [ltxApiKeyInput, setLtxApiKeyInput] = useState('') const ltxApiKeyInputRef = useRef(null) const [focusLtxApiKeyInputOnTabChange, setFocusLtxApiKeyInputOnTabChange] = useState(false) + const [runpodApiTokenInput, setRunpodApiTokenInput] = useState('') + const runpodApiTokenInputRef = useRef(null) const [falApiKeyInput, setFalApiKeyInput] = useState('') const falApiKeyInputRef = useRef(null) const [geminiApiKeyInput, setGeminiApiKeyInput] = useState('') @@ -252,6 +254,14 @@ export function SettingsModal({ isOpen, onClose, initialTab }: SettingsModalProp { id: 
'promptEnhancer' as TabId, label: 'Prompt Enhancer', icon: Sparkles }, { id: 'about' as TabId, label: 'About', icon: Info }, ] + const promptEnhancerBackendLabel = settings.videoGenerationProvider === 'runpod' + ? 'your private RunPod server' + : settings.useLocalTextEncoder + ? 'the local text encoder' + : 'the LTX API' + const promptEnhancerAvailable = settings.videoGenerationProvider === 'runpod' + ? Boolean(settings.runpodApiUrl && settings.hasRunpodApiToken) + : settings.useLocalTextEncoder || settings.hasLtxApiKey return (
@@ -332,54 +342,73 @@ export function SettingsModal({ isOpen, onClose, initialTab }: SettingsModalProp
- {!forceApiGenerations && ( -
+

Videos Generation

-
{ - if (!settings.hasLtxApiKey) { - openApiKeysAndFocusLtxInput() - return - } - onSettingsChange({ - ...settings, - userPrefersLtxApiVideoGenerations: !settings.userPrefersLtxApiVideoGenerations, - }) - }} - > -
-
-
- - Generate With API +
+ {[ + { + id: 'local' as const, + title: 'Local GPU', + description: 'Use this computer for video generation.', + warning: forceApiGenerations ? 'Local video generation is not available on this computer.' : null, + }, + { + id: 'runpod' as const, + title: 'Private RunPod API', + description: 'Send video jobs to your self-hosted RunPod FastAPI server.', + warning: (!settings.runpodApiUrl || !settings.hasRunpodApiToken) + ? 'RunPod URL and token required — configure them in the API Keys tab.' + : null, + }, + { + id: 'ltx_api' as const, + title: 'Official LTX API', + description: 'Use LTX cloud API for video generation.', + warning: !settings.hasLtxApiKey ? 'LTX API key required — configure it in the API Keys tab.' : null, + }, + ].map((provider) => ( +
{ + onSettingsChange({ + ...settings, + videoGenerationProvider: provider.id, + userPrefersLtxApiVideoGenerations: provider.id === 'ltx_api', + }) + }} + > +
+
+
+ + {provider.title} +
+

{provider.description}

+
+
+ {settings.videoGenerationProvider === provider.id && } +
-

- Use LTX API for video generation when an LTX API key is configured. -

-
-
- {settings.userPrefersLtxApiVideoGenerations && } -
-
- {!settings.hasLtxApiKey && ( -
- - API key required — configure it in the API Keys tab. + {provider.warning && ( +
+ + {provider.warning} +
+ )}
- )} + ))}
-
- )} +
{/* Text Encoding Section */} {!forceApiGenerations && ( @@ -765,6 +794,70 @@ export function SettingsModal({ isOpen, onClose, initialTab }: SettingsModalProp
+ {/* Private RunPod API Section */} +
+
+ +

Private RunPod API

+
+ +

+ Connect video generation and retake jobs to your self-hosted RunPod FastAPI server. +

+ +
+ onSettingsChange({ ...settings, runpodApiUrl: e.target.value.trim() })} + placeholder="https://your-runpod-url.proxy.runpod.net" + onKeyDown={(e) => e.stopPropagation()} + className="w-full px-3 py-2 bg-zinc-800 border border-zinc-700 rounded-lg text-sm text-white placeholder-zinc-500 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent" + /> +
+ setRunpodApiTokenInput(e.target.value)} + placeholder={settings.hasRunpodApiToken ? 'Enter new token to replace...' : 'Enter your private server token...'} + stopPropagation + className="flex-1" + /> + +
+
+
+ {settings.runpodApiUrl && settings.hasRunpodApiToken ? ( + <> + + Server configured + + ) : ( + <> + + URL and token required + + )} +
+
+
+
+ {/* FAL API Key Section */}
@@ -957,20 +1050,23 @@ export function SettingsModal({ isOpen, onClose, initialTab }: SettingsModalProp

- Automatically enhances your prompts via the LTX API with rich visual details, sound descriptions, + Automatically enhances your prompts via {promptEnhancerBackendLabel} with rich visual details, sound descriptions, and motion cues to help generate higher quality videos. Control independently for each generation type.

- {!settings.hasLtxApiKey ? ( + {!promptEnhancerAvailable ? (
-

LTX API key required

+

+ {settings.videoGenerationProvider === 'runpod' ? 'RunPod server required' : 'Text encoder required'} +

- Prompt enhancement runs server-side on the LTX API. To use this feature, you need to configure - an API key in the API Keys tab. + {settings.videoGenerationProvider === 'runpod' + ? 'Prompt enhancement will run on your private server. Configure the RunPod URL and token in the API Keys tab.' + : 'Prompt enhancement needs either the local text encoder or an LTX API key configured in the API Keys tab.'}

@@ -978,7 +1074,7 @@ export function SettingsModal({ isOpen, onClose, initialTab }: SettingsModalProp onClick={() => setActiveTab('apiKeys')} className="w-full mt-1 px-4 py-2 bg-blue-600 hover:bg-blue-500 text-white text-sm font-medium rounded-lg transition-colors" > - Set API Key + {settings.videoGenerationProvider === 'runpod' ? 'Configure RunPod' : 'Open API Keys'}
diff --git a/frontend/components/SettingsPanel.tsx b/frontend/components/SettingsPanel.tsx index dea6c78d2..53c288f45 100644 --- a/frontend/components/SettingsPanel.tsx +++ b/frontend/components/SettingsPanel.tsx @@ -5,12 +5,13 @@ import { resolveVideoGenerationOptions, sanitizeVideoGenerationSettings, type VideoGenerationModelSpecItem, + type VideoGenerationPipeline, } from '../lib/video-generation-model-specs' export type GenerationMode = 'text-to-video' | 'image-to-video' | 'text-to-image' export interface GenerationSettings { - model: 'fast' | 'pro' + model: VideoGenerationPipeline duration: number videoResolution: string fps: number diff --git a/frontend/contexts/AppSettingsContext.tsx b/frontend/contexts/AppSettingsContext.tsx index 26e50f7e9..d5c7e4f1a 100644 --- a/frontend/contexts/AppSettingsContext.tsx +++ b/frontend/contexts/AppSettingsContext.tsx @@ -6,6 +6,9 @@ export interface AppSettings { useTorchCompile: boolean hasLtxApiKey: boolean userPrefersLtxApiVideoGenerations: boolean + videoGenerationProvider: 'local' | 'ltx_api' | 'runpod' + runpodApiUrl: string + hasRunpodApiToken: boolean hasFalApiKey: boolean hasGeminiApiKey: boolean useLocalTextEncoder: boolean @@ -21,6 +24,9 @@ export const DEFAULT_APP_SETTINGS: AppSettings = { useTorchCompile: false, hasLtxApiKey: false, userPrefersLtxApiVideoGenerations: false, + videoGenerationProvider: 'local', + runpodApiUrl: '', + hasRunpodApiToken: false, hasFalApiKey: false, hasGeminiApiKey: false, useLocalTextEncoder: false, @@ -41,10 +47,12 @@ interface AppSettingsContextValue { updateSettings: (patch: Partial | ((prev: AppSettings) => AppSettings)) => void refreshSettings: () => Promise saveLtxApiKey: (value: string) => Promise + saveRunpodApiToken: (value: string) => Promise saveFalApiKey: (value: string) => Promise saveGeminiApiKey: (value: string) => Promise forceApiGenerations: boolean shouldVideoGenerateWithLtxApi: boolean + shouldVideoGenerateWithRemoteApi: boolean } const AppSettingsContext = 
createContext(null) @@ -66,6 +74,9 @@ function normalizeAppSettings(data: Partial): AppSettings { useTorchCompile: data.useTorchCompile ?? DEFAULT_APP_SETTINGS.useTorchCompile, hasLtxApiKey: data.hasLtxApiKey ?? DEFAULT_APP_SETTINGS.hasLtxApiKey, userPrefersLtxApiVideoGenerations: data.userPrefersLtxApiVideoGenerations ?? DEFAULT_APP_SETTINGS.userPrefersLtxApiVideoGenerations, + videoGenerationProvider: data.videoGenerationProvider ?? DEFAULT_APP_SETTINGS.videoGenerationProvider, + runpodApiUrl: data.runpodApiUrl ?? DEFAULT_APP_SETTINGS.runpodApiUrl, + hasRunpodApiToken: data.hasRunpodApiToken ?? DEFAULT_APP_SETTINGS.hasRunpodApiToken, hasFalApiKey: data.hasFalApiKey ?? DEFAULT_APP_SETTINGS.hasFalApiKey, hasGeminiApiKey: data.hasGeminiApiKey ?? DEFAULT_APP_SETTINGS.hasGeminiApiKey, useLocalTextEncoder: data.useLocalTextEncoder ?? DEFAULT_APP_SETTINGS.useLocalTextEncoder, @@ -194,7 +205,7 @@ export function AppSettingsProvider({ children }: { children: ReactNode }) { useEffect(() => { if (!isLoaded || backendProcessStatus !== 'alive') return const syncTimer = setTimeout(async () => { - const { hasLtxApiKey: _a, hasFalApiKey: _b, hasGeminiApiKey: _c, modelsDir: _d, ...syncPayload } = settings + const { hasLtxApiKey: _a, hasRunpodApiToken: _b, hasFalApiKey: _c, hasGeminiApiKey: _d, modelsDir: _e, ...syncPayload } = settings const result = await ApiClient.updateSettings(syncPayload) if (!result.ok) { // Best-effort settings sync. 
@@ -219,6 +230,14 @@ export function AppSettingsProvider({ children }: { children: ReactNode }) { await refreshSettings() }, [refreshSettings]) + const saveRunpodApiToken = useCallback(async (value: string) => { + const result = await ApiClient.updateSettings({ runpodApiToken: value }) + if (!result.ok) { + throw new Error(result.error.message) + } + await refreshSettings() + }, [refreshSettings]) + const saveGeminiApiKey = useCallback(async (value: string) => { const result = await ApiClient.updateSettings({ geminiApiKey: value }) if (!result.ok) { @@ -236,7 +255,14 @@ export function AppSettingsProvider({ children }: { children: ReactNode }) { }, [refreshSettings]) const shouldVideoGenerateWithLtxApi = - forceApiGenerations || (settings.userPrefersLtxApiVideoGenerations && settings.hasLtxApiKey) + settings.videoGenerationProvider === 'ltx_api' + || (settings.videoGenerationProvider === 'local' && forceApiGenerations) + || ( + settings.videoGenerationProvider === 'local' + && settings.userPrefersLtxApiVideoGenerations + && settings.hasLtxApiKey + ) + const shouldVideoGenerateWithRemoteApi = shouldVideoGenerateWithLtxApi || settings.videoGenerationProvider === 'runpod' const contextValue = useMemo( () => ({ @@ -246,12 +272,14 @@ export function AppSettingsProvider({ children }: { children: ReactNode }) { updateSettings, refreshSettings, saveLtxApiKey, + saveRunpodApiToken, saveFalApiKey, saveGeminiApiKey, forceApiGenerations, shouldVideoGenerateWithLtxApi, + shouldVideoGenerateWithRemoteApi, }), - [forceApiGenerations, isLoaded, refreshSettings, runtimePolicyLoaded, saveFalApiKey, saveGeminiApiKey, saveLtxApiKey, settings, shouldVideoGenerateWithLtxApi, updateSettings], + [forceApiGenerations, isLoaded, refreshSettings, runtimePolicyLoaded, saveFalApiKey, saveGeminiApiKey, saveLtxApiKey, saveRunpodApiToken, settings, shouldVideoGenerateWithLtxApi, shouldVideoGenerateWithRemoteApi, updateSettings], ) return {children} diff --git 
a/frontend/generated/backend-openapi.json b/frontend/generated/backend-openapi.json index f30623b5a..adcb72544 100644 --- a/frontend/generated/backend-openapi.json +++ b/frontend/generated/backend-openapi.json @@ -92,6 +92,28 @@ ], "title": "Promptenhancerenabledt2V" }, + "runpodApiToken": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Runpodapitoken" + }, + "runpodApiUrl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Runpodapiurl" + }, "seedLocked": { "anyOf": [ { @@ -135,6 +157,22 @@ } ], "title": "Userprefersltxapivideogenerations" + }, + "videoGenerationProvider": { + "anyOf": [ + { + "enum": [ + "local", + "ltx_api", + "runpod" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Videogenerationprovider" } }, "title": "AppSettingsPatch", @@ -179,7 +217,9 @@ "items": { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -256,7 +296,9 @@ "items": { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -274,7 +316,9 @@ "items": { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -293,7 +337,9 @@ { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -543,6 +589,17 @@ "title": "Duration", "type": "integer" }, + "enhancePrompt": { + "anyOf": [ + { + "type": "boolean" + }, + { + 
"type": "null" + } + ], + "title": "Enhanceprompt" + }, "fps": { "default": 24, "enum": [ @@ -569,6 +626,7 @@ "default": "fast", "enum": [ "fast", + "fast_legacy", "pro" ], "title": "Model", @@ -1044,7 +1102,9 @@ { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -1072,6 +1132,7 @@ "pipeline": { "enum": [ "fast", + "fast_legacy", "pro" ], "title": "Pipeline", @@ -1177,7 +1238,9 @@ "items": { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -1209,7 +1272,9 @@ "items": { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -1268,7 +1333,9 @@ "items": { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -1285,7 +1352,9 @@ "items": { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -1299,7 +1368,10 @@ "type": "array" }, "ltx_model_id": { - "const": "ltx-2.3-22b-distilled", + "enum": [ + "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1" + ], "title": "Ltx Model Id", "type": "string" }, @@ -1335,7 +1407,9 @@ "items": { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", 
"ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -1359,7 +1433,9 @@ "items": { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -1438,6 +1514,240 @@ "title": "ModelStatusItem", "type": "object" }, + "PrivateAudioToVideoRequest": { + "properties": { + "aspect_ratio": { + "default": "16:9", + "title": "Aspect Ratio", + "type": "string" + }, + "audio_uri": { + "title": "Audio Uri", + "type": "string" + }, + "duration": { + "default": 5, + "title": "Duration", + "type": "number" + }, + "enhance_prompt": { + "default": false, + "title": "Enhance Prompt", + "type": "boolean" + }, + "fps": { + "default": 24, + "title": "Fps", + "type": "number" + }, + "image_uri": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Image Uri" + }, + "model": { + "default": "fast", + "title": "Model", + "type": "string" + }, + "prompt": { + "title": "Prompt", + "type": "string" + }, + "resolution": { + "default": "1080p", + "title": "Resolution", + "type": "string" + } + }, + "required": [ + "prompt", + "audio_uri" + ], + "title": "PrivateAudioToVideoRequest", + "type": "object" + }, + "PrivateImageToVideoRequest": { + "properties": { + "aspect_ratio": { + "default": "16:9", + "title": "Aspect Ratio", + "type": "string" + }, + "camera_motion": { + "default": "none", + "title": "Camera Motion", + "type": "string" + }, + "duration": { + "default": 5, + "title": "Duration", + "type": "number" + }, + "enhance_prompt": { + "default": false, + "title": "Enhance Prompt", + "type": "boolean" + }, + "fps": { + "default": 24, + "title": "Fps", + "type": "number" + }, + "generate_audio": { + "default": false, + "title": "Generate Audio", + "type": "boolean" + }, + "image_uri": { + "title": "Image Uri", + "type": "string" + }, + "model": { + "default": 
"fast", + "title": "Model", + "type": "string" + }, + "prompt": { + "title": "Prompt", + "type": "string" + }, + "resolution": { + "default": "1080p", + "title": "Resolution", + "type": "string" + } + }, + "required": [ + "prompt", + "image_uri" + ], + "title": "PrivateImageToVideoRequest", + "type": "object" + }, + "PrivateRetakeRequest": { + "properties": { + "duration": { + "title": "Duration", + "type": "number" + }, + "mode": { + "default": "replace_audio_and_video", + "enum": [ + "replace_audio_and_video", + "replace_video", + "replace_audio" + ], + "title": "Mode", + "type": "string" + }, + "prompt": { + "default": "", + "title": "Prompt", + "type": "string" + }, + "start_time": { + "title": "Start Time", + "type": "number" + }, + "video_uri": { + "title": "Video Uri", + "type": "string" + } + }, + "required": [ + "video_uri", + "start_time", + "duration" + ], + "title": "PrivateRetakeRequest", + "type": "object" + }, + "PrivateTextToVideoRequest": { + "properties": { + "aspect_ratio": { + "default": "16:9", + "title": "Aspect Ratio", + "type": "string" + }, + "camera_motion": { + "default": "none", + "title": "Camera Motion", + "type": "string" + }, + "duration": { + "default": 5, + "title": "Duration", + "type": "number" + }, + "enhance_prompt": { + "default": false, + "title": "Enhance Prompt", + "type": "boolean" + }, + "fps": { + "default": 24, + "title": "Fps", + "type": "number" + }, + "generate_audio": { + "default": false, + "title": "Generate Audio", + "type": "boolean" + }, + "model": { + "default": "fast", + "title": "Model", + "type": "string" + }, + "prompt": { + "title": "Prompt", + "type": "string" + }, + "resolution": { + "default": "1080p", + "title": "Resolution", + "type": "string" + } + }, + "required": [ + "prompt" + ], + "title": "PrivateTextToVideoRequest", + "type": "object" + }, + "PrivateUploadInitResponse": { + "properties": { + "required_headers": { + "additionalProperties": { + "type": "string" + }, + "default": {}, + "title": 
"Required Headers", + "type": "object" + }, + "storage_uri": { + "title": "Storage Uri", + "type": "string" + }, + "upload_url": { + "title": "Upload Url", + "type": "string" + } + }, + "required": [ + "upload_url", + "storage_uri" + ], + "title": "PrivateUploadInitResponse", + "type": "object" + }, "RetakeCancelledResponse": { "properties": { "status": { @@ -1559,6 +1869,11 @@ "title": "Hasltxapikey", "type": "boolean" }, + "hasRunpodApiToken": { + "default": false, + "title": "Hasrunpodapitoken", + "type": "boolean" + }, "lockedSeed": { "default": 42, "title": "Lockedseed", @@ -1584,6 +1899,11 @@ "title": "Promptenhancerenabledt2V", "type": "boolean" }, + "runpodApiUrl": { + "default": "", + "title": "Runpodapiurl", + "type": "string" + }, "seedLocked": { "default": false, "title": "Seedlocked", @@ -1603,6 +1923,16 @@ "default": false, "title": "Userprefersltxapivideogenerations", "type": "boolean" + }, + "videoGenerationProvider": { + "default": "local", + "enum": [ + "local", + "ltx_api", + "runpod" + ], + "title": "Videogenerationprovider", + "type": "string" } }, "title": "SettingsResponse", @@ -1711,7 +2041,9 @@ { "enum": [ "ltx-2.3-22b-distilled", + "ltx-2.3-22b-distilled-1.1", "ltx-2.3-spatial-upscaler-x2-1.0", + "ltx-2.3-spatial-upscaler-x2-1.1", "ltx-2.3-22b-ic-lora-union-control-ref0.5", "dpt-hybrid-midas", "yolox-l-torchscript", @@ -3083,6 +3415,299 @@ "health" ] } + }, + "/v1/audio-to-video": { + "post": { + "operationId": "route_private_audio_to_video_v1_audio_to_video_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PrivateAudioToVideoRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "4XX": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Client Error" + }, + "5XX": { + 
"content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Server Error" + } + }, + "summary": "Route Private Audio To Video", + "tags": [ + "private-ltx-api" + ] + } + }, + "/v1/image-to-video": { + "post": { + "operationId": "route_private_image_to_video_v1_image_to_video_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PrivateImageToVideoRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "4XX": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Client Error" + }, + "5XX": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Server Error" + } + }, + "summary": "Route Private Image To Video", + "tags": [ + "private-ltx-api" + ] + } + }, + "/v1/retake": { + "post": { + "operationId": "route_private_retake_v1_retake_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PrivateRetakeRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "4XX": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Client Error" + }, + "5XX": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Server Error" + } + }, + "summary": "Route Private Retake", + "tags": [ + "private-ltx-api" + ] + } + }, + "/v1/text-to-video": { + "post": { + "operationId": "route_private_text_to_video_v1_text_to_video_post", + 
"requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PrivateTextToVideoRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "4XX": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Client Error" + }, + "5XX": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Server Error" + } + }, + "summary": "Route Private Text To Video", + "tags": [ + "private-ltx-api" + ] + } + }, + "/v1/upload": { + "post": { + "operationId": "route_private_upload_init_v1_upload_post", + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PrivateUploadInitResponse" + } + } + }, + "description": "Successful Response" + }, + "4XX": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Client Error" + }, + "5XX": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Server Error" + } + }, + "summary": "Route Private Upload Init", + "tags": [ + "private-ltx-api" + ] + } + }, + "/v1/upload/{upload_id}": { + "put": { + "operationId": "route_private_upload_put_v1_upload__upload_id__put", + "parameters": [ + { + "in": "path", + "name": "upload_id", + "required": true, + "schema": { + "title": "Upload Id", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "additionalProperties": { + "type": "string" + }, + "title": "Response Route Private Upload Put V1 Upload Upload Id Put", + "type": "object" + } + } + }, + "description": "Successful Response" + }, + "4XX": { + 
"content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Client Error" + }, + "5XX": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPErrorResponse" + } + } + }, + "description": "Server Error" + } + }, + "summary": "Route Private Upload Put", + "tags": [ + "private-ltx-api" + ] + } } } } diff --git a/frontend/generated/backend-openapi.ts b/frontend/generated/backend-openapi.ts index 30dec08a7..9ce177ea2 100644 --- a/frontend/generated/backend-openapi.ts +++ b/frontend/generated/backend-openapi.ts @@ -462,6 +462,108 @@ export interface paths { patch?: never; trace?: never; }; + "/v1/audio-to-video": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Route Private Audio To Video */ + post: operations["route_private_audio_to_video_v1_audio_to_video_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/v1/image-to-video": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Route Private Image To Video */ + post: operations["route_private_image_to_video_v1_image_to_video_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/v1/retake": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Route Private Retake */ + post: operations["route_private_retake_v1_retake_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/v1/text-to-video": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Route Private Text To Video */ + post: operations["route_private_text_to_video_v1_text_to_video_post"]; + delete?: never; + 
options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/v1/upload": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Route Private Upload Init */ + post: operations["route_private_upload_init_v1_upload_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/v1/upload/{upload_id}": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + /** Route Private Upload Put */ + put: operations["route_private_upload_put_v1_upload__upload_id__put"]; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; } export type webhooks = Record; export interface components { @@ -484,6 +586,10 @@ export interface components { promptEnhancerEnabledI2V?: boolean | null; /** Promptenhancerenabledt2V */ promptEnhancerEnabledT2V?: boolean | null; + /** Runpodapitoken */ + runpodApiToken?: string | null; + /** Runpodapiurl */ + runpodApiUrl?: string | null; /** Seedlocked */ seedLocked?: boolean | null; /** Uselocaltextencoder */ @@ -492,6 +598,8 @@ export interface components { useTorchCompile?: boolean | null; /** Userprefersltxapivideogenerations */ userPrefersLtxApiVideoGenerations?: boolean | null; + /** Videogenerationprovider */ + videoGenerationProvider?: ("local" | "ltx_api" | "runpod") | null; }; /** CancelCancellingResponse */ CancelCancellingResponse: { @@ -514,7 +622,7 @@ export interface components { /** CheckModelAccessRequest */ CheckModelAccessRequest: { /** Cp Ids */ - cp_ids?: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; + cp_ids?: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | 
"ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; }; /** CheckModelAccessResponse */ CheckModelAccessResponse: { @@ -544,11 +652,11 @@ export interface components { /** DownloadProgressRunningResponse */ DownloadProgressRunningResponse: { /** All Files */ - all_files: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; + all_files: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; /** Completed Files */ - completed_files: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; + completed_files: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; /** Current Downloading File */ - current_downloading_file: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo") | null; + current_downloading_file: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | 
"ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo") | null; /** Current File Progress */ current_file_progress: number; /** Error */ @@ -662,6 +770,8 @@ export interface components { * @enum {integer} */ duration: 5 | 6 | 8 | 10 | 12 | 14 | 16 | 18 | 20; + /** Enhanceprompt */ + enhancePrompt?: boolean | null; /** * Fps * @default 24 @@ -675,7 +785,7 @@ export interface components { * @default fast * @enum {string} */ - model: "fast" | "pro"; + model: "fast" | "fast_legacy" | "pro"; /** * Negativeprompt * @default @@ -890,7 +1000,7 @@ export interface components { /** ImageGenRecommendationResponse */ ImageGenRecommendationResponse: { /** Cp To Download */ - cp_to_download: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo") | null; + cp_to_download: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo") | null; }; /** LTXVideoGenerationModelSpecItem */ LTXVideoGenerationModelSpecItem: { @@ -898,7 +1008,7 @@ export interface components { * Pipeline * @enum {string} */ - pipeline: "fast" | "pro"; + pipeline: "fast" | "fast_legacy" | "pro"; spec: components["schemas"]["LTXVideoGenerationSpec"]; }; /** LTXVideoGenerationResolutionSpec */ @@ -924,7 +1034,7 @@ export interface components { /** LtxDownloadRecommendationResponse */ LtxDownloadRecommendationResponse: { /** Cps To Download */ - cps_to_download: ("ltx-2.3-22b-distilled" | 
"ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; + cps_to_download: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; /** * Status * @constant @@ -934,7 +1044,7 @@ export interface components { /** LtxIcLoraRecommendationResponse */ LtxIcLoraRecommendationResponse: { /** Cps To Download */ - cps_to_download: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; + cps_to_download: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; }; /** LtxInsufficientFundsErrorResponse */ LtxInsufficientFundsErrorResponse: { @@ -957,14 +1067,14 @@ export interface components { /** LtxUpgradeRecommendationResponse */ LtxUpgradeRecommendationResponse: { /** Cps To Delete */ - cps_to_delete: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; + cps_to_delete: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | 
"yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; /** Cps To Download */ - cps_to_download: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; + cps_to_download: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; /** * Ltx Model Id - * @constant + * @enum {string} */ - ltx_model_id: "ltx-2.3-22b-distilled"; + ltx_model_id: "ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1"; /** * Status * @constant @@ -976,12 +1086,12 @@ export interface components { /** ModelDeleteRequest */ ModelDeleteRequest: { /** Cp Ids */ - cp_ids?: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; + cp_ids?: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; }; /** ModelDownloadRequest */ ModelDownloadRequest: { /** Cp Ids */ - cp_ids?: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; + cp_ids?: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | 
"ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo")[]; /** * Type * @default download @@ -1012,6 +1122,171 @@ export interface components { /** Name */ name: string; }; + /** PrivateAudioToVideoRequest */ + PrivateAudioToVideoRequest: { + /** + * Aspect Ratio + * @default 16:9 + */ + aspect_ratio: string; + /** Audio Uri */ + audio_uri: string; + /** + * Duration + * @default 5 + */ + duration: number; + /** + * Enhance Prompt + * @default false + */ + enhance_prompt: boolean; + /** + * Fps + * @default 24 + */ + fps: number; + /** Image Uri */ + image_uri?: string | null; + /** + * Model + * @default fast + */ + model: string; + /** Prompt */ + prompt: string; + /** + * Resolution + * @default 1080p + */ + resolution: string; + }; + /** PrivateImageToVideoRequest */ + PrivateImageToVideoRequest: { + /** + * Aspect Ratio + * @default 16:9 + */ + aspect_ratio: string; + /** + * Camera Motion + * @default none + */ + camera_motion: string; + /** + * Duration + * @default 5 + */ + duration: number; + /** + * Enhance Prompt + * @default false + */ + enhance_prompt: boolean; + /** + * Fps + * @default 24 + */ + fps: number; + /** + * Generate Audio + * @default false + */ + generate_audio: boolean; + /** Image Uri */ + image_uri: string; + /** + * Model + * @default fast + */ + model: string; + /** Prompt */ + prompt: string; + /** + * Resolution + * @default 1080p + */ + resolution: string; + }; + /** PrivateRetakeRequest */ + PrivateRetakeRequest: { + /** Duration */ + duration: number; + /** + * Mode + * @default replace_audio_and_video + * @enum {string} + */ + mode: "replace_audio_and_video" | "replace_video" | "replace_audio"; + /** + * Prompt + * @default + */ + prompt: string; + /** Start Time */ + start_time: number; + /** Video Uri */ + video_uri: string; + }; + /** PrivateTextToVideoRequest */ + 
PrivateTextToVideoRequest: { + /** + * Aspect Ratio + * @default 16:9 + */ + aspect_ratio: string; + /** + * Camera Motion + * @default none + */ + camera_motion: string; + /** + * Duration + * @default 5 + */ + duration: number; + /** + * Enhance Prompt + * @default false + */ + enhance_prompt: boolean; + /** + * Fps + * @default 24 + */ + fps: number; + /** + * Generate Audio + * @default false + */ + generate_audio: boolean; + /** + * Model + * @default fast + */ + model: string; + /** Prompt */ + prompt: string; + /** + * Resolution + * @default 1080p + */ + resolution: string; + }; + /** PrivateUploadInitResponse */ + PrivateUploadInitResponse: { + /** + * Required Headers + * @default {} + */ + required_headers: { + [key: string]: string; + }; + /** Storage Uri */ + storage_uri: string; + /** Upload Url */ + upload_url: string; + }; /** RetakeCancelledResponse */ RetakeCancelledResponse: { /** @@ -1084,6 +1359,11 @@ export interface components { * @default false */ hasLtxApiKey: boolean; + /** + * Hasrunpodapitoken + * @default false + */ + hasRunpodApiToken: boolean; /** * Lockedseed * @default 42 @@ -1109,6 +1389,11 @@ export interface components { * @default true */ promptEnhancerEnabledT2V: boolean; + /** + * Runpodapiurl + * @default + */ + runpodApiUrl: string; /** * Seedlocked * @default false @@ -1129,6 +1414,12 @@ export interface components { * @default false */ userPrefersLtxApiVideoGenerations: boolean; + /** + * Videogenerationprovider + * @default local + * @enum {string} + */ + videoGenerationProvider: "local" | "ltx_api" | "runpod"; }; /** StatusResponse */ StatusResponse: { @@ -1179,7 +1470,7 @@ export interface components { /** TextEncoderRecommendationResponse */ TextEncoderRecommendationResponse: { /** Cp To Download */ - cp_to_download: ("ltx-2.3-22b-distilled" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | 
"gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo") | null; + cp_to_download: ("ltx-2.3-22b-distilled" | "ltx-2.3-22b-distilled-1.1" | "ltx-2.3-spatial-upscaler-x2-1.0" | "ltx-2.3-spatial-upscaler-x2-1.1" | "ltx-2.3-22b-ic-lora-union-control-ref0.5" | "dpt-hybrid-midas" | "yolox-l-torchscript" | "dw-ll-ucoco-384-bs5" | "gemma-3-12b-it-qat-q4_0-unquantized" | "z-image-turbo") | null; /** Expected Size Bytes */ expected_size_bytes: number; /** Expected Size Gb */ @@ -2277,4 +2568,252 @@ export interface operations { }; }; }; + route_private_audio_to_video_v1_audio_to_video_post: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["PrivateAudioToVideoRequest"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": unknown; + }; + }; + /** @description Client Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + }; + }; + route_private_image_to_video_v1_image_to_video_post: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["PrivateImageToVideoRequest"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": unknown; + }; + }; + /** @description Client Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + 
[name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + }; + }; + route_private_retake_v1_retake_post: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["PrivateRetakeRequest"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": unknown; + }; + }; + /** @description Client Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + }; + }; + route_private_text_to_video_v1_text_to_video_post: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["PrivateTextToVideoRequest"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": unknown; + }; + }; + /** @description Client Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + }; + }; + route_private_upload_init_v1_upload_post: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + 
"application/json": components["schemas"]["PrivateUploadInitResponse"]; + }; + }; + /** @description Client Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + }; + }; + route_private_upload_put_v1_upload__upload_id__put: { + parameters: { + query?: never; + header?: never; + path: { + upload_id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + [key: string]: string; + }; + }; + }; + /** @description Client Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPErrorResponse"]; + }; + }; + }; + }; } diff --git a/runpod/Dockerfile b/runpod/Dockerfile new file mode 100644 index 000000000..3a42df342 --- /dev/null +++ b/runpod/Dockerfile @@ -0,0 +1,30 @@ +FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu22.04 + +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONUNBUFFERED=1 +ENV LTX_APP_DATA_DIR=/workspace/ltx-data +ENV LTX_PORT=8000 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + curl \ + ffmpeg \ + git \ + libgl1 \ + libglib2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +ENV PATH="/root/.local/bin:${PATH}" + +WORKDIR /app/backend +COPY backend/pyproject.toml backend/uv.lock ./ +RUN uv sync --frozen + +COPY backend ./ +COPY runpod/start.sh /usr/local/bin/ltx-runpod-start +RUN chmod +x 
/usr/local/bin/ltx-runpod-start + +EXPOSE 8000 +CMD ["ltx-runpod-start"] diff --git a/runpod/start.sh b/runpod/start.sh new file mode 100644 index 000000000..b3b17641a --- /dev/null +++ b/runpod/start.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail + +export LTX_APP_DATA_DIR="${LTX_APP_DATA_DIR:-/workspace/ltx-data}" +export LTX_PORT="${LTX_PORT:-8000}" + +mkdir -p "${LTX_APP_DATA_DIR}" + +if [[ -z "${RUNPOD_PRIVATE_API_TOKEN:-${LTX_AUTH_TOKEN:-}}" ]]; then + echo "RUNPOD_PRIVATE_API_TOKEN or LTX_AUTH_TOKEN must be set." >&2 + exit 1 +fi + +exec uv run python runpod_server.py