diff --git a/.env.example b/.env.example index 2cfcbd5b..0af31cab 100644 --- a/.env.example +++ b/.env.example @@ -12,9 +12,8 @@ ADMIN_CORS_ORIGINS=http://localhost:5173 # Server port (default: 3001) PORT=3001 -# Number of available rotating proxy IPs (from llm-proxy pool). -# When unset or 0, IP capacity checks are skipped (backward compatible). -# PROXY_IP_COUNT=3 +# LLM proxy router URL (set automatically by pnpm run deploy-proxy) +LLM_PROXY_URL=https://llm-proxy-router..workers.dev # Maximum serial fallback attempts before giving up and returning an error (default: 5) MAX_FALLBACK_RETRIES=5 diff --git a/.roo/specs/zero-configuration-deployment/design.md b/.roo/specs/zero-configuration-deployment/design.md new file mode 100644 index 00000000..046a282c --- /dev/null +++ b/.roo/specs/zero-configuration-deployment/design.md @@ -0,0 +1,405 @@ +# Zero-Configuration Deployment — Design + +## Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ Operator's Machine │ +│ │ +│ ┌───────────────────────────────────────────────────────────────┐ │ +│ │ freellmapi-alpha (monorepo) │ │ +│ │ │ │ +│ │ .env ←── scripts/setup.ts (non-interactive, auto-generates) │ │ +│ │ │ │ │ +│ │ ├── server/ (Express API + dashboard) │ │ +│ │ │ └── src/index.ts ← topology reconciliation at startup │ │ +│ │ ├── client/ (React dashboard UI) │ │ +│ │ ├── shared/ (shared types) │ │ +│ │ ├── scripts/ │ │ +│ │ │ ├── setup.ts (non-interactive setup) │ │ +│ │ │ ├── deploy-proxy.ts (auto-deploy llm-proxy) │ │ +│ │ │ ├── reconcile-topology.ts (drift detection + reconcile) │ │ +│ │ │ ├── verify-deploy.ts (post-deploy verification) │ │ +│ │ │ ├── sync-secrets.ts (secret sync — unchanged) │ │ +│ │ │ └── rotate-secrets.ts (secret rotation — unchanged) │ │ +│ │ ├── install.sh (Linux/macOS bootstrap) │ │ +│ │ ├── install.ps1 (Windows bootstrap) │ │ +│ │ └── llm-proxy/ (git submodule) │ │ +│ │ │ │ │ +│ │ └── .env ←── same INTERNAL_AUTH_SECRET, no ROUTER_DOMAIN│ │ 
+│ └───────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ │ deploy-proxy.ts │ +│ ▼ │ +│ ┌───────────────────────────────────────────────────────────────┐ │ +│ │ Cloudflare Workers │ │ +│ │ │ │ +│ │ llm-proxy-router..workers.dev │ │ +│ │ │ │ │ +│ │ ├── /internal/v1/topology (dynamic, from env.PROXY_COUNT)│ │ +│ │ └── PROXY_1, PROXY_2, ... PROXY_N │ │ +│ └───────────────────────────────────────────────────────────────┘ │ +│ ▲ │ +│ │ topology discovery + reconciliation │ +│ freellmapi server ───────┘ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## End-to-End Flow + +### Installation Flow (Zero-Interactive) + +``` +install.sh / install.ps1 + │ + ├── 1. Git submodules (git submodule update --init) + ├── 2. Install dependencies (pnpm install, cd llm-proxy && npm install) + ├── 3. Validate prerequisites (node, pnpm, wrangler, wrangler whoami) + └── 4. Invoke: pnpm run setup -- --non-interactive + │ + ├── 4a. Generate all secrets (auto, no prompts) + │ ├── ENCRYPTION_KEY = crypto.randomBytes(32).toString('hex') + │ ├── ADMIN_DASHBOARD_KEY = 'freellmapi-admin-' + crypto.randomBytes(32).toString('hex') + │ ├── INTERNAL_AUTH_SECRET = crypto.randomBytes(32).toString('hex') + │ └── AUTH_KEY = crypto.randomBytes(12).toString('base64url') + │ + ├── 4b. Write .env files + │ ├── freellmapi-alpha/.env: ENCRYPTION_KEY, ADMIN_DASHBOARD_KEY, + │ │ INTERNAL_AUTH_SECRET, LLM_PROXY_URL (placeholder) + │ └── llm-proxy/.env: AUTH_KEY, INTERNAL_AUTH_SECRET, PROXY_COUNT=1 + │ + └── 4c. Invoke: pnpm run deploy-proxy + │ + ├── 4c-i. Read AUTH_KEY, INTERNAL_AUTH_SECRET, PROXY_COUNT from llm-proxy/.env + ├── 4c-ii. Generate TOML configs (no custom domain, workers.dev only) + ├── 4c-iii. Deploy proxy workers via wrangler deploy + ├── 4c-iv. Deploy router worker via wrangler deploy + ├── 4c-v. Capture router URL from wrangler output + │ (regex: https://llm-proxy-router\.[^.]+\.workers\.dev) + ├── 4c-vi. 
Write LLM_PROXY_URL to freellmapi-alpha/.env + └── 4c-vii. Print summary with router URL +``` + +### Startup Flow (Self-Healing) + +``` +server/src/index.ts → main() + │ + ├── 1. assertAdminAuthConfigured() + ├── 2. initDb() + ├── 3. await initTopology() ← existing: fetches /internal/v1/topology + ├── 4. await reconcileTopology() ← NEW: detect drift, auto-reconcile + │ │ + │ ├── 4a. Query DB for expected worker count: + │ │ SELECT MAX(key_count) FROM ( + │ │ SELECT COUNT(*) as key_count FROM api_keys WHERE enabled = 1 GROUP BY platform + │ │ ) + │ │ Default: 1 if no keys exist + │ │ + │ ├── 4b. Read actual worker count from topology.workerCount + │ │ + │ ├── 4c. Compare: + │ │ ├── actual >= expected → no action (over-provisioning OK) + │ │ └── actual < expected → drift detected + │ │ + │ └── 4d. If drift detected: + │ ├── Log: "Topology drift: {actual} workers, expected {expected}" + │ ├── Read current AUTH_KEY, INTERNAL_AUTH_SECRET from llm-proxy/.env + │ ├── Generate new TOML configs with updated PROXY_COUNT + │ ├── Deploy all workers via wrangler deploy + │ ├── Verify new topology endpoint returns updated workerCount + │ └── Log result (success/failure) + │ + ├── 5. createApp() + └── 6. startHealthChecker() +``` + +## Component Designs + +### 1. Non-Interactive Setup (`scripts/setup.ts`) + +**Change**: Remove all `await prompt()` calls. All values are auto-generated or auto-detected. 
+ +**Current blocking prompts**: +- Line 168-177: `await prompt('Enter AUTH_KEY ...')` → auto-generate via `generateAuthKey()` +- Line 183-189: `await prompt('Enter router domain ...')` → removed entirely (no ROUTER_DOMAIN) + +**New behavior**: +- `--non-interactive` is the default mode (no flag needed) +- `--interactive` flag enables the old prompt-based flow for advanced users +- AUTH_KEY: always auto-generated using `crypto.randomBytes(12).toString('base64url')` (16 chars, URL-safe) +- ROUTER_DOMAIN: removed from setup.ts entirely (handled by deploy-proxy.ts) +- LLM_PROXY_URL: initially written as the placeholder `https://llm-proxy-router.<subdomain>.workers.dev`, then replaced with the real router URL once deploy-proxy completes + +**Backward compatibility**: The `--interactive` flag preserves the old behavior for users who want manual control. + +### 2. Auto-Deploy Script (`scripts/deploy-proxy.ts`) + +**New file**: `scripts/deploy-proxy.ts` — orchestrates llm-proxy deployment from freellmapi. + +**Responsibilities**: +1. Read `AUTH_KEY`, `INTERNAL_AUTH_SECRET`, `PROXY_COUNT` from `llm-proxy/.env` +2. Generate TOML configs for all proxy workers and the router +3. Deploy all workers via `wrangler deploy` +4. Capture the router URL from wrangler output +5. Write `LLM_PROXY_URL` to `freellmapi-alpha/.env` + +**TOML generation changes** (vs existing `llm-proxy/scripts/deploy.ts`): +- Router TOML: **no `routes` field** (uses `*.workers.dev` subdomain automatically) +- Router TOML: **no `ROUTER_DOMAIN` env var** (removed) +- Proxy TOML: unchanged (still uses `WORKER_ROLE=proxy`, `PROXY_INDEX`, `INTERNAL_AUTH_SECRET`) +- Worker names: `llm-proxy-01`, `llm-proxy-02`, etc. 
(unchanged) + +**URL capture from wrangler output**: +Wrangler deploy output includes lines like: +``` +Uploaded llm-proxy-router (2.34 sec) +Published llm-proxy-router (0.45 sec): https://llm-proxy-router.<subdomain>.workers.dev +``` +Regex to extract: `https://llm-proxy-router\.[a-zA-Z0-9-]+\.workers\.dev` + +**Idempotency**: `wrangler deploy` updates existing workers by name. No duplicate workers are created on re-run. + +### 3. Dynamic Topology Endpoint (`llm-proxy/src/router.ts`) + +**Change**: Replace static `TOPOLOGY` import with dynamic generation from `env.PROXY_COUNT`. + +**Current code** (lines 4 and 23): +```typescript +import { TOPOLOGY } from "./generated/topology"; +// ... +return jsonResponse(TOPOLOGY); +``` + +**New code**: +```typescript +// Remove: import { TOPOLOGY } from "./generated/topology"; +// ... +const proxyCount = Number(env.PROXY_COUNT); +const proxies = Array.from({ length: proxyCount }, (_, i) => ({ + id: i, + name: `llm-proxy-${String(i + 1).padStart(2, "0")}`, + status: "active" as const, +})); +const topologyGeneratedAt = Math.floor(Date.now() / 1000); +const hashInput = JSON.stringify({ schemaVersion: 1, workerCount: proxyCount, proxies }); +const topologyId = `sha256:${crypto.createHash("sha256").update(hashInput).digest("hex")}`; +return jsonResponse({ + schemaVersion: 1, + topologyId, + topologyGeneratedAt, + workerCount: proxyCount, + proxies, +}); +``` + +**Schema compatibility**: The response shape is identical to the current static `TOPOLOGY` object — same field names, types, and structure. The `topologyId` uses the same hash algorithm as `deploy.ts:generateTopologyModule()`. + +**Note**: `createHash` comes from Node's `crypto` module, not the Workers global `crypto` (Web Crypto), so `import crypto from "node:crypto"` needs to be added to `router.ts` for the `topologyId` hash; on Cloudflare Workers this requires the `nodejs_compat` compatibility flag in the router's TOML. + +### 4. Topology Reconciliation (`scripts/reconcile-topology.ts`) + +**New file**: `scripts/reconcile-topology.ts` — detects and fixes topology drift. + +**Algorithm**: +1. Read `LLM_PROXY_URL` and `INTERNAL_AUTH_SECRET` from freellmapi `.env` +2. 
Fetch current topology from `${LLM_PROXY_URL}/internal/v1/topology` +3. Query DB for expected worker count: + ```sql + SELECT MAX(key_count) FROM ( + SELECT COUNT(*) as key_count FROM api_keys WHERE enabled = 1 GROUP BY platform + ) + ``` + Default to `1` if result is `null` or `0`. +4. Compare `topology.workerCount` vs expected count +5. If `topology.workerCount >= expected`: log "No drift detected", exit 0 +6. If `topology.workerCount < expected`: + - Log drift detection with current vs expected counts + - Read `AUTH_KEY`, `INTERNAL_AUTH_SECRET` from `llm-proxy/.env` + - Set `PROXY_COUNT` in `llm-proxy/.env` to the expected count + - Generate new TOML configs with updated worker count + - Deploy all workers via `wrangler deploy` + - Verify new topology returns updated `workerCount` + - Log result + +**Invocation**: +- Automatic: called from `server/src/index.ts` during startup (async, non-blocking) +- Manual: `pnpm run reconcile-topology` +- Dry-run: `pnpm run reconcile-topology -- --dry-run` + +**Failure handling**: If reconciliation fails (e.g., wrangler not authenticated, network error), log a clear error and continue. The server operates with the current (under-provisioned) topology. + +### 5. Worker Count from Provider Keys + +**Source of truth**: `api_keys` table in SQLite. + +**Query**: +```sql +SELECT MAX(key_count) FROM ( + SELECT COUNT(*) as key_count FROM api_keys WHERE enabled = 1 GROUP BY platform +) +``` + +**Interpretation**: +- Result `null` (no keys): default to `1` (minimum viable pool) +- Result `0` (all keys disabled): default to `1` +- Result `N`: use `N` as the expected worker count + +**Rationale**: The maximum key count across platforms represents the peak concurrent load the system needs to handle. Each key gets one worker slot (REQ-KS1: one key = one worker). + +### 6. 
Eliminate PROXY_IP_COUNT + +**Changes**: +- Remove `PROXY_IP_COUNT` from `.env.example` +- Remove `PROXY_IP_COUNT` from `ipPoolCapacity.ts` fallback chain +- `getWorkerCount()` in `ipPoolCapacity.ts`: only uses dynamic topology (no env fallback) +- `isStickyRoutingEnabled()`: only checks `isDynamicTopologyAvailable()` (no env check) +- Deprecation warning: if `PROXY_IP_COUNT` is set in `.env`, log a warning on startup: + ``` + [deprecation] PROXY_IP_COUNT is deprecated and ignored. Worker count is now derived from provider API keys. + ``` + +### 7. Eliminate ROUTER_DOMAIN + +**Changes**: +- Remove `ROUTER_DOMAIN` from `llm-proxy/.env.example` +- Remove `ROUTER_DOMAIN` from `llm-proxy/scripts/deploy.ts` (no longer needed) +- Remove `ROUTER_DOMAIN` env var from router TOML generation +- Router TOML: no `routes` field (workers.dev subdomain is automatic) +- `llm-proxy/src/router.ts`: remove `ROUTER_DOMAIN` from env type and usage + - The encoder page (line 34) uses `env.ROUTER_DOMAIN || url.hostname` → change to `url.hostname` + +### 8. Eliminate PROXY_COUNT (as a static config) + +**Changes**: +- `PROXY_COUNT` is still written to `llm-proxy/.env` but is now managed by freellmapi +- Initial value: `1` (single proxy for new installations) +- Updated by `reconcile-topology.ts` when drift is detected +- Removed from interactive setup (no prompt) +- The `llm-proxy/scripts/deploy.ts` script still reads `PROXY_COUNT` from env for manual deployments + +### 9. Updated `install.sh` / `install.ps1` + +**Changes**: +- After `pnpm run setup` completes, the setup script itself triggers `pnpm run deploy-proxy` +- No change to the install script's own flow — it already invokes `pnpm run setup` +- The setup script's `--non-interactive` mode triggers deploy-proxy automatically + +### 10. Updated `server/src/index.ts` + +**Change**: Add topology reconciliation call after `initTopology()`. 
+ +```typescript +import { reconcileTopology } from './services/topologyReconciliation.js'; + +async function main() { + assertAdminAuthConfigured(); + initDb(); + await initTopology(); + await reconcileTopology(); // NEW: non-blocking drift detection + reconciliation + const app = createApp(); + // ... +} +``` + +**New file**: `server/src/services/topologyReconciliation.ts` — wraps `scripts/reconcile-topology.ts` logic as an importable module. + +## File Changes + +| File | Change | +|---|---| +| `scripts/setup.ts` | Remove `await prompt()` for AUTH_KEY (auto-generate). Remove ROUTER_DOMAIN prompt entirely. Add `--non-interactive` as default. | +| `scripts/deploy-proxy.ts` | **NEW**. Orchestrates llm-proxy deployment, captures workers.dev URL, writes LLM_PROXY_URL. | +| `scripts/reconcile-topology.ts` | **NEW**. Drift detection + automatic reconciliation script. | +| `server/src/services/topologyReconciliation.ts` | **NEW**. Importable module wrapping reconcile logic for startup call. | +| `server/src/index.ts` | Add `await reconcileTopology()` after `initTopology()`. | +| `llm-proxy/src/router.ts` | Remove `import { TOPOLOGY } from "./generated/topology"`. Generate topology dynamically from `env.PROXY_COUNT`. Remove `ROUTER_DOMAIN` usage. | +| `llm-proxy/scripts/deploy.ts` | Remove `requireEnv("ROUTER_DOMAIN")`. Remove `routes` from router TOML. Remove `ROUTER_DOMAIN` from router TOML vars. Remove `generateTopologyModule()` call. | +| `llm-proxy/.env.example` | Remove `ROUTER_DOMAIN`. Remove `PROXY_COUNT` (commented out with note: managed by freellmapi). | +| `.env.example` | Remove `PROXY_IP_COUNT` entry. Add `LLM_PROXY_URL` entry. | +| `server/src/services/ipPoolCapacity.ts` | Remove `PROXY_IP_COUNT` fallback from `getWorkerCount()`. Remove `PROXY_IP_COUNT` check from `isStickyRoutingEnabled()`. Add deprecation warning if `PROXY_IP_COUNT` is set. | +| `server/src/services/proxyTopology.ts` | No changes needed (already fetches from endpoint). 
| +| `install.sh` | No changes needed (already invokes `pnpm run setup` which triggers deploy-proxy). | +| `install.ps1` | No changes needed (same as above). | + +## Data Flow Diagrams + +### Secret Generation and Flow + +``` +scripts/setup.ts (non-interactive) + │ + ├── generateHexSecret() → ENCRYPTION_KEY → freellmapi-alpha/.env + ├── generateAdminKey() → ADMIN_DASHBOARD_KEY → freellmapi-alpha/.env + ├── generateHexSecret() → INTERNAL_AUTH_SECRET → freellmapi-alpha/.env + llm-proxy/.env + └── generateAuthKey() → AUTH_KEY → llm-proxy/.env + +scripts/deploy-proxy.ts + │ + ├── Reads from llm-proxy/.env: AUTH_KEY, INTERNAL_AUTH_SECRET, PROXY_COUNT + ├── Generates TOML configs with these values + ├── Deploys workers + ├── Captures workers.dev URL + └── Writes LLM_PROXY_URL → freellmapi-alpha/.env + +At runtime: + │ + ├── llm-proxy router: reads INTERNAL_AUTH_SECRET, AUTH_KEY, PROXY_COUNT from env (TOML vars) + └── freellmapi server: reads INTERNAL_AUTH_SECRET, LLM_PROXY_URL from .env +``` + +### Topology Lifecycle + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Topology Lifecycle │ +│ │ +│ Deploy Time Runtime │ +│ ─────────── ─────── │ +│ deploy-proxy.ts index.ts → initTopology() │ +│ ├── Generate TOML (PROXY_COUNT) ├── Fetch /internal/v1/topology│ +│ ├── wrangler deploy ├── Cache snapshot │ +│ └── Capture URL └── Log worker count │ +│ │ +│ Reconcile Time Capacity Service │ +│ ─────────────── ──────────────── │ +│ reconcile-topology.ts ipPoolCapacity.ts │ +│ ├── Query DB for key counts ├── getWorkerCount() │ +│ ├── Compare vs topology │ └── topology.getWorkerCount()│ +│ ├── If drift: redeploy ├── allocateIpForKey() │ +│ └── Verify new topology └── releaseIpForKey() │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Error Handling + +| Condition | Component | Behavior | +|---|---|---| +| AUTH_KEY not set | `setup.ts` | Auto-generate (no prompt) | +| ROUTER_DOMAIN not set | `deploy-proxy.ts` | Use workers.dev 
(no custom domain needed) | +| wrangler not authenticated | `deploy-proxy.ts` | Fail with clear error + instructions | +| Deploy fails | `deploy-proxy.ts` | Retry up to 3x, then fail with manual recovery instructions | +| Topology fetch fails | `reconcile-topology.ts` | Log warning, skip reconciliation, continue startup | +| Reconciliation fails | `reconcile-topology.ts` | Log error, continue with current topology | +| PROXY_IP_COUNT set in .env | `ipPoolCapacity.ts` | Log deprecation warning, ignore value | +| DB query returns null | `reconcile-topology.ts` | Default to 1 worker | +| wrangler output doesn't match URL regex | `deploy-proxy.ts` | Fail with error: "Could not detect router URL from wrangler output" | + +## Backward Compatibility + +| Scenario | Behavior | +|---|---| +| Existing deployment with ROUTER_DOMAIN set | Preserved. `setup.ts --interactive` can still set it. `deploy-proxy.ts` uses it if present. | +| Existing deployment with PROXY_COUNT set | Preserved. `reconcile-topology.ts` updates it when drift detected. | +| Existing deployment with PROXY_IP_COUNT set | Deprecated. Logged as warning. Ignored by capacity service. | +| Manual `cd llm-proxy && npm run deploy` | Still works. `deploy.ts` still reads PROXY_COUNT from env. | +| Existing topology response schema | Unchanged. Dynamic endpoint returns identical shape. | +| Existing `.env` files | Never overwritten by `setup.ts`. Missing keys are appended. 
| + +## Security Considerations + +- `.env` files are created with restrictive permissions (`chmod 600`) where supported +- Secrets are never logged to stdout (only key names and confirmation) +- `INTERNAL_AUTH_SECRET` is synchronized between both `.env` files automatically +- Generated AUTH_KEY is 16 characters (12 bytes base64url), sufficient for URL path segment security +- No secrets are embedded in committed source code or TOML files diff --git a/.roo/specs/zero-configuration-deployment/requirements.md b/.roo/specs/zero-configuration-deployment/requirements.md new file mode 100644 index 00000000..8ecf4314 --- /dev/null +++ b/.roo/specs/zero-configuration-deployment/requirements.md @@ -0,0 +1,231 @@ +# Zero-Configuration Deployment Requirements + +## Overview + +This specification defines the requirements for transforming freellmapi into a true zero-configuration deployment experience. A first-time user with only Wrangler authentication must be able to install and run the entire stack without editing any `.env` files or understanding Cloudflare Workers, domains, topology, routing, or capacity management. + +The target user journey is: +1. Install Node.js +2. Run `wrangler login` +3. Run `install.sh` or `install.ps1` +4. Start freellmapi (`pnpm dev` or `pnpm start`) + +Everything between `wrangler login` and `pnpm dev` must be fully automated. + +--- + +## REQ-Z1: Zero-Interactive Installation + +The installation process must require zero interactive prompts beyond the initial `wrangler login` (which is a Cloudflare prerequisite). + +### Requirements + +- **REQ-Z1.1**: The `install.sh` and `install.ps1` scripts must complete the entire installation without prompting the user for any configuration values. +- **REQ-Z1.2**: The `setup.ts` script must operate in fully non-interactive mode by default. All values must be auto-generated or auto-detected. 
+- **REQ-Z1.3**: The `--non-interactive` flag (or absence of `--interactive`) must be the default mode for `setup.ts`. +- **REQ-Z1.4**: If a required value cannot be auto-generated or auto-detected, the script must fail with a clear error message and non-zero exit code rather than prompting. +- **REQ-Z1.5**: The `setup.ts` script must support an optional `--interactive` flag for advanced users who want manual control. + +--- + +## REQ-Z2: Auto-Deploy llm-proxy + +The installer must automatically deploy llm-proxy to Cloudflare Workers as part of the installation process. + +### Requirements + +- **REQ-Z2.1**: After `setup.ts` completes, `install.sh` / `install.ps1` must automatically invoke the llm-proxy deployment process. +- **REQ-Z2.2**: The deployment must use `wrangler deploy` to deploy all proxy workers and the router worker. +- **REQ-Z2.3**: The deployment must handle Wrangler authentication failures gracefully with a clear error message. +- **REQ-Z2.4**: The deployment must verify that all workers were deployed successfully before proceeding. +- **REQ-Z2.5**: If deployment fails, the script must report the failure clearly and provide manual recovery instructions. +- **REQ-Z2.6**: The deployment step must be idempotent — re-running install must not create duplicate workers. + +--- + +## REQ-Z3: Auto-Detect workers.dev URL + +When no custom domain is configured, the system must automatically use Cloudflare's free `*.workers.dev` subdomain. + +### Requirements + +- **REQ-Z3.1**: The llm-proxy deploy process must default to deploying the router to `*.workers.dev` when no `ROUTER_DOMAIN` is configured. +- **REQ-Z3.2**: The deploy script must capture the deployed router URL from `wrangler deploy` output. +- **REQ-Z3.3**: The captured URL must be written to both `llm-proxy/.env` as `ROUTER_DOMAIN` and to the freellmapi `.env` as `LLM_PROXY_URL`. 
+- **REQ-Z3.4**: If a custom `ROUTER_DOMAIN` is already configured in `llm-proxy/.env`, it must be preserved (not overwritten with `*.workers.dev`). +- **REQ-Z3.5**: The `ROUTER_DOMAIN` value must not include the `https://` prefix — it must be a bare domain (e.g., `llm-proxy-router.<subdomain>.workers.dev`). + +--- + +## REQ-Z4: Auto-Configure LLM_PROXY_URL + +The freellmapi server's `LLM_PROXY_URL` must be automatically set to the deployed router's URL. + +### Requirements + +- **REQ-Z4.1**: After llm-proxy deployment, the deploy process must write `LLM_PROXY_URL` to the freellmapi `.env` file. +- **REQ-Z4.2**: `LLM_PROXY_URL` must be in the format `https://{ROUTER_DOMAIN}`. +- **REQ-Z4.3**: If `LLM_PROXY_URL` is already set in `.env`, it must be preserved unless `--regenerate` is specified. +- **REQ-Z4.4**: The value must be validated as a valid HTTPS URL before writing. + +--- + +## REQ-Z5: Auto-Generate All Secrets + +All secrets must be auto-generated with no user input required. + +### Requirements + +- **REQ-Z5.1**: `ENCRYPTION_KEY` — auto-generate using `crypto.randomBytes(32).toString('hex')` (64 hex chars). +- **REQ-Z5.2**: `ADMIN_DASHBOARD_KEY` — auto-generate using `'freellmapi-admin-' + crypto.randomBytes(32).toString('hex')`. +- **REQ-Z5.3**: `INTERNAL_AUTH_SECRET` — auto-generate using `crypto.randomBytes(32).toString('hex')` and synchronize to both `.env` files. +- **REQ-Z5.4**: `AUTH_KEY` — auto-generate using a secure random string (minimum 16 characters). No user prompt. +- **REQ-Z5.5**: All secrets must be preserved on re-run (never overwrite existing values unless `--regenerate` is specified). +- **REQ-Z5.6**: Generated secrets must be printed to stdout with a warning to back them up. + +--- + +## REQ-Z6: Dynamic Topology (Eliminate Static topology.ts) + +The router must generate topology responses dynamically from environment variables, eliminating the static `src/generated/topology.ts` module. 
+ +### Requirements + +- **REQ-Z6.1**: The `/internal/v1/topology` endpoint on the router must generate its response dynamically from `env.PROXY_COUNT` and the service bindings available on the router. +- **REQ-Z6.2**: The response schema must remain backwards-compatible with the existing `TopologySnapshot` interface (same `schemaVersion`, `topologyId`, `topologyGeneratedAt`, `workerCount`, `proxies` fields). +- **REQ-Z6.3**: The `topologyId` must be computed as a deterministic hash of the topology-defining fields (same algorithm as current `deploy.ts`). +- **REQ-Z6.4**: The `proxies` array must be generated by iterating from `1` to `env.PROXY_COUNT`, creating entries with `id: i-1`, `name: llm-proxy-{i}` (with `i` zero-padded to two digits, e.g. `llm-proxy-01`, matching the deployed worker names), `status: "active"`. +- **REQ-Z6.5**: The static `src/generated/topology.ts` file must no longer be imported by the router. +- **REQ-Z6.6**: The `import { TOPOLOGY } from "./generated/topology"` statement must be removed from `router.ts`. +- **REQ-Z6.7**: The `generateTopologyModule()` function in `deploy.ts` must be removed (no longer needed). + +--- + +## REQ-Z7: Worker Count from Provider Keys + +The system must automatically determine the required worker count from the number of enabled API keys per provider platform. + +### Requirements + +- **REQ-Z7.1**: On startup, freellmapi must query the database for the maximum count of enabled API keys across all platforms: + ```sql + SELECT MAX(key_count) FROM ( + SELECT COUNT(*) as key_count FROM api_keys WHERE enabled = 1 GROUP BY platform + ) + ``` +- **REQ-Z7.2**: If no API keys exist, the default worker count must be `1` (minimum viable pool). +- **REQ-Z7.3**: The computed worker count must be compared against the topology's `workerCount` to detect drift. +- **REQ-Z7.4**: The worker count must be configurable via `PROXY_COUNT` in `llm-proxy/.env` for manual override. +- **REQ-Z7.5**: The initial deployment must default to `PROXY_COUNT=1` (single proxy) to minimize resource usage for new installations. 
+ +--- + +## REQ-Z8: Startup Drift Detection + +The freellmapi server must detect topology drift on every startup. + +### Requirements + +- **REQ-Z8.1**: During startup, after fetching topology, the server must compare `topology.workerCount` against the computed expected worker count from REQ-Z7. +- **REQ-Z8.2**: If `topology.workerCount < expectedCount`, drift is detected and reconciliation must be triggered. +- **REQ-Z8.3**: If `topology.workerCount >= expectedCount`, no action is needed (over-provisioning is acceptable). +- **REQ-Z8.4**: Drift detection must be logged with clear messages indicating current vs expected worker count. +- **REQ-Z8.5**: Drift detection must not block server startup — reconciliation runs asynchronously. + +--- + +## REQ-Z9: Automatic Reconciliation + +When drift is detected, the system must automatically reconcile the topology. + +### Requirements + +- **REQ-Z9.1**: A `scripts/reconcile-topology.ts` script must exist that performs topology reconciliation. +- **REQ-Z9.2**: The reconciliation script must: + 1. Read the expected worker count from the database + 2. Generate new TOML configs for all proxy workers and the router + 3. Run `wrangler deploy` for each worker + 4. Verify the deployment by fetching the topology endpoint +- **REQ-Z9.3**: The reconciliation must be triggered automatically by the server on startup when drift is detected. +- **REQ-Z9.4**: The reconciliation must be manually triggerable via `pnpm run reconcile-topology`. +- **REQ-Z9.5**: If reconciliation fails, the server must log a clear error and continue operating with the current topology (degraded but functional). +- **REQ-Z9.6**: The reconciliation script must support `--dry-run` mode. + +--- + +## REQ-Z10: Eliminate PROXY_IP_COUNT + +The `PROXY_IP_COUNT` environment variable must be removed from freellmapi's configuration. + +### Requirements + +- **REQ-Z10.1**: The `PROXY_IP_COUNT` entry must be removed from `.env.example`. 
+- **REQ-Z10.2**: The `ipPoolCapacity.ts` service must obtain worker count exclusively from the dynamic topology client. +- **REQ-Z10.3**: The fallback chain in `ipPoolCapacity.ts` must be: dynamic topology → 0 (disabled). +- **REQ-Z10.4**: The `isStickyRoutingEnabled()` function must check only `isDynamicTopologyAvailable()`, not `PROXY_IP_COUNT`. +- **REQ-Z10.5**: All references to `PROXY_IP_COUNT` in code and documentation must be removed. +- **REQ-Z10.6**: Existing installations with `PROXY_IP_COUNT` set must log a deprecation warning on startup but continue to function. + +--- + +## REQ-Z11: Idempotent Re-Installation + +Re-running the install process must be safe and must not disrupt a working system. + +### Requirements + +- **REQ-Z11.1**: Re-running `install.sh` / `install.ps1` must not overwrite existing `.env` files. +- **REQ-Z11.2**: Re-running `setup.ts` must not regenerate existing secrets. +- **REQ-Z11.3**: Re-running the llm-proxy deployment must update existing workers (not create duplicates). +- **REQ-Z11.4**: Re-running must not reset the database or lose any data. +- **REQ-Z11.5**: Each step must report what it checked and whether changes were made. + +--- + +## REQ-Z12: Backwards Compatibility + +Changes must not break existing deployments that use the current configuration model. + +### Requirements + +- **REQ-Z12.1**: The `/internal/v1/topology` endpoint response schema must remain identical (same field names, types, and structure). +- **REQ-Z12.2**: Existing `PROXY_IP_COUNT` configurations must continue to work during a transition period (deprecated but functional). +- **REQ-Z12.3**: The `ROUTER_DOMAIN` configuration in `llm-proxy/.env` must continue to work for users with custom domains. +- **REQ-Z12.4**: The `LLM_PROXY_URL` configuration in freellmapi's `.env` must continue to work. +- **REQ-Z12.5**: The `llm-proxy/scripts/deploy.ts` script must continue to function for manual deployments. 
+- **REQ-Z12.6**: Existing `wrangler.toml` configurations must continue to work. + +--- + +## Out of Scope + +- Automated Cloudflare DNS provisioning +- Automated domain registration +- Custom domain configuration (users who want custom domains can still configure them manually) +- CI/CD pipeline configuration +- Docker / containerization +- Multi-environment management (staging, production) +- `ENCRYPTION_KEY` rotation automation +- Secret storage in a vault +- Monorepo migration +- Uninstall / cleanup scripts +- Changes to the llm-proxy proxy worker logic (only router and deployment changes) + +--- + +## Traceability + +| Requirement | Current Pain Point | +|---|---| +| REQ-Z1 | `setup.ts` prompts for AUTH_KEY and ROUTER_DOMAIN | +| REQ-Z2 | llm-proxy deployment is a separate manual step | +| REQ-Z3 | No `*.workers.dev` fallback — requires custom domain | +| REQ-Z4 | `LLM_PROXY_URL` must be manually configured | +| REQ-Z5 | AUTH_KEY requires user input | +| REQ-Z6 | Static `topology.ts` requires code change + redeploy to update worker count | +| REQ-Z7 | Worker count is static, not derived from actual key count | +| REQ-Z8 | No drift detection exists | +| REQ-Z9 | No automatic reconciliation exists | +| REQ-Z10 | `PROXY_IP_COUNT` is a configuration drift source | +| REQ-Z11 | Re-running setup can overwrite values | +| REQ-Z12 | N/A (new requirement for this spec) | diff --git a/.roo/specs/zero-configuration-deployment/tasks.md b/.roo/specs/zero-configuration-deployment/tasks.md new file mode 100644 index 00000000..0e2c403d --- /dev/null +++ b/.roo/specs/zero-configuration-deployment/tasks.md @@ -0,0 +1,586 @@ +# Zero-Configuration Deployment — Tasks + +## Overview + +This document defines the implementation tasks for transforming freellmapi into a zero-configuration deployment experience. Tasks are organized by phase, with dependencies tracked between phases. 
+ +**Key principles**: +- Each task is independently verifiable +- Phases must be completed in order +- Idempotency is required for all scripts +- Backward compatibility is maintained at every step + +--- + +## Phase 1: Make setup.ts Non-Interactive + +**Goal**: Eliminate all blocking prompts from `setup.ts`. All secrets auto-generated. + +### T1.1 — Add `--non-interactive` mode to setup.ts + +Modify `scripts/setup.ts`: +- Default mode is non-interactive (no prompts) +- Add `--interactive` flag to enable old prompt-based behavior +- When non-interactive: + - AUTH_KEY: auto-generate via `generateAuthKey()` (no prompt) + - ROUTER_DOMAIN: skip entirely (handled by deploy-proxy later) + - LLM_PROXY_URL: skip (set by deploy-proxy after deployment) +- When `--interactive`: + - Preserve existing prompt behavior for AUTH_KEY and ROUTER_DOMAIN +- Add `--non-interactive` flag detection alongside existing `--dry-run` and `--regenerate` + +### T1.2 — Auto-generate AUTH_KEY without prompt + +In `scripts/setup.ts`, replace the interactive AUTH_KEY prompt (lines 168-177): +```typescript +// Before: +const authKey = await prompt('Enter AUTH_KEY (or press Enter for random):', suggested); + +// After (non-interactive): +const authKey = generateAuthKey(); +logAction('generate', 'AUTH_KEY', 'llm-proxy/.env'); +llmProxyUpdates.set('AUTH_KEY', authKey); +``` + +### T1.3 — Remove ROUTER_DOMAIN prompt from setup.ts + +In `scripts/setup.ts`, remove the ROUTER_DOMAIN prompt block (lines 183-189): +```typescript +// Remove: +const existingRouterDomain = llmProxyEnv.get('ROUTER_DOMAIN') || 'router.example.com'; +const routerDomain = await prompt('Enter router domain:', existingRouterDomain); +``` + +Also remove the LLM_PROXY_URL derivation from ROUTER_DOMAIN (lines 191-197). LLM_PROXY_URL will be set by `deploy-proxy.ts` after deployment. 
+ +### T1.4 — Add deploy-proxy invocation to setup.ts + +At the end of `setup.ts` `main()`, after writing configuration: +```typescript +// After writing .env files, trigger deploy-proxy +if (!isDryRun) { + console.log('\n── Deploying llm-proxy ──'); + // Import and run deploy-proxy + const { deployProxy } = await import('./deploy-proxy.js'); + await deployProxy(); +} else { + console.log(' [dry-run] Would run: pnpm run deploy-proxy'); +} +``` + +### T1.5 — Add `deploy-proxy` script to root package.json + +In root `package.json`, add: +```json +"deploy-proxy": "tsx scripts/deploy-proxy.ts" +``` + +### T1.6 — Verify setup.ts backward compatibility + +- Run `pnpm run setup -- --interactive` on an existing installation → should prompt as before +- Run `pnpm run setup` (no flags) on a clean installation → should complete without prompts +- Run `pnpm run setup -- --dry-run` → should report all actions without writing + +--- + +## Phase 2: Create deploy-proxy.ts + +**Goal**: New script that deploys llm-proxy, captures the workers.dev URL, and writes LLM_PROXY_URL. 
+ +### T2.1 — Create `scripts/deploy-proxy.ts` skeleton + +Create the file with: +- CLI args: `--dry-run` +- Read `AUTH_KEY`, `INTERNAL_AUTH_SECRET`, `PROXY_COUNT` from `llm-proxy/.env` +- Default `PROXY_COUNT` to `1` if not set +- Validate all required values are present + +### T2.2 — Implement TOML generation in deploy-proxy.ts + +Implement two functions: +- `generateProxyToml(index, internalSecret)` — same as `llm-proxy/scripts/deploy.ts:generateProxyToml()` +- `generateRouterToml(proxyCount, internalSecret, authKey)` — modified version: + - **No `routes` field** (uses workers.dev subdomain) + - **No `ROUTER_DOMAIN` in vars** + - Same `services` bindings as before + +### T2.3 — Implement wrangler deploy execution in deploy-proxy.ts + +Implement: +- `runWranglerDeploy(configPath)` — same pattern as `llm-proxy/scripts/deploy.ts:runWranglerDeploy()` +- `deployWithRetry(worker)` — same retry logic (3 attempts, exponential backoff) +- `deployParallel(workers)` — deploy proxies in parallel with stagger + +### T2.4 — Implement URL capture from wrangler output + +In the router deploy step: +- Capture stdout from `wrangler deploy` +- Extract URL using regex: `/https:\/\/llm-proxy-router\.[a-zA-Z0-9-]+\.workers\.dev/` +- If no match, fail with error: "Could not detect router URL from wrangler output. Check wrangler deploy output manually." 
+- Return the captured URL + +### T2.5 — Write LLM_PROXY_URL to .env + +After successful deployment: +- Use `updateEnvKey()` from `scripts/lib/env.ts` to write `LLM_PROXY_URL` to `freellmapi-alpha/.env` +- Format: `https://llm-proxy-router.<subdomain>.workers.dev` +- Validate it's a valid HTTPS URL before writing + +### T2.6 — Implement dry-run mode in deploy-proxy.ts + +In dry-run mode: +- Report all actions: TOML generation, wrangler deploy commands, URL capture, .env update +- No files written, no network calls +- Exit 0 + +### T2.7 — Implement error handling in deploy-proxy.ts + +- If `llm-proxy/.env` is missing required values → fail with clear message +- If wrangler deploy fails after retries → fail with error + manual recovery instructions +- If URL capture fails → fail with error + suggest manual LLM_PROXY_URL configuration +- Print summary of deployed workers (success/failure count) + +--- + +## Phase 3: Dynamic Topology Endpoint + +**Goal**: Replace static `topology.ts` import with dynamic generation in the router. 
+ +### T3.1 — Remove static topology import from router.ts + +In `llm-proxy/src/router.ts`: +- Remove `import { TOPOLOGY } from "./generated/topology"` (line 4) +- Remove `TOPOLOGY` from the env type (if referenced) + +### T3.2 — Add dynamic topology generation to router.ts + +In the `/internal/v1/topology` handler (lines 18-24): +```typescript +import crypto from "crypto"; + +// Replace: return jsonResponse(TOPOLOGY); +const proxyCount = Number(env.PROXY_COUNT); +const proxies = Array.from({ length: proxyCount }, (_, i) => ({ + id: i, + name: `llm-proxy-${String(i + 1).padStart(2, "0")}`, + status: "active" as const, +})); +const topologyGeneratedAt = Math.floor(Date.now() / 1000); +const hashInput = JSON.stringify({ schemaVersion: 1, workerCount: proxyCount, proxies }); +const topologyId = `sha256:${crypto.createHash("sha256").update(hashInput).digest("hex")}`; +return jsonResponse({ + schemaVersion: 1, + topologyId, + topologyGeneratedAt, + workerCount: proxyCount, + proxies, +}); +``` + +### T3.3 — Remove ROUTER_DOMAIN from router.ts + +In `llm-proxy/src/router.ts`: +- Remove `ROUTER_DOMAIN` from the env type declaration +- Replace `env.ROUTER_DOMAIN || url.hostname` with `url.hostname` (line 34) + +### T3.4 — Remove generateTopologyModule from deploy.ts + +In `llm-proxy/scripts/deploy.ts`: +- Remove the `generateTopologyModule()` function (lines 248-273) +- Remove the call to `generateTopologyModule()` in `main()` (lines 327-332) +- Remove the `topologyDir` and `topologyModule` variables + +### T3.5 — Remove ROUTER_DOMAIN from deploy.ts + +In `llm-proxy/scripts/deploy.ts`: +- Remove `requireEnv("ROUTER_DOMAIN", 1)` (line 282) +- Remove `routerDomain` parameter from `generateRouterToml()` +- Remove `routes: [{ pattern: routerDomain, custom_domain: true }]` from router TOML +- Remove `ROUTER_DOMAIN: routerDomain` from router TOML vars +- Update `main()` to not pass `routerDomain` to `generateRouterToml()` + +### T3.6 — Verify topology response schema + +Deploy 
the updated router and verify: +- `GET /internal/v1/topology` returns HTTP 200 when the request carries a valid `X-Internal-Auth` header +- Response matches the `TopologySnapshot` interface exactly +- `topologyId` format is `sha256:<64-char hex digest>` +- `proxies` array length equals `PROXY_COUNT` + +--- + +## Phase 4: Topology Reconciliation + +**Goal**: Detect and fix topology drift on startup. + +### T4.1 — Create `scripts/reconcile-topology.ts` + +Create the script with: +- Read `LLM_PROXY_URL` and `INTERNAL_AUTH_SECRET` from freellmapi `.env` +- Read `AUTH_KEY`, `INTERNAL_AUTH_SECRET`, `PROXY_COUNT` from `llm-proxy/.env` +- `--dry-run` flag support + +### T4.2 — Implement expected worker count query + +Implement `getExpectedWorkerCount()`: +```typescript +import { getDb } from '../server/src/db/index.js'; + +function getExpectedWorkerCount(): number { + const db = getDb(); + const row = db.prepare(` + SELECT MAX(key_count) as max_count FROM ( + SELECT COUNT(*) as key_count FROM api_keys WHERE enabled = 1 GROUP BY platform + ) + `).get() as { max_count: number | null }; + return row?.max_count && row.max_count > 0 ? row.max_count : 1; +} +``` + +### T4.3 — Implement drift detection + +Implement `detectDrift()`: +- Fetch current topology from `${LLM_PROXY_URL}/internal/v1/topology` +- Compare `topology.workerCount` vs `getExpectedWorkerCount()` +- Return `{ drifted: boolean, actual: number, expected: number }` + +### T4.4 — Implement reconciliation + +Implement `reconcile()`: +- If no drift: log "No drift detected ({actual} workers)", return success +- If drift detected: + 1. Log: "Topology drift detected: {actual} workers, expected {expected}" + 2. Update `PROXY_COUNT` in `llm-proxy/.env` to the expected count + 3. Generate new TOML configs (reuse functions from `deploy-proxy.ts`) + 4. Deploy all workers via wrangler deploy + 5. Verify new topology returns updated `workerCount` + 6. 
Log result + +### T4.5 — Create `server/src/services/topologyReconciliation.ts` + +Create an importable module that wraps the reconciliation logic: +```typescript +export async function reconcileTopology(): Promise<void> { + try { + // Same logic as reconcile-topology.ts but importable + // Runs asynchronously, doesn't block startup + } catch (err) { + console.error('[reconciliation] failed:', err); + // Continue operating with current topology + } +} +``` + +### T4.6 — Add reconciliation call to server startup + +In `server/src/index.ts`: +```typescript +import { reconcileTopology } from './services/topologyReconciliation.js'; + +async function main() { + assertAdminAuthConfigured(); + initDb(); + await initTopology(); + await reconcileTopology(); // NEW + const app = createApp(); + // ... +} +``` + +### T4.7 — Add `reconcile-topology` script to package.json + +In root `package.json`: +```json +"reconcile-topology": "tsx scripts/reconcile-topology.ts" +``` + +--- + +## Phase 5: Eliminate Deprecated Env Vars + +**Goal**: Remove PROXY_IP_COUNT, ROUTER_DOMAIN, and PROXY_COUNT from manual configuration. 
+ +### T5.1 — Update `.env.example` + +In `freellmapi-alpha/.env.example`: +- Remove the `PROXY_IP_COUNT` entry (lines 15-17) +- Add `LLM_PROXY_URL=https://llm-proxy-router..workers.dev` + +### T5.2 — Update `llm-proxy/.env.example` + +In `llm-proxy/.env.example`: +- Remove `ROUTER_DOMAIN=router.example.com` (line 12) +- Change `PROXY_COUNT=3` to `# PROXY_COUNT=1 (managed by freellmapi, set to 1 for new installations)` + +### T5.3 — Remove PROXY_IP_COUNT from ipPoolCapacity.ts + +In `server/src/services/ipPoolCapacity.ts`: +- In `getWorkerCount()`: remove the `PROXY_IP_COUNT` fallback (lines 68-70) +- New `getWorkerCount()`: + ```typescript + export function getWorkerCount(): number { + if (isDynamicTopologyAvailable()) { + return getTopologyWorkerCount(); + } + return 0; + } + ``` +- In `isStickyRoutingEnabled()`: remove the `PROXY_IP_COUNT` check (lines 91-100) +- New `isStickyRoutingEnabled()`: + ```typescript + export function isStickyRoutingEnabled(): boolean { + return isDynamicTopologyAvailable(); + } + ``` +- Add deprecation warning at module load: + ```typescript + if (process.env.PROXY_IP_COUNT !== undefined) { + console.warn('[deprecation] PROXY_IP_COUNT is deprecated and ignored. Worker count is now derived from provider API keys.'); + } + ``` + +### T5.4 — Remove ROUTER_DOMAIN from router.ts env type + +In `llm-proxy/src/router.ts`: +- Remove `ROUTER_DOMAIN` from the env type in `handleRouterRequest` parameter + +--- + +## Phase 6: Update install.sh / install.ps1 + +**Goal**: Ensure install scripts trigger the full zero-config flow. + +### T6.1 — Verify install.sh triggers deploy-proxy via setup.ts + +The existing `install.sh` already invokes `pnpm run setup`. Since `setup.ts` now triggers `deploy-proxy` automatically in non-interactive mode, no changes to `install.sh` are needed. 
+ +Verify: +- `install.sh` → `pnpm run setup` → `setup.ts` (non-interactive) → `deploy-proxy.ts` +- The full chain completes without user input + +### T6.2 — Verify install.ps1 triggers deploy-proxy via setup.ts + +Same as T6.1 for PowerShell. Verify the chain works on Windows. + +### T6.3 — Update install.sh final instructions + +Update the "Next steps" message in `install.sh`: +```bash +echo "Next steps:" +echo " pnpm dev Start local development" +echo " pnpm run verify Verify deployment" +# Remove: "cd llm-proxy && npm run deploy Deploy proxy to Cloudflare" +``` + +### T6.4 — Update install.ps1 final instructions + +Same as T6.3 for `install.ps1`. + +--- + +## Phase 7: Update verify-deploy.ts + +**Goal**: Update verification script for the new architecture. + +### T7.1 — Update verify-deploy.ts for workers.dev URL + +In `scripts/verify-deploy.ts`: +- The existing checks still work (topology endpoint, schema validation) +- Update check 6 (fallback mode) to reflect PROXY_IP_COUNT deprecation: + - If `PROXY_IP_COUNT` is set, report it as deprecated + - Report dynamic topology availability as the primary status + +### T7.2 — Add LLM_PROXY_URL validation + +In `scripts/verify-deploy.ts`: +- Add a check that `LLM_PROXY_URL` matches the expected workers.dev pattern +- If `LLM_PROXY_URL` contains `workers.dev`, note that zero-config deployment is active + +--- + +## Phase 8: Update README.md + +**Goal**: Document the new zero-config installation flow. + +### T8.1 — Update freellmapi-alpha README.md Quick Start + +Replace the Quick Start section: +```markdown +## Quick Start + +1. Install Node.js +2. Run `wrangler login` +3. Run `./install.sh` (Linux/macOS) or `.\install.ps1` (Windows) +4. Run `pnpm dev` + +That's it. 
The installer handles everything: +- Deploys llm-proxy to Cloudflare Workers +- Configures all secrets automatically +- Sets up the database +- Verifies the deployment +``` + +### T8.2 — Update README.md Advanced Configuration + +Add a section documenting: +- `--interactive` flag for manual control +- `--regenerate` flag for secret rotation +- `--dry-run` flag for previewing changes +- Manual `pnpm run reconcile-topology` for topology reconciliation +- Manual `pnpm run deploy-proxy` for re-deploying llm-proxy + +### T8.3 — Update README.md Environment Variables + +Update the environment variable documentation: +- Remove `PROXY_IP_COUNT` from the table +- Remove `ROUTER_DOMAIN` from the table +- Add `LLM_PROXY_URL` to the table +- Note that `PROXY_COUNT` in `llm-proxy/.env` is managed by freellmapi + +--- + +## Phase 9: Tests + +**Goal**: Verify all new behavior works correctly. + +### T9.1 — Test setup.ts non-interactive mode + +```bash +# Clean environment (no .env files) +rm -f .env llm-proxy/.env +pnpm run setup -- --dry-run +# Verify: reports all actions, writes nothing + +pnpm run setup +# Verify: completes without prompts, creates .env files, deploys llm-proxy +``` + +### T9.2 — Test setup.ts interactive mode + +```bash +pnpm run setup -- --interactive +# Verify: prompts for AUTH_KEY and ROUTER_DOMAIN (backward compat) +``` + +### T9.3 — Test setup.ts idempotency + +```bash +# Run setup twice +pnpm run setup +pnpm run setup +# Verify: second run preserves all values, reports "preserved" +``` + +### T9.4 — Test deploy-proxy.ts + +```bash +pnpm run deploy-proxy -- --dry-run +# Verify: reports TOML generation, wrangler commands, URL capture + +pnpm run deploy-proxy +# Verify: deploys workers, captures URL, writes LLM_PROXY_URL +``` + +### T9.5 — Test deploy-proxy.ts idempotency + +```bash +pnpm run deploy-proxy +pnpm run deploy-proxy +# Verify: second run updates existing workers (no duplicates) +``` + +### T9.6 — Test dynamic topology endpoint + +```bash +# 
Deploy router with PROXY_COUNT=2 +curl -H "X-Internal-Auth: $INTERNAL_AUTH_SECRET" $LLM_PROXY_URL/internal/v1/topology +# Verify: workerCount=2, proxies array has 2 entries + +# Update PROXY_COUNT to 4, redeploy +curl -H "X-Internal-Auth: $INTERNAL_AUTH_SECRET" $LLM_PROXY_URL/internal/v1/topology +# Verify: workerCount=4, proxies array has 4 entries +``` + +### T9.7 — Test topology reconciliation + +```bash +# Add API keys to DB (simulate 5 keys for one platform) +pnpm run reconcile-topology -- --dry-run +# Verify: reports drift detection, would redeploy with PROXY_COUNT=5 + +pnpm run reconcile-topology +# Verify: redeploys with updated worker count, new topology reflects change +``` + +### T9.8 — Test PROXY_IP_COUNT deprecation + +```bash +# Set PROXY_IP_COUNT in .env +echo "PROXY_IP_COUNT=3" >> .env +pnpm dev +# Verify: deprecation warning logged, value ignored +``` + +### T9.9 — Test verify-deploy.ts + +```bash +pnpm run verify +# Verify: all checks pass, reports workers.dev URL +``` + +### T9.10 — Test end-to-end flow + +```bash +# Clean environment +rm -f .env llm-proxy/.env server/data/freeapi.db + +# Full install +./install.sh +# Verify: completes without prompts + +# Start server +pnpm dev +# Verify: server starts, topology fetched, reconciliation runs + +# Verify deployment +pnpm run verify +# Verify: all checks pass +``` + +--- + +## Dependencies + +``` +Phase 1 (T1.1-T1.6) → Phase 2 (T2.1-T2.7) [setup.ts must trigger deploy-proxy] +Phase 1 (T1.1-T1.6) → Phase 3 (T3.1-T3.6) [setup.ts no longer needs ROUTER_DOMAIN] +Phase 2 (T2.1-T2.7) → Phase 4 (T4.1-T4.7) [deploy-proxy.ts TOML functions reusable] +Phase 3 (T3.1-T3.6) → Phase 4 (T4.1-T4.7) [dynamic topology needed for reconciliation] +Phase 4 (T4.1-T4.7) → Phase 5 (T5.1-T5.4) [reconciliation must work before removing fallbacks] +Phase 5 (T5.1-T5.4) → Phase 6 (T6.1-T6.4) [env vars cleaned before install scripts] +Phase 6 (T6.1-T6.4) → Phase 7 (T7.1-T7.2) [install flow stable before verify updates] +Phase 7 
(T7.1-T7.2) → Phase 8 (T8.1-T8.3) [verify works before documenting] +Phase 8 (T8.1-T8.3) → Phase 9 (T9.1-T9.10) [all code complete before testing] +``` + +## Verification Checklist + +After all phases complete: + +- [ ] `./install.sh` completes without any prompts on a clean environment +- [ ] `pnpm run setup` (no flags) completes without prompts +- [ ] `pnpm run setup -- --interactive` prompts for AUTH_KEY and ROUTER_DOMAIN +- [ ] `pnpm run setup -- --dry-run` reports actions without writing +- [ ] `pnpm run deploy-proxy` deploys llm-proxy and writes LLM_PROXY_URL +- [ ] `pnpm run deploy-proxy -- --dry-run` reports actions without deploying +- [ ] `/internal/v1/topology` returns dynamic response (not static) +- [ ] Topology response schema matches `TopologySnapshot` interface +- [ ] `pnpm run reconcile-topology` detects drift and redeploys +- [ ] `pnpm run reconcile-topology -- --dry-run` reports without changes +- [ ] Server startup triggers reconciliation automatically +- [ ] `PROXY_IP_COUNT` in .env logs deprecation warning +- [ ] `pnpm run verify` passes all checks +- [ ] README.md documents the new Quick Start flow +- [ ] Existing installations with manual config continue to work +- [ ] No TypeScript compilation errors +- [ ] All existing tests pass diff --git a/README.md b/README.md index 6337819f..99b82668 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Aggregate the free tiers from Google, Groq, Cerebras, SambaNova, NVIDIA, Mistral - [Features](#features) - [Not yet supported](#not-yet-supported) - [Quick start](#quick-start) +- [Advanced configuration](#advanced-configuration) - [Using the API](#using-the-api) - [Screenshots](#screenshots) - [How it works](#how-it-works) @@ -99,50 +100,61 @@ PRs that add any of these are very welcome. See [Contributing](#contributing). ## Quick start -**Prerequisites:** Node.js 22, [pnpm](https://pnpm.io) (or use [Volta](https://volta.sh) — versions are pinned in `package.json`). 
For llm-proxy deployment: [Wrangler](https://developers.cloudflare.com/worklers/wrangler/) installed and authenticated. +1. Install [Node.js](https://nodejs.org/) 22+ +2. Run `wrangler login` +3. Run `./install.sh` (Linux/macOS) or `.\install.ps1` (Windows) +4. Run `pnpm dev` -### Linux / macOS +That's it. The installer handles everything: +- Deploys llm-proxy to Cloudflare Workers +- Configures all secrets automatically +- Sets up the database +- Verifies the deployment -```bash -git clone --recurse-submodules https://github.com/tashfeenahmed/freellmapi.git -cd freellmapi -./install.sh -``` +Open http://localhost:5173 (the Vite dev UI), add your provider keys on the **Keys** page, and grab your unified API key from the **Keys** page header. That unified key is what you point your OpenAI SDK at. -### Windows +## Advanced configuration -```powershell -git clone --recurse-submodules https://github.com/tashfeenahmed/freellmapi.git -cd freellmapi -.\install.ps1 -``` +### Install script flags -The install script initializes submodules, installs dependencies, validates prerequisites, and runs setup automatically. Setup generates all required secrets and creates `.env` files. +The install script supports several flags for manual control: -After install completes: +| Flag | Description | +|---|---| +| `--interactive` | Manual control over each setup step (prompts for secrets, deployment, etc.) | +| `--regenerate` | Rotate all secrets and re-deploy (useful for key rotation) | +| `--dry-run` | Preview all actions without making changes | + +### Manual commands + +For users who want to run steps individually: ```bash -pnpm dev -``` +# Run setup with manual prompts +pnpm run setup -- --interactive -Open http://localhost:5173 (the Vite dev UI), add your provider keys on the **Keys** page, and grab your unified API key from the **Keys** page header. That unified key is what you point your OpenAI SDK at. 
+# Preview setup actions without executing +pnpm run setup -- --dry-run -### Deploying llm-proxy (production) +# Re-deploy llm-proxy to Cloudflare Workers +pnpm run deploy-proxy -```bash -cd llm-proxy -npm run deploy -``` +# Re-deploy with a dry run +pnpm run deploy-proxy -- --dry-run -### Verifying deployment +# Reconcile topology after scaling proxy workers +pnpm run reconcile-topology -```bash +# Verify deployment health pnpm run verify + +# Verify with a dry run +pnpm run verify -- --dry-run ``` ### Manual setup (without install script) -If you prefer manual control: +If you prefer full manual control: ```bash git clone --recurse-submodules https://github.com/tashfeenahmed/freellmapi.git @@ -161,37 +173,6 @@ node server/dist/index.js # server + dashboard both served on :3001 For production, set `ADMIN_DASHBOARD_KEY` in `.env` and keep it private. The dashboard prompts for this key on first load and stores it in browser local storage to authenticate `/api/*` calls. `/v1/*` clients use the separate unified `freellmapi-…` key shown on the Keys page — the two keys cannot cross routes. -**All `.env` variables:** - -**All `.env` variables:** - -| Variable | Required | Description | -|---|---|---| -| `ENCRYPTION_KEY` | Yes | 64-char hex key for AES-256-GCM at-rest key encryption. | -| `ADMIN_DASHBOARD_KEY` | Yes (prod) | Bearer token for all `/api/*` dashboard routes. Min 24 chars. Omitting it only works in `NODE_ENV=development`. | -| `ADMIN_CORS_ORIGINS` | No | Comma-separated browser origins allowed to call `/api/*` cross-origin (e.g. `http://localhost:5173`). Same-origin deployments don't need this. | -| `DISABLE_HSTS` | No | Set `true` to skip HSTS headers — useful when terminating TLS at a reverse proxy. | -| `LOG_SENSITIVE_DATA` | No | Set `true` to log full request/response bodies. Off by default; never enable in production. | -| `PORT` | No | Server port (default `3001`). | -| `LLM_PROXY_URL` | No | Base URL of the llm-proxy router (e.g. 
`https://router.example.com`). Enables automatic proxy topology discovery at startup. If unset, `PROXY_IP_COUNT` is used as a static fallback. | -| `INTERNAL_AUTH_SECRET` | No | Must match llm-proxy's `INTERNAL_AUTH_SECRET`. Required for topology discovery when `LLM_PROXY_URL` is set. | -| `PROXY_IP_COUNT` | No | Static fallback for the number of proxy workers when topology discovery is unavailable. Defaults to `0` (IP capacity disabled). | - -## Proxy Topology Discovery - -When `LLM_PROXY_URL` is set, freellmapi-alpha fetches the proxy topology from llm-proxy at startup: - -``` -GET /internal/v1/topology -Header: X-Internal-Auth: -``` - -This returns the deployed worker count and proxy list, eliminating the need to manually synchronize `PROXY_IP_COUNT`. The topology is deploy-authoritative — generated once during `npm run deploy` and served as an immutable constant. - -**Fallback chain:** dynamic topology → `PROXY_IP_COUNT` env → `0` (disabled) - -Existing deployments without `LLM_PROXY_URL` continue to work exactly as before. - ## Using the API Any OpenAI-compatible client works. Examples: @@ -321,28 +302,28 @@ Request volume, success rate, tokens in and out, average latency, and per-provid │ curl / any │ ◀────────────────────── │ /v1/chat/completions │ │ OpenAI client │ streamed tokens └────────────┬────────────┘ └──────────────────┘ │ - ▼ - ┌──────────────────────────────────────────────────────┐ - │ Router (Thompson-sampling bandit) │ - │ 1. For each enabled model, sample a score: │ - │ score = Beta(wins+2, losses+2) sample │ - │ + INTELLIGENCE_WEIGHT × normalized rank │ - │ + SPEED_WEIGHT × (tok/s / max tok/s) │ - │ + TTFB_WEIGHT × ttfb_score │ - │ - slow-model penalty (if < 10 tok/s) │ - │ - rate-limit penalty × 0.05 │ - │ (balanced: intelligence 10%, smart mode: 60%) │ - │ 2. Sort descending; sticky session pins preferred. │ - │ 3. First model with a healthy, under-limit key │ - │ wins; decrypt key, call provider SDK. │ - │ 4. 
On 429/5xx → key cooldown + retry next key. │ - │ Model penalty only fires when all keys for │ - │ that model are exhausted by 429s. │ - └──────────────────────────────────────────────────────┘ - │ - ┌──────────────┬────────────┬──────────┴─────────┬─────────────┬──────────┐ - ▼ ▼ ▼ ▼ ▼ ▼ - Google Groq Cerebras OpenRouter HF …10 more + ▼ + ┌──────────────────────────────────────────────────────┐ + │ Router (Thompson-sampling bandit) │ + │ 1. For each enabled model, sample a score: │ + │ score = Beta(wins+2, losses+2) sample │ + │ + INTELLIGENCE_WEIGHT × normalized rank │ + │ + SPEED_WEIGHT × (tok/s / max tok/s) │ + │ + TTFB_WEIGHT × ttfb_score │ + │ - slow-model penalty (if < 10 tok/s) │ + │ - rate-limit penalty × 0.05 │ + │ (balanced: intelligence 10%, smart mode: 60%) │ + │ 2. Sort descending; sticky session pins preferred. │ + │ 3. First model with a healthy, under-limit key │ + │ wins; decrypt key, call provider SDK. │ + │ 4. On 429/5xx → key cooldown + retry next key. │ + │ Model penalty only fires when all keys for │ + │ that model are exhausted by 429s. │ + └──────────────────────────────────────────────────────┘ + │ + ┌──────────────┬────────────┬──────────┴─────────┬─────────────┬──────────┐ + ▼ ▼ ▼ ▼ ▼ ▼ + Google Groq Cerebras OpenRouter HF …10 more ``` - **Router** (`server/src/services/router.ts`) — Thompson-sampling multi-armed bandit. Samples from each model's Beta posterior over success rate, adds a normalized tok/s speed reward (models below 10 tok/s receive an active penalty), and subtracts a time-decaying rate-limit penalty for recent 429s. The bandit penalty is model-scoped and fires only when all keys for a model are exhausted by 429s in the current retry loop — a single key rate-limiting does not demote the model if other keys remain. Stochastic selection means the router naturally explores new models while converging on faster, more reliable ones as data accumulates. 
@@ -423,6 +404,36 @@ Removed since the April 2026 review: Hugging Face, Moonshot, and MiniMax direct **This project is for personal experimentation and learning, not production.** Free tiers exist so developers can prototype against them; they aren't a stable, supported inference substrate and shouldn't be treated as one. If you build something real on top of FreeLLMAPI, swap in a paid API before you ship. Your relationship with each upstream provider is governed by the terms you accepted when you created your account — those terms still apply when the traffic is proxied through this project, and you're responsible for complying with them. +## Environment variables + +| Variable | Required | Description | +|---|---|---| +| `ENCRYPTION_KEY` | Yes | 64-char hex key for AES-256-GCM at-rest key encryption. Generate with: `node -e "console.log(require('crypto').randomBytes(32).toString('hex'))"` | +| `ADMIN_DASHBOARD_KEY` | Yes (prod) | Bearer token for all `/api/*` dashboard routes. Min 24 chars. Omitting it only works in `NODE_ENV=development`. Generate with: `node -e "console.log('freellmapi-admin-' + require('crypto').randomBytes(32).toString('hex'))"` | +| `ADMIN_CORS_ORIGINS` | No | Comma-separated browser origins allowed to call `/api/*` cross-origin (e.g. `http://localhost:5173`). Same-origin deployments don't need this. | +| `DISABLE_HSTS` | No | Set `true` to skip HSTS headers — useful when terminating TLS at a reverse proxy. | +| `LOG_SENSITIVE_DATA` | No | Set `true` to log full request/response bodies. Off by default; never enable in production. | +| `PORT` | No | Server port (default `3001`). | +| `LLM_PROXY_URL` | No | Base URL of the llm-proxy router (e.g. `https://router.example.com` or `https://llm-proxy..workers.dev`). Enables automatic proxy topology discovery at startup. If unset, IP capacity is disabled. | +| `INTERNAL_AUTH_SECRET` | No | Must match llm-proxy's `INTERNAL_AUTH_SECRET`. Required for topology discovery when `LLM_PROXY_URL` is set. 
| + +> **Note:** `PROXY_COUNT` in `llm-proxy/.env` is managed by freellmapi-alpha's deploy process. Do not set it manually. + +## Proxy Topology Discovery + +When `LLM_PROXY_URL` is set, freellmapi-alpha fetches the proxy topology from llm-proxy at startup: + +``` +GET /internal/v1/topology +Header: X-Internal-Auth: +``` + +This returns the deployed worker count and proxy list, eliminating the need to manually synchronize proxy count. The topology is deploy-authoritative — generated once during `pnpm run deploy-proxy` and served as an immutable constant. + +**Fallback chain:** dynamic topology → disabled (IP capacity off) + +Existing deployments without `LLM_PROXY_URL` continue to work exactly as before. + ## License [MIT](./LICENSE) diff --git a/client/src/pages/AnalyticsPage.tsx b/client/src/pages/AnalyticsPage.tsx index b4c3f284..53a0ca4f 100644 --- a/client/src/pages/AnalyticsPage.tsx +++ b/client/src/pages/AnalyticsPage.tsx @@ -11,6 +11,48 @@ import { PageHeader } from '@/components/page-header' type TimeRange = '24h' | '7d' | '30d' +interface AnalyticsSummary { + totalRequests?: number + successRate?: number + totalInputTokens?: number + totalOutputTokens?: number + avgLatencyMs?: number + estimatedCostSavings?: number +} + +interface PlatformRow { + platform: string + requests: number + successRate: number + outputTokensPerSec?: number + [key: string]: unknown +} + +interface TimelineRow { + timestamp: string + successCount: number + failureCount: number +} + +interface ModelRow { + displayName: string + platform: string + intelligenceRank?: number + requests: number + successRate: number + avgLatencyMs: number + avgTtfbMs?: number + totalInputTokens?: number + totalOutputTokens?: number + outputTokensPerSec?: number +} + +interface ErrorRow { + id: string + platform: string + error: string + createdAt: string +} function formatTokens(n?: number): string { if (!n) return '0' if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M` @@ -45,11 +87,7 @@ const 
primaryFill = 'var(--foreground)' type SortKey = 'displayName' | 'platform' | 'intelligenceRank' | 'requests' | 'successRate' | 'avgLatencyMs' | 'avgTtfbMs' | 'totalInputTokens' | 'totalOutputTokens' | 'outputTokensPerSec' type SortDir = 'asc' | 'desc' -function sortModels( - rows: Record[], - key: SortKey, - dir: SortDir -): Record[] { +function sortModels(rows: ModelRow[], key: SortKey, dir: SortDir): ModelRow[] { return [...rows].sort((a, b) => { const av = a[key] const bv = b[key] @@ -96,30 +134,30 @@ export default function AnalyticsPage() { const { data: summary } = useQuery({ queryKey: ['analytics', 'summary', range], - queryFn: () => apiFetch(`/api/analytics/summary?range=${range}`), + queryFn: () => apiFetch(`/api/analytics/summary?range=${range}`), }) const { data: byPlatform = [] } = useQuery({ queryKey: ['analytics', 'by-platform', range], - queryFn: () => apiFetch(`/api/analytics/by-platform?range=${range}`), + queryFn: () => apiFetch(`/api/analytics/by-platform?range=${range}`), }) const { data: timeline = [] } = useQuery({ queryKey: ['analytics', 'timeline', range], - queryFn: () => apiFetch(`/api/analytics/timeline?range=${range}`), + queryFn: () => apiFetch(`/api/analytics/timeline?range=${range}`), }) const { data: byModel = [] } = useQuery({ queryKey: ['analytics', 'by-model', range], - queryFn: () => apiFetch(`/api/analytics/by-model?range=${range}`), + queryFn: () => apiFetch(`/api/analytics/by-model?range=${range}`), }) const { data: errors = [] } = useQuery({ queryKey: ['analytics', 'errors', range], - queryFn: () => apiFetch(`/api/analytics/errors?range=${range}`), + queryFn: () => apiFetch(`/api/analytics/errors?range=${range}`), }) - const byPlatformWithFailures = byPlatform.map((p: { requests: number; successRate: number; [key: string]: unknown }) => { + const byPlatformWithFailures = byPlatform.map((p: PlatformRow) => { const failed = Math.round(p.requests * (100 - p.successRate) / 100) return { ...p, successRequests: p.requests - 
failed, failedRequests: failed } }) @@ -268,7 +306,7 @@ export default function AnalyticsPage() { - {errors.slice(0, 20).map((e: { id: string; platform: string; error: string; createdAt: string }) => ( + {errors.slice(0, 20).map((e: ErrorRow) => ( {e.platform} {e.error} diff --git a/install.ps1 b/install.ps1 index 4e9552bc..c6d0ea8e 100644 --- a/install.ps1 +++ b/install.ps1 @@ -161,5 +161,4 @@ Ok "Installation complete!" Write-Host "" Write-Host "Next steps:" Write-Host " pnpm dev Start local development" -Write-Host " cd llm-proxy; npm run deploy Deploy proxy to Cloudflare" Write-Host " pnpm run verify Verify deployment" diff --git a/install.sh b/install.sh index 97aa104f..0c201bae 100755 --- a/install.sh +++ b/install.sh @@ -151,5 +151,4 @@ ok "Installation complete!" echo echo "Next steps:" echo " pnpm dev Start local development" -echo " cd llm-proxy && npm run deploy Deploy proxy to Cloudflare" echo " pnpm run verify Verify deployment" diff --git a/llm-proxy b/llm-proxy index dfd80965..5ec9db69 160000 --- a/llm-proxy +++ b/llm-proxy @@ -1 +1 @@ -Subproject commit dfd809657e09f6bcd4406e3968fd47757f604e6c +Subproject commit 5ec9db6975405ac195911649e9ddb5feab2b0f37 diff --git a/package.json b/package.json index 03fc26b2..df47369d 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,9 @@ "setup": "tsx scripts/setup.ts", "sync-secrets": "tsx scripts/sync-secrets.ts", "verify": "tsx scripts/verify-deploy.ts", - "rotate-secrets": "tsx scripts/rotate-secrets.ts" + "rotate-secrets": "tsx scripts/rotate-secrets.ts", + "deploy-proxy": "tsx scripts/deploy-proxy.ts", + "reconcile-topology": "tsx scripts/reconcile-topology.ts" }, "devDependencies": { "concurrently": "^9.1.2", diff --git a/scripts/deploy-proxy.ts b/scripts/deploy-proxy.ts new file mode 100644 index 00000000..0f772e6b --- /dev/null +++ b/scripts/deploy-proxy.ts @@ -0,0 +1,414 @@ +// Deploy Proxy — Deploys llm-proxy workers to Cloudflare, captures the +// workers.dev URL, and writes LLM_PROXY_URL into 
the project .env. +// +// Usage: +// pnpm run deploy-proxy — deploy and update .env +// pnpm run deploy-proxy -- --dry-run — report actions without writing +// +// When called programmatically: +// deployProxy(3) — deploy exactly 3 proxies (capacity computed externally) +// deployProxy() — fall back to PROXY_COUNT env var or default + +import { spawn } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { parseEnvFile, updateEnvKey } from "./lib/env.js"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const projectRoot = path.resolve(__dirname, ".."); +const llmProxyRoot = path.join(projectRoot, "llm-proxy"); +const llmProxyEnvPath = path.join(llmProxyRoot, ".env"); +const frellmapiEnvPath = path.join(projectRoot, ".env"); + +const DIST_DIR = path.join(llmProxyRoot, "dist"); + +const DEFAULT_PROXY_COUNT = 1; +const WORKER_NAME_PREFIX = "llm-proxy-"; +const WORKER_NAME_PAD = 2; + +const DEPLOY_CONFIG = { + maxRetries: 3, + baseDelayMs: 2000, + staggerDelayMs: 1000, +} as const; + +// ── CLI args ────────────────────────────────────────────────────────── + +const args = process.argv.slice(2); +const isDryRun = args.includes("--dry-run"); + +if (isDryRun) { + console.log("=== DRY RUN — no changes will be made ===\n"); +} + +// ── Types ───────────────────────────────────────────────────────────── + +interface WorkerConfig { + name: string; + configPath: string; + type: "proxy" | "router"; +} + +interface DeployResult { + worker: WorkerConfig; + success: boolean; + attempts: number; + error?: string; + durationMs: number; + stdout?: string; +} + +// ── Env loading ─────────────────────────────────────────────────────── + +function loadEnv(): Map { + if (!fs.existsSync(llmProxyEnvPath)) { + console.error( + "❌ llm-proxy/.env not found. Run `pnpm run setup` first." 
+ ); + process.exit(1); + } + return parseEnvFile(llmProxyEnvPath); +} + +function requireEnv(env: Map, name: string, minLen: number): string { + const val = env.get(name); + if (!val) { + console.error(`❌ ${name} not set in llm-proxy/.env`); + process.exit(1); + } + if (val.length < minLen) { + console.error(`❌ ${name} must be at least ${minLen} characters`); + process.exit(1); + } + return val; +} + +// ── TOML generation ─────────────────────────────────────────────────── + +function tomlStringify(obj: Record, indent: string = ""): string { + const lines: string[] = []; + + for (const [key, value] of Object.entries(obj)) { + if (value === undefined || value === null) continue; + + if (Array.isArray(value)) { + for (const item of value) { + if (typeof item === "object" && item !== null) { + lines.push(`${indent}[[${key}]]`); + for (const [k, v] of Object.entries(item as Record)) { + if (v === undefined || v === null) continue; + lines.push(`${indent}${k} = ${tomlValue(v)}`); + } + } else { + lines.push(`${indent}${key} = ${tomlValue(item)}`); + } + } + } else if (typeof value === "object" && value !== null) { + lines.push(`${indent}[${key}]`); + lines.push(tomlStringify(value as Record, indent)); + } else { + lines.push(`${indent}${key} = ${tomlValue(value)}`); + } + } + + return lines.join("\n"); +} + +function tomlValue(value: unknown): string { + if (typeof value === "string") return JSON.stringify(value); + if (typeof value === "number") return value.toString(); + if (typeof value === "boolean") return value.toString(); + return `"${String(value)}"`; +} + +function generateProxyToml(index: number, internalSecret: string): string { + const name = `${WORKER_NAME_PREFIX}${String(index).padStart(WORKER_NAME_PAD, "0")}`; + + const config: Record = { + name, + main: "../src/worker.ts", + compatibility_date: "2024-12-01", + placement: { mode: "off" }, + vars: { + WORKER_ROLE: "proxy", + PROXY_INDEX: String(index), + INTERNAL_AUTH_SECRET: internalSecret, + }, + }; + 
+ return tomlStringify(config); +} + +function generateRouterToml(proxyCount: number, internalSecret: string, authKey: string): string { + const services: Record[] = []; + for (let i = 1; i <= proxyCount; i++) { + services.push({ + binding: `PROXY_${i}`, + service: `${WORKER_NAME_PREFIX}${String(i).padStart(WORKER_NAME_PAD, "0")}`, + }); + } + + const config: Record = { + name: "llm-proxy-router", + main: "../src/worker.ts", + compatibility_date: "2024-12-01", + placement: { mode: "smart" }, + vars: { + WORKER_ROLE: "router", + AUTH_KEY: authKey, + INTERNAL_AUTH_SECRET: internalSecret, + PROXY_COUNT: String(proxyCount), + }, + services, + }; + + return tomlStringify(config); +} + +// ── Wrangler deploy ─────────────────────────────────────────────────── + +function runWranglerDeploy(configPath: string): Promise<{ success: boolean; stdout: string; stderr: string }> { + const configRel = path.relative(llmProxyRoot, configPath); + const cmd = `npx wrangler deploy -c ${configRel}`; + + return new Promise((resolve) => { + const proc = spawn(cmd, { cwd: llmProxyRoot, shell: true, stdio: "pipe" }); + let stdout = ""; + let stderr = ""; + + proc.on("error", (err) => { resolve({ success: false, stdout: "", stderr: err.message }); }); + proc.stdout?.on("data", (d: Buffer) => { + stdout += d.toString(); + }); + proc.stderr?.on("data", (d: Buffer) => { + stderr += d.toString(); + }); + + proc.on("close", (code) => { + resolve({ + success: code === 0, + stdout, + stderr: stderr || "", + }); + }); + }); +} + +async function deployWithRetry(worker: WorkerConfig): Promise { + const start = Date.now(); + let lastError = ""; + + for (let attempt = 1; attempt <= DEPLOY_CONFIG.maxRetries; attempt++) { + console.log(` 🔄 ${worker.name} (attempt ${attempt})...`); + const result = await runWranglerDeploy(worker.configPath); + + if (result.success) { + return { worker, success: true, attempts: attempt, durationMs: Date.now() - start, stdout: result.stdout }; + } + + lastError = 
result.stderr; + + if (attempt < DEPLOY_CONFIG.maxRetries) { + const delay = DEPLOY_CONFIG.baseDelayMs * 2 ** (attempt - 1); + console.log(` ⚠️ ${worker.name} failed, retrying in ${delay}ms...`); + await new Promise((r) => setTimeout(r, delay)); + } + } + + return { worker, success: false, attempts: DEPLOY_CONFIG.maxRetries, error: lastError, durationMs: Date.now() - start }; +} + +async function deployParallel(workers: WorkerConfig[]): Promise<DeployResult[]> { + const tasks = workers.map((worker, index) => + (async () => { + await new Promise((r) => setTimeout(r, index * DEPLOY_CONFIG.staggerDelayMs)); + return deployWithRetry(worker); + })(), + ); + return Promise.all(tasks); +} + +// ── URL capture ─────────────────────────────────────────────────────── + +function captureRouterUrl(stdout: string): string | null { + const match = stdout.match(/https:\/\/llm-proxy-router\.[a-zA-Z0-9-]+\.workers\.dev/); + return match ? match[0] : null; +} + +// ── Summary ─────────────────────────────────────────────────────────── + +function printSummary(results: DeployResult[], totalStart: number): void { + const succeeded = results.filter((r) => r.success).length; + const totalMs = Date.now() - totalStart; + + console.log("\n┌─────────────────────────────────────────────┐"); + console.log("│ Deploy Summary │"); + console.log("├─────────────────────────────────────────────┤"); + + for (const r of results) { + const status = r.success ? "✅" : "❌"; + const name = r.worker.name.padEnd(20); + const attempts = r.success + ? `(${r.attempts} attempt${r.attempts > 1 ? 
"s" : ""}, ${(r.durationMs / 1000).toFixed(1)}s)` + : `(${r.attempts} attempts)`; + + console.log(`│ ${status} ${name} ${attempts.padEnd(20)} │`); + + if (!r.success && r.error) { + const errorLine = r.error.split("\n")[0].slice(0, 40); + console.log(`│ Error: ${errorLine.padEnd(32)} │`); + } + } + + console.log("├─────────────────────────────────────────────┤"); + console.log(`${`│ Total: ${succeeded}/${results.length} succeeded in ${(totalMs / 1000).toFixed(1)}s`.padEnd(46)}│`); + console.log("└─────────────────────────────────────────────┘"); +} + +// ── Main ────────────────────────────────────────────────────────────── + +export async function deployProxy(proxyCount?: number): Promise<void> { + const env = loadEnv(); + + const authKey = requireEnv(env, "AUTH_KEY", 8); + const internalSecret = requireEnv(env, "INTERNAL_AUTH_SECRET", 32); + + // Use provided capacity, or fall back to env, or default to 1 + const effectiveCount = (proxyCount ?? Number(env.get("PROXY_COUNT"))) || DEFAULT_PROXY_COUNT; + + // Write the capacity to llm-proxy/.env so the router picks it up + // This is the ONLY place PROXY_COUNT is written — never user-editable (skipped on --dry-run) + if (!isDryRun) updateEnvKey(llmProxyEnvPath, "PROXY_COUNT", String(effectiveCount), false); + + console.log(`🚀 Deploying ${effectiveCount} proxies + router`); + + if (isDryRun) { + console.log(` [dry-run] Would generate TOML configs in ${path.relative(projectRoot, DIST_DIR)}`); + for (let i = 1; i <= effectiveCount; i++) { + const name = `${WORKER_NAME_PREFIX}${String(i).padStart(WORKER_NAME_PAD, "0")}`; + console.log(` [dry-run] Would deploy proxy: ${name}`); + } + console.log(` [dry-run] Would deploy router: llm-proxy-router`); + console.log(` [dry-run] Would capture router URL from wrangler output`); + console.log(` [dry-run] Would write LLM_PROXY_URL to .env`); + console.log("\n✅ Dry run complete. 
No files were modified."); + return; + } + + if (!fs.existsSync(DIST_DIR)) { + fs.mkdirSync(DIST_DIR, { recursive: true }); + } + + const totalStart = Date.now(); + const allResults: DeployResult[] = []; + + // Generate and deploy proxy workers + const proxyWorkers: WorkerConfig[] = []; + for (let i = 1; i <= effectiveCount; i++) { + const toml = generateProxyToml(i, internalSecret); + const configPath = path.join(DIST_DIR, `proxy-${String(i).padStart(2, "0")}.toml`); + fs.writeFileSync(configPath, toml); + proxyWorkers.push({ + name: `${WORKER_NAME_PREFIX}${String(i).padStart(WORKER_NAME_PAD, "0")}`, + configPath, + type: "proxy", + }); + } + + console.log("\n📦 Phase 1: Deploying proxies..."); + const proxyResults = await deployParallel(proxyWorkers); + allResults.push(...proxyResults); + + const failedProxies = proxyResults.filter((r) => !r.success); + if (failedProxies.length > 0) { + console.error(`\n⚠️ ${failedProxies.length} proxies failed. Continuing to Router...`); + } + + // Generate and deploy router + const routerToml = generateRouterToml(effectiveCount, internalSecret, authKey); + const routerConfigPath = path.join(DIST_DIR, "router.toml"); + fs.writeFileSync(routerConfigPath, routerToml); + const routerWorker: WorkerConfig = { + name: "llm-proxy-router", + configPath: routerConfigPath, + type: "router", + }; + + console.log("\n📦 Phase 2: Deploying router..."); + const routerResult = await deployWithRetry(routerWorker); + allResults.push(routerResult); + + // Summary + printSummary(allResults, totalStart); + + // Capture URL from router deploy output + if (!routerResult.success) { + console.error("\n❌ Router deployment failed. 
Cannot capture URL."); + console.error(" Manual recovery: deploy the router manually and set LLM_PROXY_URL in .env."); + process.exit(1); + } + + // Capture URL from router deploy output (stdout already captured in deployWithRetry) + const wranglerOutput = routerResult.stdout || ""; + const routerUrl = captureRouterUrl(wranglerOutput); + + if (!routerUrl) { + console.error("\n❌ Could not detect router URL from wrangler output."); + console.error(" Check wrangler deploy output manually."); + console.error(` Then set LLM_PROXY_URL in .env, e.g.:`); + console.error(` LLM_PROXY_URL=https://llm-proxy-router.<your-subdomain>.workers.dev`); + process.exit(1); + } + + // Validate HTTPS URL + try { + const parsed = new URL(routerUrl); + if (parsed.protocol !== "https:") { + throw new Error("URL must use HTTPS"); + } + } catch (err) { + console.error(`\n❌ Invalid URL captured: ${routerUrl}`); + console.error(" Set LLM_PROXY_URL manually in .env."); + process.exit(1); + } + + console.log(` ✅ Router URL: ${routerUrl}`); + + // Write LLM_PROXY_URL to .env + console.log("\n📝 Writing LLM_PROXY_URL and ROUTER_DOMAIN to .env files..."); + updateEnvKey(frellmapiEnvPath, "LLM_PROXY_URL", routerUrl, false); + // Write bare domain to llm-proxy/.env as ROUTER_DOMAIN (for backward compat) + const bareDomain = routerUrl.replace(/^https?:\/\//, ""); + updateEnvKey(llmProxyEnvPath, "ROUTER_DOMAIN", bareDomain, false); + console.log(` ✅ LLM_PROXY_URL and ROUTER_DOMAIN written to .env files`); + + const totalFailed = allResults.filter((r) => !r.success).length; + if (totalFailed > 0) { + console.error(`\n⚠️ ${totalFailed} worker(s) failed deployment.`); + process.exit(1); + } + + console.log(`\n✅ All systems operational.`); + console.log(` Configs: ${path.relative(projectRoot, DIST_DIR)}`); + console.log(` Router: ${routerUrl}`); +} + +// ── Entry point ─────────────────────────────────────────────────────── + +async function main(): Promise<void> { + try { + await deployProxy(); + process.exit(0); + 
} catch 
(err) { + console.error("❌ deploy-proxy failed:", err); + process.exit(1); + } +} + +// Only run main if executed directly (not imported) +const isMain = import.meta.url === `file://${process.argv[1]}`; +if (isMain) { + main(); +} diff --git a/scripts/lib/reconcile-core.ts b/scripts/lib/reconcile-core.ts new file mode 100644 index 00000000..6beaeb98 --- /dev/null +++ b/scripts/lib/reconcile-core.ts @@ -0,0 +1,136 @@ +// Shared reconciliation logic — used by both the CLI script and server startup + +import fs from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { parseEnvFile } from "./env.js"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const projectRoot = path.resolve(__dirname, "../.."); +const frellmapiEnvPath = path.join(projectRoot, ".env"); + +interface TopologySnapshot { + schemaVersion: number; + topologyId: string; + topologyGeneratedAt: number; + workerCount: number; + proxies: Array<{ id: number; name: string; status: string }>; +} + +interface DriftResult { + drifted: boolean; + actual: number; + expected: number; +} + +async function getExpectedWorkerCount(): Promise<number> { + const { initDb } = await import("../../server/src/db/index.js"); + const { getRequiredWorkerCount } = await import("../../server/src/services/capacityService.js"); + initDb(); + return getRequiredWorkerCount(); +} + +async function fetchTopologyWorkerCount( + proxyUrl: string, + internalAuth: string, +): Promise<{ ok: boolean; workerCount: number }> { + try { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 5000); + + try { + const res = await fetch(`${proxyUrl}/internal/v1/topology`, { + method: "GET", + headers: internalAuth ? 
{ "X-Internal-Auth": internalAuth } : {}, + signal: controller.signal, + }); + + if (!res.ok) { + console.warn(`[reconcile] topology fetch failed with status ${res.status}`); + return { ok: false, workerCount: 0 }; + } + + const data: unknown = await res.json(); + if ( + typeof data === "object" && + data !== null && + "workerCount" in data && + typeof (data as TopologySnapshot).workerCount === "number" + ) { + return { ok: true, workerCount: (data as TopologySnapshot).workerCount }; + } + + console.warn("[reconcile] invalid topology response"); + return { ok: false, workerCount: 0 }; + } finally { + clearTimeout(timeout); + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.warn(`[reconcile] topology unavailable (${message})`); + return { ok: false, workerCount: 0 }; + } +} + +export async function detectDrift(): Promise<DriftResult> { + const env = parseEnvFile(frellmapiEnvPath); + const proxyUrl = env.get("LLM_PROXY_URL"); + const internalAuth = env.get("INTERNAL_AUTH_SECRET") ?? 
""; + + if (!proxyUrl) { + console.log("[reconcile] LLM_PROXY_URL not set, skipping drift detection"); + return { drifted: false, actual: 0, expected: 0 }; + } + + const expected = await getExpectedWorkerCount(); + const topology = await fetchTopologyWorkerCount(proxyUrl, internalAuth); + + if (!topology.ok) { + console.warn("[reconcile] topology unavailable, skipping reconciliation"); + return { drifted: false, actual: 0, expected: 0 }; + } + + return { + drifted: topology.workerCount < expected, + actual: topology.workerCount, + expected, + }; +} + +export async function reconcile(): Promise<void> { + const drift = await detectDrift(); + + if (!drift.drifted) { + if (drift.actual === 0 && drift.expected === 0) { + console.log("[reconcile] Skipped (topology unavailable)"); + } else { + console.log(`[reconcile] No drift detected (${drift.actual} workers)`); + } + return; + } + + console.log( + `[reconcile] Topology drift detected: ${drift.actual} workers, expected ${drift.expected}`, + ); + + console.log(`[reconcile] Redeploying llm-proxy with ${drift.expected} workers...`); + const { deployProxy } = await import("../deploy-proxy.js"); + await deployProxy(drift.expected); + + // Verify + const env = parseEnvFile(frellmapiEnvPath); + const proxyUrl = env.get("LLM_PROXY_URL"); + const internalAuth = env.get("INTERNAL_AUTH_SECRET") ?? ""; + const newTopology = await fetchTopologyWorkerCount(proxyUrl, internalAuth); + const newActual = newTopology.workerCount; + + if (newActual >= drift.expected) { + console.log(`[reconcile] Success: ${newActual} workers now available`); + } else { + console.error( + `[reconcile] Warning: ${newActual} workers after redeployment (expected ${drift.expected})`, + ); + console.error("[reconcile] The redeployment may still be propagating. 
Check again shortly."); + } +} diff --git a/scripts/reconcile-topology.ts b/scripts/reconcile-topology.ts new file mode 100644 index 00000000..2d8f21b4 --- /dev/null +++ b/scripts/reconcile-topology.ts @@ -0,0 +1,63 @@ +// Reconcile Topology — Detects and fixes topology drift between the number of +// provider API keys and the deployed llm-proxy worker count. +// +// Usage: +// pnpm run reconcile-topology — detect drift and redeploy if needed +// pnpm run reconcile-topology -- --dry-run — report actions without deploying +// +// Capacity is computed from the database and passed directly to deployProxy(), +// which writes PROXY_COUNT to llm-proxy/.env internally. The env var is never +// the source of truth — it is always written by code. + +import { detectDrift } from "./lib/reconcile-core.js"; + +// ── CLI args ────────────────────────────────────────────────────────── + +const args = process.argv.slice(2); +const isDryRun = args.includes("--dry-run"); + +if (isDryRun) { + console.log("=== DRY RUN — no changes will be made ===\n"); +} + +// ── Reconciliation ───────────────────────────────────────────────────── + +async function reconcile(): Promise { + const drift = await detectDrift(); + + if (!drift.drifted) { + if (drift.actual === 0 && drift.expected === 0) { + console.log("[reconcile] Skipped (topology unavailable)"); + } else { + console.log(`[reconcile] No drift detected (${drift.actual} workers)`); + } + return; + } + + console.log( + `[reconcile] Topology drift detected: ${drift.actual} workers, expected ${drift.expected}`, + ); + + if (isDryRun) { + console.log(` [dry-run] Would redeploy llm-proxy with ${drift.expected} workers`); + return; + } + + // Delegate to shared reconcile logic (handles deploy + verify) + const { reconcile: sharedReconcile } = await import("./lib/reconcile-core.js"); + await sharedReconcile(); +} + +// ── Main ─────────────────────────────────────────────────────────────── + +async function main(): Promise { + try { + await 
reconcile(); + process.exit(0); + } catch (err) { + console.error("[reconcile] Fatal error:", err); + process.exit(1); + } +} + +main(); diff --git a/scripts/setup.ts b/scripts/setup.ts index ffd8a0cf..995001ea 100644 --- a/scripts/setup.ts +++ b/scripts/setup.ts @@ -4,9 +4,10 @@ // creates .env files from templates, and populates missing configuration. // // Usage: -// pnpm run setup — interactive setup -// pnpm run setup -- --dry-run — report actions without writing -// pnpm run setup -- --regenerate — overwrite existing values +// pnpm run setup — non-interactive setup (default: auto-generate all secrets, deploy proxy) +// pnpm run setup -- --interactive — interactive setup (prompts for secrets) +// pnpm run setup -- --dry-run — report actions without writing +// pnpm run setup -- --regenerate — overwrite existing values import fs from 'node:fs'; import path from 'node:path'; @@ -29,6 +30,7 @@ const llmProxyEnvExample = path.join(llmProxyRoot, '.env.example'); const args = process.argv.slice(2); const isDryRun = args.includes('--dry-run'); const isRegenerate = args.includes('--regenerate'); +const isInteractive = args.includes('--interactive'); if (isDryRun) { console.log('=== DRY RUN — no changes will be made ===\n'); @@ -164,13 +166,21 @@ async function main(): Promise { } } - // AUTH_KEY — prompt user + // AUTH_KEY — auto-generate in non-interactive mode, prompt in interactive mode if (isRegenerate || !llmProxyEnv.has('AUTH_KEY') || !llmProxyEnv.get('AUTH_KEY')) { - const suggested = generateAuthKey(); - const authKey = await prompt('Enter AUTH_KEY (or press Enter for random):', suggested); - if (authKey) { - logAction('set', 'AUTH_KEY', 'llm-proxy/.env'); + if (!isInteractive) { + // Non-interactive: auto-generate without prompt + const authKey = generateAuthKey(); + logAction('generate', 'AUTH_KEY', 'llm-proxy/.env'); llmProxyUpdates.set('AUTH_KEY', authKey); + } else { + // Interactive: prompt user + const suggested = generateAuthKey(); + const authKey = 
await prompt('Enter AUTH_KEY (or press Enter for random):', suggested); + if (authKey) { + logAction('set', 'AUTH_KEY', 'llm-proxy/.env'); + llmProxyUpdates.set('AUTH_KEY', authKey); + } } } else { console.log(' preserved: AUTH_KEY (already set)'); @@ -180,20 +190,26 @@ async function main(): Promise { console.log('\n── Router Configuration ──'); - const existingRouterDomain = llmProxyEnv.get('ROUTER_DOMAIN') || 'router.example.com'; - const routerDomain = await prompt('Enter router domain:', existingRouterDomain); + if (isInteractive) { + // Interactive mode: preserve existing prompt behavior + const existingRouterDomain = llmProxyEnv.get('ROUTER_DOMAIN') || 'router.example.com'; + const routerDomain = await prompt('Enter router domain:', existingRouterDomain); - if (routerDomain && routerDomain !== existingRouterDomain) { - logAction('set', 'ROUTER_DOMAIN', 'llm-proxy/.env'); - llmProxyUpdates.set('ROUTER_DOMAIN', routerDomain); - } + if (routerDomain && routerDomain !== existingRouterDomain) { + logAction('set', 'ROUTER_DOMAIN', 'llm-proxy/.env'); + llmProxyUpdates.set('ROUTER_DOMAIN', routerDomain); + } - const llmProxyUrl = `https://${routerDomain}`; - if (!frellmapiEnv.has('LLM_PROXY_URL') || isRegenerate) { - logAction('set', 'LLM_PROXY_URL', '.env'); - frellmapiUpdates.set('LLM_PROXY_URL', llmProxyUrl); + const llmProxyUrl = `https://${routerDomain}`; + if (!frellmapiEnv.has('LLM_PROXY_URL') || isRegenerate) { + logAction('set', 'LLM_PROXY_URL', '.env'); + frellmapiUpdates.set('LLM_PROXY_URL', llmProxyUrl); + } else { + console.log(' preserved: LLM_PROXY_URL=' + frellmapiEnv.get('LLM_PROXY_URL')); + } } else { - console.log(' preserved: LLM_PROXY_URL=' + frellmapiEnv.get('LLM_PROXY_URL')); + // Non-interactive mode: skip ROUTER_DOMAIN entirely, deploy-proxy handles it + console.log(' ROUTER_DOMAIN and LLM_PROXY_URL will be configured by deploy-proxy'); } // ── Step 5: Write updates ─────────────────────────────────────────── @@ -218,19 +234,33 @@ async 
function main(): Promise { console.log(' No changes needed in llm-proxy/.env'); } - // ── Step 6: Summary ───────────────────────────────────────────────── + // ── Step 6: Deploy proxy (non-interactive mode only) ───────────────── + + if (!isDryRun && !isInteractive) { + console.log('\n── Deploying llm-proxy ──'); + const { deployProxy } = await import('./deploy-proxy.js'); + await deployProxy(); + } else if (isDryRun && !isInteractive) { + console.log('\n [dry-run] Would run: pnpm run deploy-proxy'); + } + + // ── Step 7: Summary ───────────────────────────────────────────────── console.log('\n── Summary ──'); if (isDryRun) { console.log(' Dry run complete. No files were modified.'); - } else { + } else if (isInteractive) { console.log(' Configuration complete.'); + console.log('\nNext steps:'); + console.log(' pnpm dev Start local development'); + console.log(' cd llm-proxy && npm run deploy Deploy proxy to Cloudflare'); + console.log(' pnpm run verify Verify deployment'); + } else { + console.log(' Installation complete. llm-proxy deployed.'); + console.log('\nNext steps:'); + console.log(' pnpm dev Start local development'); + console.log(' pnpm run verify Verify deployment'); } - - console.log('\nNext steps:'); - console.log(' pnpm dev Start local development'); - console.log(' cd llm-proxy && npm run deploy Deploy proxy to Cloudflare'); - console.log(' pnpm run verify Verify deployment'); } main() diff --git a/scripts/verify-deploy.ts b/scripts/verify-deploy.ts index 4544a7f5..a9196e6f 100644 --- a/scripts/verify-deploy.ts +++ b/scripts/verify-deploy.ts @@ -1,12 +1,13 @@ // Verify Deploy — Post-Deployment Verification // // Checks that the deployment is working correctly: -// 1. llm-proxy deployment is reachable -// 2. Topology endpoint returns HTTP 200 -// 3. Topology response validates against schema -// 4. freellmapi server is running (end-to-end) -// 5. Discovered worker count is valid (>= 0) -// 6. 
Fallback mode is reported correctly when dynamic topology unavailable +// 1. LLM_PROXY_URL is set in .env +// 2. llm-proxy deployment is reachable +// 3. Topology endpoint returns HTTP 200 +// 4. Topology response validates against schema +// 5. freellmapi server is running (end-to-end) +// 6. Discovered worker count is valid (>= 0) +// 7. Fallback mode is reported correctly when dynamic topology unavailable // // Usage: // pnpm run verify — run verification @@ -93,25 +94,48 @@ async function main(): Promise { const llmProxyUrl = env.get('LLM_PROXY_URL'); const internalAuth = env.get('INTERNAL_AUTH_SECRET'); + // ── Check 1: LLM_PROXY_URL is set ─────────────────────────────────── + + if (llmProxyUrl) { + const isWorkersDev = llmProxyUrl.includes('workers.dev'); + const detail = isWorkersDev + ? `${llmProxyUrl} (zero-config deployment via workers.dev)` + : llmProxyUrl; + pass('1. LLM_PROXY_URL set', detail); + } else { + fail('1. LLM_PROXY_URL set', 'LLM_PROXY_URL not set in .env'); + } + + // ── Check 2: PROXY_IP_COUNT deprecation warning ───────────────────── + + const proxyIpCount = env.get('PROXY_IP_COUNT'); + if (proxyIpCount) { + pass('2. PROXY_IP_COUNT deprecated', `PROXY_IP_COUNT=${proxyIpCount} is set but deprecated — dynamic topology is now the primary source`); + } else { + pass('2. PROXY_IP_COUNT deprecated', 'Not set — dynamic topology is the primary source'); + } + if (!llmProxyUrl) { - fail('Configuration', 'LLM_PROXY_URL not set in .env'); + fail('Configuration', 'LLM_PROXY_URL not set in .env — remaining checks will likely fail'); printResults(); process.exit(1); } if (isDryRun) { console.log(' [dry-run] Would execute the following checks:\n'); - console.log(' 1. Check llm-proxy deployment reachability'); - console.log(' 2. Check topology endpoint returns HTTP 200'); - console.log(' 3. Check topology response schema validation'); - console.log(' 4. Check freellmapi server is running (end-to-end)'); - console.log(' 5. 
Check workerCount >= 0'); - console.log(' 6. Check fallback mode reporting'); + console.log(' 1. Check LLM_PROXY_URL is set in .env'); + console.log(' 2. Check PROXY_IP_COUNT deprecation status'); + console.log(' 3. Check llm-proxy deployment reachability'); + console.log(' 4. Check topology endpoint returns HTTP 200'); + console.log(' 5. Check topology response schema validation'); + console.log(' 6. Check freellmapi server is running (end-to-end)'); + console.log(' 7. Check workerCount >= 0'); + console.log(' 8. Check fallback mode reporting'); console.log('\n Dry run complete. No network calls were made.'); process.exit(0); } - // ── Check 1: llm-proxy deployment reachable ──────────────────────── + // ── Check 3: llm-proxy deployment reachable ──────────────────────── try { const controller = new AbortController(); @@ -122,16 +146,16 @@ async function main(): Promise { }); clearTimeout(timeout); void res; // We only care about reachability - pass('1. llm-proxy reachable', llmProxyUrl); + pass('3. llm-proxy reachable', llmProxyUrl); } catch (err) { const msg = err instanceof Error ? err.message : String(err); - fail('1. llm-proxy reachable', msg); + fail('3. llm-proxy reachable', msg); // If we can't reach the proxy, remaining checks will likely fail too printResults(); process.exit(1); } - // ── Check 2: Topology endpoint returns HTTP 200 ───────────────────── + // ── Check 4: Topology endpoint returns HTTP 200 ───────────────────── let topologyData: unknown = null; try { @@ -141,39 +165,39 @@ async function main(): Promise { } const res = await fetch(`${llmProxyUrl}/internal/v1/topology`, { headers }); if (res.ok) { - pass('2. Topology endpoint HTTP 200', `Status: ${res.status}`); + pass('4. Topology endpoint HTTP 200', `Status: ${res.status}`); topologyData = await res.json(); } else { - fail('2. Topology endpoint HTTP 200', `Status: ${res.status}`); + fail('4. 
Topology endpoint HTTP 200', `Status: ${res.status}`); } } catch (err) { const msg = err instanceof Error ? err.message : String(err); - fail('2. Topology endpoint HTTP 200', msg); + fail('4. Topology endpoint HTTP 200', msg); } - // ── Check 3: Topology schema validation ───────────────────────────── + // ── Check 5: Topology schema validation ───────────────────────────── if (topologyData !== null) { if (isValidTopology(topologyData)) { - pass('3. Topology schema valid', `schemaVersion=${topologyData.schemaVersion}, proxies=${topologyData.proxies.length}`); + pass('5. Topology schema valid', `schemaVersion=${topologyData.schemaVersion}, proxies=${topologyData.proxies.length}`); } else { - fail('3. Topology schema valid', 'Response does not match expected topology schema'); + fail('5. Topology schema valid', 'Response does not match expected topology schema'); } - // ── Check 5: workerCount >= 0 ───────────────────────────────────── + // ── Check 7: workerCount >= 0 ───────────────────────────────────── const workerCount = (topologyData as TopologyResponse).workerCount; if (workerCount >= 0) { - pass('5. Worker count valid', `workerCount=${workerCount}`); + pass('7. Worker count valid', `workerCount=${workerCount}`); } else { - fail('5. Worker count valid', `workerCount=${workerCount} (expected >= 0)`); + fail('7. Worker count valid', `workerCount=${workerCount} (expected >= 0)`); } } else { - fail('3. Topology schema valid', 'Skipped (no topology data)'); - fail('5. Worker count valid', 'Skipped (no topology data)'); + fail('5. Topology schema valid', 'Skipped (no topology data)'); + fail('7. 
Worker count valid', 'Skipped (no topology data)'); } - // ── Check 4: freellmapi server is running (end-to-end) ────────────── + // ── Check 6: freellmapi server is running (end-to-end) ────────────── try { const serverPort = env.get('PORT') || '3001'; @@ -182,28 +206,27 @@ async function main(): Promise { }); if (serverRes.ok) { const pingData = await serverRes.json() as { status?: string }; - pass('4. freellmapi server running', `Server responded: ${pingData.status ?? 'ok'}`); + pass('6. freellmapi server running', `Server responded: ${pingData.status ?? 'ok'}`); } else { - fail('4. freellmapi server running', `Server responded with status ${serverRes.status}`); + fail('6. freellmapi server running', `Server responded with status ${serverRes.status}`); } } catch { // Server not running — this is optional, not a hard fail - pass('4. freellmapi server running', 'Server not running (optional check skipped)'); + pass('6. freellmapi server running', 'Server not running (optional check skipped)'); } - // ── Check 6: Fallback mode reporting ──────────────────────────────── + // ── Check 8: Fallback mode reporting ──────────────────────────────── - const proxyIpCount = env.get('PROXY_IP_COUNT'); if (!llmProxyUrl) { - pass('6. Fallback mode', 'LLM_PROXY_URL not set — topology discovery will be skipped'); + pass('8. Fallback mode', 'LLM_PROXY_URL not set — topology discovery will be skipped'); } else if (topologyData === null) { if (proxyIpCount) { - pass('6. Fallback mode', `Topology unavailable, PROXY_IP_COUNT=${proxyIpCount} will be used as fallback`); + pass('8. Fallback mode', `Topology unavailable, PROXY_IP_COUNT=${proxyIpCount} will be used as fallback (deprecated)`); } else { - pass('6. Fallback mode', 'Topology unavailable, PROXY_IP_COUNT not set — IP capacity disabled'); + pass('8. Fallback mode', 'Topology unavailable, PROXY_IP_COUNT not set — IP capacity disabled'); } } else { - pass('6. Fallback mode', 'Dynamic topology is available'); + pass('8. 
Fallback mode', 'Dynamic topology is available'); } // ── Results ───────────────────────────────────────────────────────── diff --git a/server/src/__tests__/services/ipPoolCapacity.test.ts b/server/src/__tests__/services/ipPoolCapacity.test.ts index 8416f64d..663c4aba 100644 --- a/server/src/__tests__/services/ipPoolCapacity.test.ts +++ b/server/src/__tests__/services/ipPoolCapacity.test.ts @@ -4,7 +4,6 @@ import { releaseIpForKey, hasIpCapacity, getIpCapacityStatus, - getIpCount, isIpCapacityEnabled, cleanupExpired, _reset, @@ -15,88 +14,104 @@ import { import { _reset as resetTopology, _setMockTopology } from '../../services/proxyTopology.js'; describe('IP Pool Capacity Manager', () => { - // Save and restore env between tests - const originalEnv = process.env.PROXY_IP_COUNT; - beforeEach(() => { _reset(); resetTopology(); - // Default: disabled (no PROXY_IP_COUNT, no mock topology) - delete process.env.PROXY_IP_COUNT; - }); - - afterAll(() => { - if (originalEnv !== undefined) { - process.env.PROXY_IP_COUNT = originalEnv; - } else { - delete process.env.PROXY_IP_COUNT; - } }); // ── Configuration ────────────────────────────────────────────────── - describe('getIpCount', () => { - it('returns 0 when PROXY_IP_COUNT is unset and no topology', () => { - delete process.env.PROXY_IP_COUNT; + describe('getWorkerCount', () => { + it('returns 0 when no topology is available', () => { resetTopology(); - expect(getIpCount()).toBe(0); + expect(getWorkerCount()).toBe(0); }); - it('returns 0 when PROXY_IP_COUNT is 0', () => { - process.env.PROXY_IP_COUNT = '0'; - expect(getIpCount()).toBe(0); - }); - - it('returns 0 when PROXY_IP_COUNT is invalid', () => { - process.env.PROXY_IP_COUNT = 'abc'; - expect(getIpCount()).toBe(0); + it('returns the topology worker count when available', () => { + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 5, + proxies: [], + }); + expect(getWorkerCount()).toBe(5); }); - 
it('returns the configured value', () => { - process.env.PROXY_IP_COUNT = '5'; - expect(getIpCount()).toBe(5); + it('returns 0 when topology has 0 workers', () => { + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 0, + proxies: [], + }); + expect(getWorkerCount()).toBe(0); }); }); describe('isIpCapacityEnabled', () => { - it('returns false when unset and no topology', () => { - delete process.env.PROXY_IP_COUNT; + it('returns false when no topology is available', () => { resetTopology(); expect(isIpCapacityEnabled()).toBe(false); }); - it('returns true when set to positive integer', () => { - process.env.PROXY_IP_COUNT = '3'; + it('returns true when topology reports workers', () => { + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 3, + proxies: [], + }); expect(isIpCapacityEnabled()).toBe(true); }); - it('returns true when topology is available', () => { - delete process.env.PROXY_IP_COUNT; - _setMockTopology({ workers: [{ index: 0, url: 'http://localhost:8080' }], workerCount: 1 }); - expect(isIpCapacityEnabled()).toBe(true); + it('returns false when topology has 0 workers', () => { + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 0, + proxies: [], + }); + expect(isIpCapacityEnabled()).toBe(false); }); }); // ── Allocation (allocateIpForKey) ────────────────────────────────── describe('allocateIpForKey', () => { - it('returns bypass when IP capacity is disabled', () => { - delete process.env.PROXY_IP_COUNT; + it('returns bypass when topology is unavailable', () => { resetTopology(); const result = allocateIpForKey('key-1'); expect(result.kind).toBe('bypass'); }); it('allocates when pool has space', () => { - process.env.PROXY_IP_COUNT = '3'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 3, + proxies: [], + 
}); const result = allocateIpForKey('key-1'); expect(result.kind).toBe('allocated'); - expect(result.ipIndex).toBeGreaterThanOrEqual(0); - expect(result.ipIndex).toBeLessThan(3); + if (result.kind === 'allocated') { + expect(result.ipIndex).toBeGreaterThanOrEqual(0); + expect(result.ipIndex).toBeLessThan(3); + } }); it('returns capacity_exhausted when pool is full', () => { - process.env.PROXY_IP_COUNT = '2'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 2, + proxies: [], + }); allocateIpForKey('key-1'); allocateIpForKey('key-2'); // Both slots occupied @@ -105,7 +120,13 @@ describe('IP Pool Capacity Manager', () => { }); it('returns key_busy when same key is already allocated', () => { - process.env.PROXY_IP_COUNT = '3'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 3, + proxies: [], + }); allocateIpForKey('key-1'); // Second allocation for same key should return key_busy const result = allocateIpForKey('key-1'); @@ -113,12 +134,20 @@ describe('IP Pool Capacity Manager', () => { }); it('allocates different workers for different keys', () => { - process.env.PROXY_IP_COUNT = '3'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 3, + proxies: [], + }); const result1 = allocateIpForKey('key-1'); const result2 = allocateIpForKey('key-2'); expect(result1.kind).toBe('allocated'); expect(result2.kind).toBe('allocated'); - expect(result1.ipIndex).not.toBe(result2.ipIndex); + if (result1.kind === 'allocated' && result2.kind === 'allocated') { + expect(result1.ipIndex).not.toBe(result2.ipIndex); + } }); }); @@ -126,7 +155,13 @@ describe('IP Pool Capacity Manager', () => { describe('releaseIpForKey', () => { it('frees the worker for reuse', () => { - process.env.PROXY_IP_COUNT = '1'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + 
workerCount: 1, + proxies: [], + }); allocateIpForKey('key-1'); const result = allocateIpForKey('key-2'); expect(result.kind).toBe('capacity_exhausted'); // pool full @@ -138,13 +173,25 @@ describe('IP Pool Capacity Manager', () => { }); it('is a no-op when key has no allocation', () => { - process.env.PROXY_IP_COUNT = '3'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 3, + proxies: [], + }); // Should not throw expect(() => releaseIpForKey('nonexistent-key')).not.toThrow(); }); it('re-entrant release is safe', () => { - process.env.PROXY_IP_COUNT = '3'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 3, + proxies: [], + }); allocateIpForKey('key-1'); releaseIpForKey('key-1'); releaseIpForKey('key-1'); // second release should not throw @@ -155,32 +202,55 @@ describe('IP Pool Capacity Manager', () => { // ── Capacity Queries ─────────────────────────────────────────────── describe('hasIpCapacity', () => { - it('returns true when IP capacity is disabled', () => { - delete process.env.PROXY_IP_COUNT; + it('returns true when topology is unavailable', () => { resetTopology(); expect(hasIpCapacity()).toBe(true); }); it('returns true when pool has space', () => { - process.env.PROXY_IP_COUNT = '3'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 3, + proxies: [], + }); expect(hasIpCapacity()).toBe(true); }); it('returns false when pool is full', () => { - process.env.PROXY_IP_COUNT = '1'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 1, + proxies: [], + }); allocateIpForKey('key-1'); expect(hasIpCapacity()).toBe(false); }); it('returns true for re-entrant key that already holds a worker even when pool is full', () => { - process.env.PROXY_IP_COUNT = '1'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 
'test', + topologyGeneratedAt: Date.now(), + workerCount: 1, + proxies: [], + }); allocateIpForKey('key-1'); // Pool is full, but key-1 already has allocation expect(hasIpCapacity('key-1')).toBe(true); }); it('returns false for different key when pool is full', () => { - process.env.PROXY_IP_COUNT = '1'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 1, + proxies: [], + }); allocateIpForKey('key-1'); // Pool is full, key-2 has no allocation expect(hasIpCapacity('key-2')).toBe(false); @@ -188,8 +258,7 @@ describe('IP Pool Capacity Manager', () => { }); describe('getIpCapacityStatus', () => { - it('returns { used: 0, max: 0 } when disabled', () => { - delete process.env.PROXY_IP_COUNT; + it('returns { used: 0, max: 0 } when no topology', () => { resetTopology(); const status = getIpCapacityStatus('google'); expect(status.used).toBe(0); @@ -197,7 +266,13 @@ describe('IP Pool Capacity Manager', () => { }); it('tracks used count', () => { - process.env.PROXY_IP_COUNT = '3'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 3, + proxies: [], + }); allocateIpForKey('key-1'); allocateIpForKey('key-2'); @@ -210,13 +285,16 @@ describe('IP Pool Capacity Manager', () => { // ── Cleanup ───────────────────────────────────────────────────────── describe('cleanupExpired', () => { - it('removes expired allocations', () => { - process.env.PROXY_IP_COUNT = '1'; + it('is a no-op (expiration handled by releaseIpForKey)', () => { + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 1, + proxies: [], + }); allocateIpForKey('key-1'); - // Simulate time passing (TTL is 5 minutes by default) - // For testing, we need to manually expire or use a shorter TTL - // This test verifies the cleanup function exists and can be called cleanupExpired(); const status = getIpCapacityStatus('google'); @@ -225,7 
+303,13 @@ describe('IP Pool Capacity Manager', () => { }); it('does not remove active allocations', () => { - process.env.PROXY_IP_COUNT = '3'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 3, + proxies: [], + }); allocateIpForKey('key-1'); cleanupExpired(); @@ -239,7 +323,13 @@ describe('IP Pool Capacity Manager', () => { describe('edge cases', () => { it('handles rapid allocate/release cycles', () => { - process.env.PROXY_IP_COUNT = '2'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 2, + proxies: [], + }); for (let i = 0; i < 10; i++) { const key = `key-${i}`; const result = allocateIpForKey(key); @@ -249,7 +339,13 @@ describe('IP Pool Capacity Manager', () => { }); it('handles many different keys independently', () => { - process.env.PROXY_IP_COUNT = '3'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 3, + proxies: [], + }); allocateIpForKey('key-1'); allocateIpForKey('key-2'); @@ -261,45 +357,73 @@ describe('IP Pool Capacity Manager', () => { }); // ══════════════════════════════════════════════════════════════════════ - // Phase 4: API-Key-Based Allocation Tests (T4.1–T4.13) + // Phase 4+5: Topology-Only Worker Count Tests // ══════════════════════════════════════════════════════════════════════ - describe('Phase 4: API-Key-Based Allocation (T4.1–T4.13)', () => { + describe('Phase 4+5: Topology-only worker count', () => { const POOL_SIZE = 3; - // T4.1 — Single allocation success - it('T4.1 — allocates worker for first request', () => { + // Single allocation success + it('allocates worker for first request', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); const result = 
allocateIpForKey('key-1'); expect(result.kind).toBe('allocated'); - expect(result.ipIndex).toBeGreaterThanOrEqual(0); + if (result.kind === 'allocated') { + expect(result.ipIndex).toBeGreaterThanOrEqual(0); + } }); - // T4.2 — Same key concurrent → 409 - it('T4.2 — rejects same key concurrent request with 409', () => { + // Same key concurrent → key_busy + it('rejects same key concurrent request with key_busy', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); allocateIpForKey('key-1'); const result = allocateIpForKey('key-1'); expect(result.kind).toBe('key_busy'); }); - // T4.3 — Different keys until full - it('T4.3 — allocates different workers for different keys', () => { + // Different keys until full + it('allocates different workers for different keys', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); const r1 = allocateIpForKey('key-1'); const r2 = allocateIpForKey('key-2'); expect(r1.kind).toBe('allocated'); expect(r2.kind).toBe('allocated'); - expect(r1.ipIndex).not.toBe(r2.ipIndex); + if (r1.kind === 'allocated' && r2.kind === 'allocated') { + expect(r1.ipIndex).not.toBe(r2.ipIndex); + } }); - // T4.4 — Pool exhausted → 503 - it('T4.4 — rejects new key when pool is full with 503', () => { + // Pool exhausted → capacity_exhausted + it('rejects new key when pool is full with capacity_exhausted', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); for (let i = 0; i < POOL_SIZE; i++) { allocateIpForKey(`key-${i}`); } @@ -307,10 +431,16 @@ 
describe('IP Pool Capacity Manager', () => { expect(result.kind).toBe('capacity_exhausted'); }); - // T4.5 — Release restores capacity - it('T4.5 — releases worker and restores capacity', () => { + // Release restores capacity + it('releases worker and restores capacity', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); const r1 = allocateIpForKey('key-1'); expect(r1.kind).toBe('allocated'); releaseIpForKey('key-1'); @@ -318,10 +448,16 @@ describe('IP Pool Capacity Manager', () => { expect(r2.kind).toBe('allocated'); }); - // T4.6 — Exception path releases slot - it('T4.6 — releases worker even when request throws', () => { + // Exception path releases slot + it('releases worker even when request throws', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); allocateIpForKey('key-1'); try { throw new Error('simulated'); @@ -333,37 +469,52 @@ describe('IP Pool Capacity Manager', () => { expect(_getActiveAssignmentCount()).toBe(0); }); - // T4.7 — Disabled mode bypass - it('T4.7 — bypasses allocation when sticky routing is disabled', () => { + // Disabled mode bypass (no topology) + it('bypasses allocation when topology is unavailable', () => { _resetAssignments(); - // No PROXY_IP_COUNT, no topology → disabled - delete process.env.PROXY_IP_COUNT; resetTopology(); const result = allocateIpForKey('key-1'); expect(result.kind).toBe('bypass'); }); - // T4.8 — workerCount=0 → 503 (not bypass) - it('T4.8 — returns capacity_exhausted when workerCount=0', () => { + // workerCount=0 → capacity_exhausted (not bypass) + it('returns capacity_exhausted when workerCount=0', () => { _resetAssignments(); - // PROXY_IP_COUNT=0 → 
getWorkerCount()=0, isStickyRoutingEnabled()=true (0 is valid non-negative) - process.env.PROXY_IP_COUNT = '0'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 0, + proxies: [], + }); const result = allocateIpForKey('key-1'); expect(result.kind).toBe('capacity_exhausted'); }); - // T4.9 — No worker leaks after failures - it('T4.9a — no worker leaks after key_busy rejection', () => { + // No worker leaks after failures + it('no worker leaks after key_busy rejection', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); allocateIpForKey('key-1'); allocateIpForKey('key-1'); expect(_getActiveAssignmentCount()).toBe(1); }); - it('T4.9b — no worker leaks after capacity_exhausted rejection', () => { + it('no worker leaks after capacity_exhausted rejection', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); for (let i = 0; i < POOL_SIZE; i++) { allocateIpForKey(`key-${i}`); } @@ -371,10 +522,16 @@ describe('IP Pool Capacity Manager', () => { expect(_getActiveAssignmentCount()).toBe(POOL_SIZE); }); - // T4.10 — Router integration: 409 on concurrent same-key requests - it('T4.10 — returns 409 for concurrent same-key requests', () => { + // Router integration: key_busy on concurrent same-key requests + it('returns key_busy for concurrent same-key requests', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); const firstResult = allocateIpForKey('test-key'); expect(firstResult.kind).toBe('allocated'); 
const secondResult = allocateIpForKey('test-key'); @@ -384,10 +541,16 @@ describe('IP Pool Capacity Manager', () => { expect(thirdResult.kind).toBe('allocated'); }); - // T4.11 — Router integration: 503 when all workers occupied - it('T4.11 — returns 503 when all workers are occupied', () => { + // Router integration: capacity_exhausted when all workers occupied + it('returns capacity_exhausted when all workers are occupied', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); const keys = Array.from({ length: POOL_SIZE }, (_, i) => `key-${i}`); keys.forEach(key => { const result = allocateIpForKey(key); @@ -400,10 +563,16 @@ describe('IP Pool Capacity Manager', () => { expect(retryResult.kind).toBe('allocated'); }); - // T4.12 — Router integration: Exception cleanup - it('T4.12 — releases worker on exception', () => { + // Router integration: Exception cleanup + it('releases worker on exception', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = String(POOL_SIZE); + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: POOL_SIZE, + proxies: [], + }); const result = allocateIpForKey('test-key'); expect(result.kind).toBe('allocated'); let exceptionThrown = false; @@ -420,23 +589,36 @@ describe('IP Pool Capacity Manager', () => { expect(newResult.kind).toBe('allocated'); }); - // T4.13 — Invalid PROXY_IP_COUNT values → disabled mode - it('T4.13a — treats invalid PROXY_IP_COUNT as disabled', () => { + // Topology-only: no PROXY_IP_COUNT fallback + it('ignores PROXY_IP_COUNT env var — uses topology only', () => { _resetAssignments(); - const invalidValues = ['abc', '-1', '1.5', '']; - invalidValues.forEach(value => { - process.env.PROXY_IP_COUNT = value; - const result = allocateIpForKey('key-1'); - expect(result.kind).toBe('bypass'); 
- delete process.env.PROXY_IP_COUNT; - }); + // Set PROXY_IP_COUNT but no topology → should bypass (not use PROXY_IP_COUNT) + process.env.PROXY_IP_COUNT = '5'; + resetTopology(); + const result = allocateIpForKey('key-1'); + expect(result.kind).toBe('bypass'); + delete process.env.PROXY_IP_COUNT; }); - it('T4.13b — accepts valid PROXY_IP_COUNT values', () => { + it('uses topology worker count even when PROXY_IP_COUNT is set', () => { _resetAssignments(); - process.env.PROXY_IP_COUNT = '3'; - const result = allocateIpForKey('key-1'); - expect(result.kind).toBe('allocated'); + process.env.PROXY_IP_COUNT = '10'; + _setMockTopology({ + schemaVersion: 1, + topologyId: 'test', + topologyGeneratedAt: Date.now(), + workerCount: 2, + proxies: [], + }); + // Should use topology count (2), not PROXY_IP_COUNT (10) + expect(getWorkerCount()).toBe(2); + // Fill 2 slots + allocateIpForKey('key-1'); + allocateIpForKey('key-2'); + // Third should be exhausted (not have 10 slots) + const result = allocateIpForKey('key-3'); + expect(result.kind).toBe('capacity_exhausted'); + delete process.env.PROXY_IP_COUNT; }); }); }); diff --git a/server/src/index.ts b/server/src/index.ts index feee44bb..f9ff5fea 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -5,6 +5,7 @@ import { initDb } from './db/index.js'; import { assertAdminAuthConfigured } from './middleware/adminAuth.js'; import { startHealthChecker } from './services/health.js'; import { initialize as initTopology } from './services/proxyTopology.js'; +import { reconcileTopology } from './services/topologyReconciliation.js'; const PORT = process.env.PORT ?? 
3001; @@ -12,6 +13,7 @@ async function main() { assertAdminAuthConfigured(); initDb(); await initTopology(); + await reconcileTopology(); const app = createApp(); const server = app.listen(Number(PORT), '0.0.0.0', () => { diff --git a/server/src/services/capacityService.ts b/server/src/services/capacityService.ts new file mode 100644 index 00000000..3799be82 --- /dev/null +++ b/server/src/services/capacityService.ts @@ -0,0 +1,32 @@ +// Capacity Service — Single source of truth for worker count +// +// The required worker count is derived exclusively from the database: +// max(enabled API keys per platform) +// +// This service is used by: +// - topologyReconciliation.ts (startup self-healing) +// - reconcile-topology.ts (standalone reconciliation) +// - deploy-proxy.ts (initial deployment) +// - verify-deploy.ts (deployment verification) + +import { getDb } from "../db/index.js"; + +/** + * Returns the required worker count based on the maximum number of + * enabled API keys across all provider platforms. + * + * SQL: SELECT MAX(key_count) FROM ( + * SELECT COUNT(*) as key_count FROM api_keys WHERE enabled = 1 GROUP BY platform + * ) + * + * Falls back to 1 if no keys exist (minimum viable pool). + */ +export function getRequiredWorkerCount(): number { + const db = getDb(); + const row = db.prepare(` + SELECT MAX(key_count) as max_count FROM ( + SELECT COUNT(*) as key_count FROM api_keys WHERE enabled = 1 GROUP BY platform + ) + `).get() as { max_count: number | null }; + return row?.max_count && row.max_count > 0 ? row.max_count : 1; +} diff --git a/server/src/services/ipPoolCapacity.ts b/server/src/services/ipPoolCapacity.ts index 071a0e4f..3236baf0 100644 --- a/server/src/services/ipPoolCapacity.ts +++ b/server/src/services/ipPoolCapacity.ts @@ -7,14 +7,22 @@ // - Each IP slot serves 1 session at a time (conservative). // - LongCat: 1 session per IP (IP-bound regardless of key count). 
// -// When PROXY_IP_COUNT is unset or 0, all capacity checks pass through -// (backward compatible — no IP awareness). +// Worker count is derived exclusively from dynamic topology. +// The PROXY_IP_COUNT env var is deprecated and ignored. // // In-memory only, following the same pattern as ratelimit.ts. import { getWorkerCount as getTopologyWorkerCount, isDynamicTopologyAvailable } from "./proxyTopology.js"; import crypto from "crypto"; +// Deprecation warning for PROXY_IP_COUNT +if (process.env.PROXY_IP_COUNT !== undefined) { + console.warn( + "[deprecation] PROXY_IP_COUNT is deprecated and ignored. " + + "Worker count is now derived from provider API keys via dynamic topology.", + ); +} + /** * Short hash of an API key for logging — exposes only the first 12 hex chars * of a SHA-256 digest, sufficient for correlation without leaking the key. @@ -49,55 +57,29 @@ const DEFAULT_TTL_MS = 30 * 60 * 1000; // matches STICKY_TTL_MS // --------------------------------------------------------------------------- /** - * Returns the configured number of proxy workers. - * - * Fallback chain: - * 1. Dynamic topology (if available at startup) - * 2. PROXY_IP_COUNT env var (backward compatibility) - * 0. 0 (disabled) + * Returns the number of proxy workers from dynamic topology. * - * Uses isDynamicTopologyAvailable() rather than count > 0 because a - * zero-worker topology is still dynamically available (intentionally - * disables IP capacity limits). + * Worker count is now derived exclusively from the topology endpoint. + * Returns 0 when dynamic topology is unavailable (disabled). */ export function getWorkerCount(): number { if (isDynamicTopologyAvailable()) { return getTopologyWorkerCount(); } - - const raw = process.env.PROXY_IP_COUNT; - const count = raw ? parseInt(raw, 10) : 0; - return Number.isInteger(count) && count > 0 ? count : 0; -} - -/** @deprecated Use getWorkerCount() instead. 
*/ -export function getIpCount(): number { - return getWorkerCount(); + return 0; } -/** True when PROXY_IP_COUNT is set to a positive integer. */ +/** True when dynamic topology reports workers available. */ export function isIpCapacityEnabled(): boolean { return getWorkerCount() > 0; } /** * Check if sticky routing is enabled. -/** - * Check if sticky routing is enabled. - * Returns true when either dynamic topology is available or PROXY_IP_COUNT is a valid non-negative integer. - * Invalid PROXY_IP_COUNT values (e.g. "abc", "-1") fall back to dynamic topology availability. + * Returns true when dynamic topology is available. */ export function isStickyRoutingEnabled(): boolean { - const raw = process.env.PROXY_IP_COUNT; - if (raw === undefined || raw.trim() === '') { - return isDynamicTopologyAvailable(); - } - const envCount = Number(raw); - if (!Number.isInteger(envCount) || envCount < 0) { - // Invalid value — fall back to dynamic topology - return isDynamicTopologyAvailable(); - } - return true; + return isDynamicTopologyAvailable(); } // --------------------------------------------------------------------------- @@ -234,7 +216,7 @@ export function releaseIp(_sessionKey: string): void { /** * Check whether there is IP capacity available in the global pool. - * Returns true when PROXY_IP_COUNT is unset (no limit). + * Returns true when dynamic topology is unavailable (no limit). * * Note: This checks global pool occupancy (any platform), consistent with * allocateIp which treats all occupied slots as unavailable. @@ -257,15 +239,15 @@ export function hasIpCapacity(apiKey?: string): boolean { * Return current IP usage for a platform. * When IP capacity is disabled, returns { used: 0, max: 0 }. * - * Note: max is the global ipCount (shared across platforms). + * Note: max is the global worker count (shared across platforms). * used is the number of assigned workers. 
*/ export function getIpCapacityStatus(_platform: string): { used: number; max: number } { if (!isIpCapacityEnabled()) return { used: 0, max: 0 }; - const ipCount = getWorkerCount(); + const workerCount = getWorkerCount(); const used = workerToApiKey.size; - return { used, max: ipCount }; + return { used, max: workerCount }; } // --------------------------------------------------------------------------- diff --git a/server/src/services/topologyReconciliation.ts b/server/src/services/topologyReconciliation.ts new file mode 100644 index 00000000..cc7f675e --- /dev/null +++ b/server/src/services/topologyReconciliation.ts @@ -0,0 +1,69 @@ +// Topology Reconciliation Service — Active Self-Healing +// +// On server startup, detects topology drift and actively repairs it by: +// 1. Computing required worker count from DB (max enabled keys per platform) +// 2. Comparing against current topology worker count +// 3. If drift detected: redeploying via reconcile-topology script +// 4. Logging success/failure (non-blocking — server continues in degraded mode) + +import { + getWorkerCount as getTopologyWorkerCount, + isDynamicTopologyAvailable, +} from "./proxyTopology.js"; +import { getRequiredWorkerCount } from "./capacityService.js"; +import { spawn } from "node:child_process"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const projectRoot = path.resolve(__dirname, "../.."); + +export async function reconcileTopology(): Promise { + if (!isDynamicTopologyAvailable()) { + console.log("[reconcile] Topology not available, skipping reconciliation"); + return; + } + + const actualWorkerCount = getTopologyWorkerCount(); + const expectedWorkerCount = getRequiredWorkerCount(); + + if (actualWorkerCount >= expectedWorkerCount) { + console.log( + `[reconcile] No drift: ${actualWorkerCount} workers >= ${expectedWorkerCount} expected`, + ); + return; + } + + // 
Drift detected — actively repair in the background + console.log( + `[reconcile] Drift detected: ${actualWorkerCount} workers < ${expectedWorkerCount} expected`, + ); + console.log("[reconcile] Initiating automatic repair in the background..."); + + const scriptPath = path.join(projectRoot, "scripts", "reconcile-topology.ts"); + + // Run asynchronously to avoid blocking the event loop and server startup + const child = spawn("npx", ["tsx", scriptPath], { + cwd: projectRoot, + stdio: "inherit", + shell: true, + }); + + child.on("error", (err) => { + console.error(`[reconcile] Failed to start reconciliation process: ${err.message}`); + console.error(`[reconcile] Run manually: pnpm run reconcile-topology`); + }); + + child.on("close", (code) => { + if (code === 0) { + console.log("[reconcile] Repair completed successfully"); + } else { + console.error( + `[reconcile] Repair failed with exit code ${code}. ` + + `Server operating with degraded topology (${actualWorkerCount}/${expectedWorkerCount} workers). ` + + `Run: pnpm run reconcile-topology`, + ); + } + }); +}