From 64a2a61256825e5b77e44822e2e61f3b1742669d Mon Sep 17 00:00:00 2001 From: Tobias O Date: Fri, 19 Jun 2026 11:51:44 +0200 Subject: [PATCH] feat(plan): per-account limit overrides resolved at the auth seam MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the `plan` enum into a scheduling class (priority + SLO bucket) and per-account numeric quota. `PlanResolver.resolve` now returns `ResolvedAccount { plan, limits }`, merging a sparse `user.limits jsonb` override over plan-class defaults composed from existing config sources. - shared: `EffectiveLimits`/`ResolvedAccount`, `OverrideLimitsSchema` (strict, capped positive ints), `mergeLimits`, `MAX_SEQUENCE_LENGTH_CAP` - gateway: `DbPlanResolver` reads `plan, limits` and parses defensively (parse failure → class defaults); `AuthContext` carries `limits` - consumers read `auth.limits.*`: submissions/min, concurrency, per-request maxSequenceLength, and shedding's per-account sloSeconds - additive nullable `user.limits` migration + dev seed example - admin-only `PUT/DELETE /admin/accounts/{userId}/limits` (full-replace / clear), audit-logged, behind the existing admin gate - backend E2E proving an override enforces limits differing from the plan Archive the completed change; create the canonical `plan-limits` spec. Fold the prod shedding-validation details (enforce toggle, configurability round-trip, per-account sloSeconds knob) into `simplify-load-testing`. Co-Authored-By: Claude Opus 4.8 (1M context) --- CLAUDE.md | 3 +- infra/postgres/seeds/dev-users-plan.sql | 5 + .../.openspec.yaml | 2 + .../design.md | 77 +++++++++++++ .../proposal.md | 31 ++++++ .../specs/plan-limits/spec.md | 69 ++++++++++++ .../tasks.md | 40 +++++++ .../changes/simplify-load-testing/design.md | 9 +- .../specs/load-testing/spec.md | 27 ++++- .../changes/simplify-load-testing/tasks.md | 8 +- openspec/specs/plan-limits/spec.md | 75 +++++++++++++ packages/shared/src/index.ts | 11 +- packages/shared/src/plan.test.ts | 55 ++++++++++ packages/shared/src/plan.ts | 60 ++++++++++- packages/shared/src/types.ts | 3 + services/api-gateway/openapi.v1.json | 4 +- services/api-gateway/scripts/migrate.ts | 14 +++ services/api-gateway/src/admin/limits.test.ts | 102 ++++++++++++++++++ services/api-gateway/src/admin/limits.ts | 67 ++++++++++++ services/api-gateway/src/app.ts | 59 ++++++++-- .../src/contracts/job-contracts.test.ts | 12 ++- .../src/middleware/auth/auth.test.ts | 18 +++- .../src/middleware/auth/authenticate.ts | 9 +- .../src/middleware/rate-limit.test.ts | 23 ++-- .../api-gateway/src/middleware/rate-limit.ts | 20 +--- .../src/middleware/shedding.test.ts | 26 ++++- .../api-gateway/src/middleware/shedding.ts | 1 + .../api-gateway/src/plan/db-resolver.test.ts | 87 +++++++++++---- services/api-gateway/src/plan/db-resolver.ts | 46 ++++++-- services/api-gateway/src/routes/_utils.ts | 7 +- .../api-gateway/src/routes/embeddings.test.ts | 12 ++- services/api-gateway/src/routes/embeddings.ts | 9 ++ .../src/routes/predictions.test.ts | 15 ++- .../api-gateway/src/routes/predictions.ts | 9 ++ services/api-gateway/src/schemas/common.ts | 13 ++- .../api-gateway/src/schemas/schemas.test.ts | 10 +- services/api-gateway/src/shedding/decide.ts | 4 +- tests/backend-e2e/helpers.ts | 10 ++ tests/backend-e2e/limits-override.test.ts | 55 ++++++++++ 39 files changed, 1012 insertions(+), 95 deletions(-) create mode 100644 openspec/changes/archive/2026-06-19-per-account-limit-overrides/.openspec.yaml create mode 100644 openspec/changes/archive/2026-06-19-per-account-limit-overrides/design.md create mode 100644 openspec/changes/archive/2026-06-19-per-account-limit-overrides/proposal.md create mode 100644 openspec/changes/archive/2026-06-19-per-account-limit-overrides/specs/plan-limits/spec.md create mode 100644 openspec/changes/archive/2026-06-19-per-account-limit-overrides/tasks.md create mode 100644 openspec/specs/plan-limits/spec.md create mode 100644 services/api-gateway/src/admin/limits.test.ts create mode 100644 services/api-gateway/src/admin/limits.ts create mode 100644 tests/backend-e2e/limits-override.test.ts diff --git a/CLAUDE.md b/CLAUDE.md index 3b423e2..3a4a96e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -79,7 +79,8 @@ Workspaces: `packages/*`, `services/*`, `apps/*`, `infra/*`, `tests/*`, `scripts - better-auth + GitHub OAuth. Middleware enriches Hono context with the user; frontend `AuthProvider` (`apps/web/src/features/auth/context.tsx`) mirrors session state and sends `credentials: 'include'`. - Rate limits (submission + poll tiers) defined in `services/api-gateway/src/middleware/rate-limit.ts`; Redis-backed via `hono-rate-limiter`. -- Per-plan quotas in `packages/shared/src/plan.ts`. +- `plan` is the scheduling **class** (queue priority + SLO bucket); numeric **quota** is resolved separately. `PlanResolver.resolve` returns `ResolvedAccount = { plan, limits }` where `limits` is `EffectiveLimits` (`submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, `sloSeconds`) — class defaults merged with a per-account override from the `user.limits jsonb` column (`mergeLimits` + `OverrideLimitsSchema` in `packages/shared/src/plan.ts`; absent/invalid → class default). Consumers read `auth.limits.*` off the auth context, never `PLAN_LIMITS[plan]` at the call site. +- Overrides are admin-only: `PUT/DELETE /admin/accounts/{userId}/limits` (`services/api-gateway/src/admin/limits.ts`); never user-editable. PUT is full-replace; DELETE clears to class defaults. ## Errors diff --git a/infra/postgres/seeds/dev-users-plan.sql b/infra/postgres/seeds/dev-users-plan.sql index c259db4..8faace9 100644 --- a/infra/postgres/seeds/dev-users-plan.sql +++ b/infra/postgres/seeds/dev-users-plan.sql @@ -1,3 +1,8 @@ -- Seed dev-only user plan overrides. Safe to re-run. UPDATE "user" SET plan = 'pro' WHERE email = 'dev-pro@example.com'; UPDATE "user" SET plan = 'free' WHERE email = 'dev-free@example.com'; + +-- Example per-account limit override: a pro account with raised quotas. +UPDATE "user" +SET limits = '{"submissionsPerMinute": 120, "maxConcurrentJobs": 25}'::jsonb +WHERE email = 'dev-pro@example.com'; diff --git a/openspec/changes/archive/2026-06-19-per-account-limit-overrides/.openspec.yaml b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/.openspec.yaml new file mode 100644 index 0000000..95ae5a2 --- /dev/null +++ b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-06-18 diff --git a/openspec/changes/archive/2026-06-19-per-account-limit-overrides/design.md b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/design.md new file mode 100644 index 0000000..6df2de4 --- /dev/null +++ b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/design.md @@ -0,0 +1,77 @@ +## Context + +`DbPlanResolver.resolve(userId)` returns a bare `Plan` enum read from `SELECT plan FROM "user"`. Every quota decision then does its own `PLAN_LIMITS[plan]` lookup: + +- `middleware/rate-limit.ts` → `submissionsPerMinute` +- `routes/_utils.ts` (`withinConcurrentJobLimit`) → `maxConcurrentJobs` +- submission validation → `MAX_SEQUENCE_LENGTH` (a single global constant, not yet per-plan) +- shedding → `sloSeconds[plan]` from `loadSheddingConfig` + +Because the only DB-derived input is the class name, all accounts on a class share identical quotas. The `fix/shedding-residue-leak` branch added `RATE_LIMIT_SUBMISSIONS_*` env config, but those are global-per-class defaults (load-test driven) — they cannot express two enterprise customers with different agreements. This design introduces per-account overrides resolved at the existing resolver seam. + +## Goals / Non-Goals + +**Goals:** + +- A single resolution seam returns merged `EffectiveLimits`; call sites stop indexing `PLAN_LIMITS` directly for quota. +- Per-account overrides stored in `user.limits jsonb`, sparse and partial. +- Override envelope: `submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, `sloSeconds`. +- Admin-only management surface; never user-editable. +- Plan enum preserved for priority + SLO class. + +**Non-Goals:** + +- Enterprise self-serve UI or billing/entitlement-provider integration. +- Removing or replacing the `RATE_LIMIT_SUBMISSIONS_*` env knobs (they remain the per-class defaults). +- Per-account _priority_ or _class_ overrides — scheduling stays class-based. + +## Decisions + +### 1. Resolver returns `EffectiveLimits`, not `Plan` + +`PlanResolver.resolve` becomes `resolve(userId): Promise` where `ResolvedAccount = { plan: Plan; limits: EffectiveLimits }`. `limits` is computed as `mergeLimits(PLAN_LIMITS[plan]+slo, override)`. Call sites read `auth.limits.*`; scheduling reads `auth.plan`. + +_Alternative considered:_ keep returning `Plan` and add a parallel `resolveLimits`. Rejected — two lookups invite drift and a second DB round-trip; one resolved object keeps the merge in one place. + +### 2. Carry resolved limits on the auth context + +The auth context already exposes `{ sub, plan }`. Extend it to `{ sub, plan, limits }` so middleware and route utils read the pre-merged object without re-querying. The DB read already happens during plan resolution; the override travels in the same row (`SELECT plan, limits FROM "user"`), so no extra query. + +### 3. jsonb sparse override, validated with Zod + +`limits jsonb` (nullable). Stored value is a partial object; a Zod schema (`OverrideLimitsSchema`) validates positive integers per field and `.strict()` rejects unknown keys. jsonb chosen for flexibility — the agreement-shaped envelope is expected to grow and is rarely touched, so typed columns + migrations per field would be churn for little gain. The merge is `{ ...defaults, ...parsed }`; a parse failure is logged and treated as no override (fail-safe to class defaults). + +_Alternative considered:_ typed columns. Rejected for now — premature rigidity; revisit if the envelope stabilizes or needs DB-level constraints. + +### 4. `maxSequenceLength` becomes plan-aware + +Today `MAX_SEQUENCE_LENGTH` is one global constant. The default per-class value seeds from that constant for all classes (no behavior change by default); the override can raise it per account. Submission validation reads `auth.limits.maxSequenceLength`. + +### 5. SLO seconds resolved through the same object + +`loadSheddingConfig().sloSeconds` provides the class default. The resolver overlays a per-account `sloSeconds` when present. The shedding decision reads the account's resolved value rather than indexing the global config by plan. Default behavior unchanged when no override is set. + +### 6. Admin route mirrors flag overrides + +`PUT /admin/accounts/{userId}/limits` (validate + persist) and `DELETE` (clear → NULL), guarded by the same admin authorization as `PUT /admin/flags/`. No GET on a user-facing route; an admin GET is optional and in scope for the admin surface only. + +**PUT is full-replace**: the request body becomes the entire stored override object (any field not present is dropped, not merged). `DELETE` sets the column NULL to revert to class defaults. Chosen over PATCH/merge for predictability — there is one obvious way to read or reset an account's override, and "clear one field" needs no null-sentinel convention. PATCH deferred unless an operator workflow demands it. + +## Risks / Trade-offs + +- **Auth-context shape change ripples to every consumer** → Centralize the type in one place (`Variables`/auth type) and let the typechecker enumerate call sites; the compiler turns the ripple into a checklist. +- **jsonb lets malformed data reach the row out-of-band** (manual SQL, bad seed) → Resolver parses defensively and falls back to class defaults on any parse failure; never throws into the request path. +- **Override silently widens limits beyond infra capacity** (e.g. someone sets `submissionsPerMinute: 10^6`) → Validation caps with sane `.max()` bounds; shedding remains the backstop since it operates on observed throughput, not declared limits. +- **Drift between class default sources** (`PLAN_LIMITS`, `RATE_LIMIT_SUBMISSIONS_*` env, SLO config) → Resolver composes the class default from the same sources the gateway already loads at boot; no new default table. +- **Stale resolved limits within a request** → Limits resolve per request at auth time (same cadence as plan today); an override change takes effect on the next request, consistent with current plan-change behavior. + +## Migration Plan + +1. Add nullable `user.limits jsonb` column (additive migration; no backfill — NULL = class defaults). +2. Ship resolver + auth-context change; all defaults equal current behavior, so deploying with zero overrides is a no-op. +3. Add admin route last; until it exists, overrides can only be set via seed/SQL (used for the dev seed example). +4. **Rollback:** revert code (resolver falls back to class defaults regardless of column); the nullable column can remain unused with no effect. + +## Open Questions + +- Do we want an audit-log line on override set/clear? Likely yes (reuse the submission/admin logging idiom), but not blocking. diff --git a/openspec/changes/archive/2026-06-19-per-account-limit-overrides/proposal.md b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/proposal.md new file mode 100644 index 0000000..27809b1 --- /dev/null +++ b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/proposal.md @@ -0,0 +1,31 @@ +## Why + +Today `DbPlanResolver` reads only the `plan` enum name from Postgres, and every downstream consumer does a static `PLAN_LIMITS[plan]` lookup. As a result "enterprise" is just a bigger static tier — a generous pro — and two enterprise customers with different contractual limits are impossible to express. The original intent was that enterprise limits follow the per-customer agreement. This change wires that intent without turning every quota into a deployment-global env knob (which is what the load-test-motivated `RATE_LIMIT_SUBMISSIONS_*` config does, and which cannot differentiate two accounts on the same plan). + +## What Changes + +- Split the meaning carried by the `plan` enum into two concerns: + - **Class** — queue priority and SLO bucket. Stays an enum (`free` | `pro` | `enterprise`); a small ordered set is correct for scheduling. + - **Quota** — numeric entitlements (`submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, SLO seconds). Becomes per-account overridable. +- Add a `limits jsonb` column to the Postgres `user` table holding a sparse, partial override object. Absent column / absent field = fall back to the plan default. +- Change the plan-resolution seam: the resolver returns **EffectiveLimits** (`account.overrides ?? PLAN_LIMITS[class]`, merged field-by-field) instead of a bare `Plan` enum. The enum is still exposed for priority + SLO-class selection. +- Update consumers to read resolved limits: `middleware/rate-limit.ts` (submissions/min), `routes/_utils.ts` (concurrency), submission validation (`MAX_SEQUENCE_LENGTH`), and shedding SLO seconds. +- Add an **admin-only** endpoint to set/clear an account's override (mirrors the existing `PUT /admin/flags/` pattern). No enterprise self-serve — overrides are never user-editable. + +## Capabilities + +### New Capabilities + +- `plan-limits`: Resolution of an account's effective quota limits — merging per-account DB overrides over plan-class defaults — and the admin surface for managing those overrides. Establishes the class-vs-quota boundary that scheduling (priority/SLO) and quota enforcement (rate limit/concurrency/sequence length) both consume. + +### Modified Capabilities + + + +## Impact + +- **DB**: new `user.limits jsonb` column (nullable) + migration; dev seed (`infra/postgres/seeds/dev-users-plan.sql`) gains an example override. +- **Code**: `packages/shared/src/plan.ts` (`PlanResolver` contract → `EffectiveLimits`, override-merge helper, Zod override schema), `services/api-gateway/src/plan/db-resolver.ts`, `middleware/rate-limit.ts`, `routes/_utils.ts`, shedding SLO lookup, submission length validation, new `admin/limits.ts` route. +- **API**: new admin route `PUT/DELETE /admin/accounts/{userId}/limits`; no change to public submission contracts (behavior differs only by resolved numbers). +- **Auth/PII**: override values are non-PII operational numbers; admin route reuses existing admin authorization. +- **Out of scope**: enterprise self-serve UI, billing integration, the `RATE_LIMIT_SUBMISSIONS_*` env knobs (they remain as global-per-class defaults feeding `PLAN_LIMITS`). diff --git a/openspec/changes/archive/2026-06-19-per-account-limit-overrides/specs/plan-limits/spec.md b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/specs/plan-limits/spec.md new file mode 100644 index 0000000..3f6155c --- /dev/null +++ b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/specs/plan-limits/spec.md @@ -0,0 +1,69 @@ +## ADDED Requirements + +### Requirement: Plan class versus quota separation + +The system SHALL treat an account's `plan` as a scheduling **class** (queue priority and SLO bucket) and SHALL resolve numeric **quota** limits separately, so that two accounts on the same class MAY enforce different quotas. + +The quota envelope SHALL consist of: `submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, and `sloSeconds`. + +#### Scenario: Class still drives scheduling + +- **WHEN** an account's queue priority or SLO bucket is selected +- **THEN** the system uses the `plan` enum (`free` | `pro` | `enterprise`) and ignores any per-account quota override + +#### Scenario: Quota resolved independently of class + +- **WHEN** a quota limit is enforced for an account +- **THEN** the system uses the resolved effective limit, not a direct `PLAN_LIMITS[plan]` lookup at the call site + +### Requirement: Effective limit resolution + +The plan resolver SHALL return an `EffectiveLimits` object computed by merging a per-account override over the plan-class default on a field-by-field basis. An absent override object, an absent field, or a resolution failure SHALL fall back to the plan-class default for that field. + +#### Scenario: No override present + +- **WHEN** an account has no `limits` override stored +- **THEN** every effective limit equals the corresponding `PLAN_LIMITS[plan]` (and SLO default) value + +#### Scenario: Partial override merges field-by-field + +- **WHEN** an account's override sets only `maxConcurrentJobs` +- **THEN** `maxConcurrentJobs` uses the override value and all other limits fall back to the plan-class default + +#### Scenario: Resolution failure is safe + +- **WHEN** the override store read fails or returns an unparseable value +- **THEN** the resolver logs a warning and returns the plan-class defaults without throwing + +### Requirement: Override storage and validation + +Per-account overrides SHALL be stored in a nullable `limits jsonb` column on the Postgres `user` table as a sparse partial object. The system SHALL validate override values against a schema before applying them: each present field MUST be a positive integer (or zero where the underlying default permits zero, e.g. an SLO bucket), and unknown fields SHALL be rejected. + +#### Scenario: Valid sparse override accepted + +- **WHEN** an override `{ "submissionsPerMinute": 1000 }` is validated +- **THEN** validation passes and only `submissionsPerMinute` is overridden + +#### Scenario: Invalid override rejected + +- **WHEN** an override contains a negative number, a non-integer, or an unknown field +- **THEN** validation fails and the override is not persisted + +### Requirement: Admin-only override management + +The system SHALL expose admin-only endpoints to set and clear an account's limit override, mirroring the existing admin flag-override authorization. Overrides SHALL NOT be settable or viewable through any user-facing (non-admin) route. + +#### Scenario: Admin sets an override + +- **WHEN** an authenticated admin sends a valid override for an account via the admin route +- **THEN** the override is persisted and subsequent submissions for that account enforce the merged effective limits + +#### Scenario: Admin clears an override + +- **WHEN** an admin clears an account's override +- **THEN** the `limits` column is reset and the account reverts to plan-class defaults + +#### Scenario: Non-admin cannot set an override + +- **WHEN** a non-admin caller attempts to set or read an account override +- **THEN** the request is rejected by admin authorization diff --git a/openspec/changes/archive/2026-06-19-per-account-limit-overrides/tasks.md b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/tasks.md new file mode 100644 index 0000000..d5409d1 --- /dev/null +++ b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/tasks.md @@ -0,0 +1,40 @@ +## 1. Shared resolution layer (`packages/shared`) + +- [x] 1.1 Define `EffectiveLimits` type (`submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, `sloSeconds`) and `ResolvedAccount = { plan: Plan; limits: EffectiveLimits }` in `plan.ts` +- [x] 1.2 Add `OverrideLimitsSchema` (Zod, `.strict()`, positive-int fields with sane `.max()` caps) and an exported `LimitsOverride` type +- [x] 1.3 Add `mergeLimits(classDefaults, override)` field-by-field merge helper; absent/invalid → class default +- [x] 1.4 Change `PlanResolver.resolve` contract to return `ResolvedAccount`; export `MAX_SEQUENCE_LENGTH` as the per-class default seed +- [x] 1.5 Unit-test merge + schema (no override, partial override, invalid/unknown-field rejection) + +## 2. DB resolver + auth context (`services/api-gateway`) + +- [x] 2.1 Extend the user query to `SELECT plan, limits FROM "user"` and parse `limits` defensively in `DbPlanResolver` +- [x] 2.2 Resolver composes class defaults from existing config sources (`PLAN_LIMITS`, `RATE_LIMIT_SUBMISSIONS_*`, shedding `sloSeconds`) and merges the override; parse failure logs + falls back +- [x] 2.3 Extend the auth/`Variables` context type from `{ sub, plan }` to `{ sub, plan, limits }`; populate at resolution +- [x] 2.4 Update `DbPlanResolver` unit tests (no override, partial override, read failure → defaults) + +## 3. Quota consumers read resolved limits + +- [x] 3.1 `middleware/rate-limit.ts`: read `auth.limits.submissionsPerMinute` instead of `PLAN_LIMITS[plan]` +- [x] 3.2 `routes/_utils.ts` (`withinConcurrentJobLimit`): read `auth.limits.maxConcurrentJobs` +- [x] 3.3 Submission validation: enforce `auth.limits.maxSequenceLength` per request +- [x] 3.4 Shedding decision: use the account's resolved `sloSeconds` rather than indexing global config by plan +- [x] 3.5 Update affected unit tests; confirm defaults reproduce current behavior with no override + +## 4. Persistence + +- [x] 4.1 Add additive migration: nullable `user.limits jsonb` column (no backfill) +- [x] 4.2 Add an example override to `infra/postgres/seeds/dev-users-plan.sql` + +## 5. Admin management surface + +- [x] 5.1 Add `admin/limits.ts` route: `PUT /admin/accounts/{userId}/limits` (validate via `OverrideLimitsSchema`, persist) guarded by existing admin authorization +- [x] 5.2 Add `DELETE /admin/accounts/{userId}/limits` (set column NULL → revert to class defaults) +- [x] 5.3 Emit an audit log line on set/clear (reuse admin/submission logging idiom) +- [x] 5.4 Add route tests: valid set, invalid rejected, clear reverts, non-admin rejected + +## 6. Verification + +- [x] 6.1 `bun run typecheck && bun run lint && bun run test` green +- [x] 6.2 Backend E2E (`bun run test:int`) covering an account with an override enforcing different limits than its plan default +- [x] 6.3 Update CLAUDE.md plan/auth notes if the auth-context shape change warrants it diff --git a/openspec/changes/simplify-load-testing/design.md b/openspec/changes/simplify-load-testing/design.md index cca9fa0..648248e 100644 --- a/openspec/changes/simplify-load-testing/design.md +++ b/openspec/changes/simplify-load-testing/design.md @@ -70,6 +70,13 @@ Emit a `submissions_total{route,plan}` (or equivalently-labeled) counter at the - **Why a counter, not just the log line:** the log line is per-event and not aggregable in the metrics pipeline; reconciliation needs a rate-able series next to `bullmq_*`. - **Alternative — infer submissions from `http_requests_total{route,status}`:** rejected as the primary signal. HTTP status counts conflate retries, polls, and non-submission routes; an explicit submission counter is unambiguous and plan-labeled. +### Decision 7: The saturation run drives enforce mode and a per-account SLO knob + +Shedding defaults to shadow, so `saturate.js` MUST turn enforce on (the `shedding.enforce` flag, effective within the flag cache window, no restart) to exercise the 503 path, assert at least one shed, then revert. The shed threshold is the account's resolved `sloSeconds` (from `per-account-limit-overrides`' `user.limits`), not a static per-plan constant — so the run uses a dedicated saturation account with a tight `sloSeconds` override to trip shedding at low, deterministic load on prod rather than brute-forcing genuine over-cap throughput. This doubles as the configurability proof: flip enforce on → shed, flip off → admit. + +- **Alternative — assume prod already runs enforce:** rejected. Prod defaults to shadow; relying on ambient config silently turns the run into a no-op when enforce is off. +- **Alternative — saturate by raw over-cap throughput only:** rejected for prod. Genuine saturation burns GPU and writes permanent S3; a tight per-account SLO trips the same code path far cheaper and deterministically. + ## Risks / Trade-offs - **Reads metrics not yet built** → the compute/shed assertions depend on `prediction-latency-observability` and `fix-shedding-residue-leak`. Mitigation: sequence this change after them; land the admission tier + submission counter (both independent) first. @@ -89,6 +96,6 @@ Rollback: restore `throughput.js` and the old scenario set; the submission count ## Open Questions -- **Exact pro concurrent-cap value** for sizing `pipeline.js` VUs — read from env with a conservative default; pin once the plan-limits source (`packages/shared/src/plan.ts`) is confirmed during implementation. +- **Exact pro concurrent-cap value** for sizing `pipeline.js` VUs — _Resolved:_ the cap and SLO are now per-account `EffectiveLimits` (resolved by `per-account-limit-overrides` from `user.limits` over the plan-class default), not a static `PLAN_LIMITS[plan]` constant. `pipeline.js` still reads a VU cap from env (conservative default); `saturate.js` provisions a dedicated account with a `sloSeconds` override as the saturation knob. - **`admission.js` structure** — one k6 script with two scenarios (free + pro via `exec`), or keep them as separate VU functions in one file? Default: one file, two scenario functions, to keep the cron a single step. - **Submission counter label set** — `{route,plan}` is the proposed minimum; confirm whether `outcome` (accepted/shed/rate-limited) is worth adding here or is already covered by `admitted_residues_total` + `requests_shed_total`. Default: `{route,plan}` only, lean on existing counters for outcomes. diff --git a/openspec/changes/simplify-load-testing/specs/load-testing/spec.md b/openspec/changes/simplify-load-testing/specs/load-testing/spec.md index 638b78b..9f02bd9 100644 --- a/openspec/changes/simplify-load-testing/specs/load-testing/spec.md +++ b/openspec/changes/simplify-load-testing/specs/load-testing/spec.md @@ -49,12 +49,33 @@ The end-to-end scenario SHALL submit unique sequences and poll each job to a ter ### Requirement: The saturation scenario is the only shedding/503 coverage -The saturation scenario SHALL drive submissions above sustainable throughput with mixed sequence lengths, tolerate 503 responses, and read `shedding_residues_per_second` for steady-state calibration of the shedding EWMA. It SHALL be the sole scenario that exercises the 503 shed path, and SHALL run on demand only. +The saturation scenario SHALL drive submissions above sustainable throughput with mixed sequence lengths, tolerate 503 responses during the ramp, and read `shedding_residues_per_second` for steady-state calibration of the shedding EWMA. It SHALL be the sole scenario that exercises the 503 shed path, and SHALL run on demand only. + +Because shedding defaults to shadow mode (`SHED_MODE=shadow`), the run SHALL ensure enforce mode is active before asserting the shed path — by setting the `shedding.enforce` flag override (effective within the flag cache window, no restart) — and SHALL revert it on completion. Under sustained over-cap load in enforce mode the run SHALL observe at least one `503` carrying `Retry-After`; a run that produces no `503` SHALL fail rather than pass, so a shadow-mode no-op is never mistaken for shedding coverage. #### Scenario: Over-cap load surfaces shedding for calibration -- **WHEN** the saturation scenario sustains over-cap load -- **THEN** 503 responses are tolerated (not counted as failures), real network errors still fail the run, and `shedding_residues_per_second` is readable for calibration during the steady-state window +- **WHEN** the saturation scenario sustains over-cap load with enforce mode active +- **THEN** 503 responses are tolerated (not counted as failures) and carry `Retry-After`, real network errors still fail the run, and `shedding_residues_per_second` is readable for calibration during the steady-state window + +#### Scenario: A run that never sheds fails + +- **WHEN** the saturation run completes without observing any 503 under sustained over-cap enforce-mode load +- **THEN** the run fails, signalling either that enforce was not active or that the load did not exceed the resolved SLO + +### Requirement: Shedding enforcement is runtime-configurable without restart + +The shedding control plane SHALL be togglable at runtime: the `shedding.enabled` and `shedding.enforce` flag overrides take effect within the flag cache window with no redeploy, and an account's shed threshold SHALL be the per-account resolved `sloSeconds` (from the `user.limits` override) overlaid on the plan-class default, not a static per-plan constant. The saturation run SHALL exercise this surface: enabling enforce engages shedding and disabling it resumes admission. + +#### Scenario: Enforce toggle engages and disengages shedding + +- **WHEN** an operator flips `shedding.enforce` on and then off around a saturation run +- **THEN** over-cap submissions shed (503) while enforce is on and resume admitting (202) within the flag cache window after it is turned off + +#### Scenario: Per-account SLO override sets the shed threshold + +- **WHEN** the saturation account carries a `sloSeconds` override in `user.limits` +- **THEN** shedding for that account trips against the override value, allowing a deterministic saturation threshold without changing global config ### Requirement: Submission accounting is reconcilable end to end diff --git a/openspec/changes/simplify-load-testing/tasks.md b/openspec/changes/simplify-load-testing/tasks.md index ce82437..a16a135 100644 --- a/openspec/changes/simplify-load-testing/tasks.md +++ b/openspec/changes/simplify-load-testing/tasks.md @@ -23,7 +23,7 @@ ## 5. Finalize pipeline.js (after prediction-latency-observability + fix-shedding-residue-leak) -- [ ] 5.1 Remove the concurrent-cap 429 back-off path; size VUs ≤ the pro concurrent-job cap (read from env, conservative default; source the cap from `packages/shared/src/plan.ts`). +- [ ] 5.1 Remove the concurrent-cap 429 back-off path; size VUs ≤ the pro concurrent-job cap (read from env, conservative default; the cap is now per-account resolved `EffectiveLimits` from `per-account-limit-overrides`, not a static `PLAN_LIMITS[plan]`). - [ ] 5.2 Treat an unexpected cap-429 as a failed check ("run mis-sized — lower VUs"), not a tolerated outcome. - [ ] 5.3 Trim in-script measurement to client-perceived `e2e_latency` + `submit_latency`; document that per-model latency / drain are read from `/metrics` (`triton_model_infer_duration_seconds`, `shedding_residues_per_second`, queue depth). - [ ] 5.4 Header documents this doubles as the `bound-prediction-fanout` tuning harness. @@ -33,11 +33,15 @@ - [ ] 6.1 Rename/replace `tests/load/shedding.js` → `tests/load/saturate.js`; keep over-cap ramp + 503 tolerance + mixed lengths. - [ ] 6.2 Update the calibration note to read the now-trustworthy `shedding_residues_per_second` aggregate drain rate. - [ ] 6.3 Header documents this is the ONLY 503/shedding coverage and is run on demand only (rare tier). +- [ ] 6.4 Provision a dedicated saturation account on prod with a tight `sloSeconds` override (`user.limits`, via `PUT /admin/accounts/{userId}/limits`) so shedding trips at low, deterministic load; inject its bearer plus an admin bearer via env. +- [ ] 6.5 In the run, enable enforce via `PUT /admin/flags/shedding.enforce` (effective ≤ flag cache window), assert ≥1 `503`+`Retry-After` under sustained over-cap load, then revert (clear the flag override + the account override); a run with zero 503s fails. +- [ ] 6.6 Assert the configurability round-trip: with enforce off, the same over-cap load admits (202) within the flag cache window. ## 7. Verification - [ ] 7.1 `K6_SMOKE=true` syntax-validate every script (`admission.js`, `pipeline.js`, `saturate.js`). - [ ] 7.2 Run `admission.js` against prod: free-plan 429 fires with header, pro cached submits non-5xx, no real inference triggered. - [ ] 7.3 Run `pipeline.js` under the cap: jobs reach terminal state, no cap-429, and confirm `submissions_total` ≈ bullmq enqueued ≈ completed (accounting gap closed/visible). -- [ ] 7.4 Run `saturate.js`: 503s tolerated, `shedding_residues_per_second` readable at steady state. +- [ ] 7.4 Run `saturate.js` with enforce on: ≥1 `503`+`Retry-After` observed, 503s tolerated (not failures), `shedding_residues_per_second` readable at steady state; enforce reverted on completion. +- [ ] 7.6 Confirm the per-account `sloSeconds` override drives the shed threshold (the saturation account sheds where a plan-default account would not at the same load). - [ ] 7.5 Repo gates: `bun run lint` (k6 scripts are `.js`, eslint-only), `bun run typecheck`/`test` for the gateway counter change. diff --git a/openspec/specs/plan-limits/spec.md b/openspec/specs/plan-limits/spec.md new file mode 100644 index 0000000..abd210d --- /dev/null +++ b/openspec/specs/plan-limits/spec.md @@ -0,0 +1,75 @@ +# plan-limits Specification + +## Purpose + +TBD - created by archiving change per-account-limit-overrides. Update Purpose after archive. + +## Requirements + +### Requirement: Plan class versus quota separation + +The system SHALL treat an account's `plan` as a scheduling **class** (queue priority and SLO bucket) and SHALL resolve numeric **quota** limits separately, so that two accounts on the same class MAY enforce different quotas. + +The quota envelope SHALL consist of: `submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, and `sloSeconds`. + +#### Scenario: Class still drives scheduling + +- **WHEN** an account's queue priority or SLO bucket is selected +- **THEN** the system uses the `plan` enum (`free` | `pro` | `enterprise`) and ignores any per-account quota override + +#### Scenario: Quota resolved independently of class + +- **WHEN** a quota limit is enforced for an account +- **THEN** the system uses the resolved effective limit, not a direct `PLAN_LIMITS[plan]` lookup at the call site + +### Requirement: Effective limit resolution + +The plan resolver SHALL return an `EffectiveLimits` object computed by merging a per-account override over the plan-class default on a field-by-field basis. An absent override object, an absent field, or a resolution failure SHALL fall back to the plan-class default for that field. + +#### Scenario: No override present + +- **WHEN** an account has no `limits` override stored +- **THEN** every effective limit equals the corresponding `PLAN_LIMITS[plan]` (and SLO default) value + +#### Scenario: Partial override merges field-by-field + +- **WHEN** an account's override sets only `maxConcurrentJobs` +- **THEN** `maxConcurrentJobs` uses the override value and all other limits fall back to the plan-class default + +#### Scenario: Resolution failure is safe + +- **WHEN** the override store read fails or returns an unparseable value +- **THEN** the resolver logs a warning and returns the plan-class defaults without throwing + +### Requirement: Override storage and validation + +Per-account overrides SHALL be stored in a nullable `limits jsonb` column on the Postgres `user` table as a sparse partial object. The system SHALL validate override values against a schema before applying them: each present field MUST be a positive integer (or zero where the underlying default permits zero, e.g. an SLO bucket), and unknown fields SHALL be rejected. + +#### Scenario: Valid sparse override accepted + +- **WHEN** an override `{ "submissionsPerMinute": 1000 }` is validated +- **THEN** validation passes and only `submissionsPerMinute` is overridden + +#### Scenario: Invalid override rejected + +- **WHEN** an override contains a negative number, a non-integer, or an unknown field +- **THEN** validation fails and the override is not persisted + +### Requirement: Admin-only override management + +The system SHALL expose admin-only endpoints to set and clear an account's limit override, mirroring the existing admin flag-override authorization. Overrides SHALL NOT be settable or viewable through any user-facing (non-admin) route. + +#### Scenario: Admin sets an override + +- **WHEN** an authenticated admin sends a valid override for an account via the admin route +- **THEN** the override is persisted and subsequent submissions for that account enforce the merged effective limits + +#### Scenario: Admin clears an override + +- **WHEN** an admin clears an account's override +- **THEN** the `limits` column is reset and the account reverts to plan-class defaults + +#### Scenario: Non-admin cannot set an override + +- **WHEN** a non-admin caller attempts to set or read an account override +- **THEN** the request is rejected by admin authorization diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts index f7df4d3..7343ab5 100644 --- a/packages/shared/src/index.ts +++ b/packages/shared/src/index.ts @@ -37,11 +37,20 @@ export { sweepFilesystemBudget } from './storage-evict.ts' export { PLAN_LIMITS, MAX_SEQUENCE_LENGTH, + MAX_SEQUENCE_LENGTH_CAP, + OverrideLimitsSchema, + mergeLimits, DEFAULT_PLAN_PRIORITY, SHEDDING_DEFAULTS, loadSheddingConfig, } from './plan.ts' -export type { PlanResolver, SheddingConfig } from './plan.ts' +export type { + PlanResolver, + SheddingConfig, + EffectiveLimits, + ResolvedAccount, + LimitsOverride, +} from './plan.ts' export * from './queue.ts' // sentry.ts: `_resetSentryForTests` stays internal (tested via direct path). export { initSentry } from './sentry.ts' diff --git a/packages/shared/src/plan.test.ts b/packages/shared/src/plan.test.ts index 862fa05..f550ac5 100644 --- a/packages/shared/src/plan.test.ts +++ b/packages/shared/src/plan.test.ts @@ -2,11 +2,16 @@ import { describe, it, expect } from 'vitest' import { DEFAULT_PLAN_PRIORITY, + MAX_SEQUENCE_LENGTH, + MAX_SEQUENCE_LENGTH_CAP, + OverrideLimitsSchema, PLAN_LIMITS, SHEDDING_DEFAULTS, loadSheddingConfig, + mergeLimits, parseBoolean, } from './plan.ts' +import type { EffectiveLimits } from './plan.ts' describe('parseBoolean', () => { it('returns the fallback when the value is undefined', () => { @@ -36,6 +41,56 @@ describe('PLAN_LIMITS', () => { }) }) +describe('OverrideLimitsSchema', () => { + it('accepts a sparse override', () => { + const r = OverrideLimitsSchema.safeParse({ maxConcurrentJobs: 25 }) + expect(r.success).toBe(true) + }) + + it('rejects negative, non-integer, and over-cap values', () => { + expect( + OverrideLimitsSchema.safeParse({ maxConcurrentJobs: -1 }).success, + ).toBe(false) + expect( + OverrideLimitsSchema.safeParse({ submissionsPerMinute: 1.5 }).success, + ).toBe(false) + expect( + OverrideLimitsSchema.safeParse({ + maxSequenceLength: MAX_SEQUENCE_LENGTH_CAP + 1, + }).success, + ).toBe(false) + }) + + it('allows sloSeconds of zero', () => { + expect(OverrideLimitsSchema.safeParse({ sloSeconds: 0 }).success).toBe(true) + }) + + it('rejects unknown fields', () => { + expect(OverrideLimitsSchema.safeParse({ bogus: 1 }).success).toBe(false) + }) +}) + +describe('mergeLimits', () => { + const defaults: EffectiveLimits = { + submissionsPerMinute: 10, + maxConcurrentJobs: 2, + maxSequenceLength: MAX_SEQUENCE_LENGTH, + sloSeconds: 30, + } + + it('returns defaults when no override', () => { + expect(mergeLimits(defaults, null)).toEqual(defaults) + expect(mergeLimits(defaults, undefined)).toEqual(defaults) + }) + + it('merges a partial override field-by-field', () => { + expect(mergeLimits(defaults, { maxConcurrentJobs: 25 })).toEqual({ + ...defaults, + maxConcurrentJobs: 25, + }) + }) +}) + describe('DEFAULT_PLAN_PRIORITY', () => { it('enterprise drains before pro before free (lower = higher)', () => { expect(DEFAULT_PLAN_PRIORITY.enterprise).toBeLessThan( diff --git a/packages/shared/src/plan.ts b/packages/shared/src/plan.ts index 3ed9c59..86d100a 100644 --- a/packages/shared/src/plan.ts +++ b/packages/shared/src/plan.ts @@ -1,8 +1,22 @@ +import { z } from 'zod' + import { zBooleanString } from './config.ts' import type { Plan } from './types.ts' +export interface EffectiveLimits { + submissionsPerMinute: number + maxConcurrentJobs: number + maxSequenceLength: number + sloSeconds: number +} + +export interface ResolvedAccount { + plan: Plan + limits: EffectiveLimits +} + export interface PlanResolver { - resolve(userId: string, email: string): Promise + resolve(userId: string, email: string): Promise } export const PLAN_LIMITS: Record< @@ -27,6 +41,50 @@ export const PLAN_LIMITS: Record< */ export const MAX_SEQUENCE_LENGTH = 4096 +/** + * Absolute upper bound an account override may raise `maxSequenceLength` to. + * The static submit schema rejects above this; the per-account resolved limit + * (default `MAX_SEQUENCE_LENGTH`) is enforced per request below the cap. + */ +export const MAX_SEQUENCE_LENGTH_CAP = 16384 + +/** + * Sparse per-account override of {@link EffectiveLimits}. Each present field + * replaces the plan-class default; absent fields fall back. `.strict()` + * rejects unknown keys; `.max()` caps guard against widening beyond infra. + */ +export const OverrideLimitsSchema = z + .object({ + submissionsPerMinute: z.number().int().positive().max(100_000).optional(), + maxConcurrentJobs: z.number().int().positive().max(10_000).optional(), + maxSequenceLength: z + .number() + .int() + .positive() + .max(MAX_SEQUENCE_LENGTH_CAP) + .optional(), + sloSeconds: z.number().int().min(0).max(86_400).optional(), + }) + .strict() + +export type LimitsOverride = z.infer + +export function mergeLimits( + classDefaults: EffectiveLimits, + override?: LimitsOverride | null, +): EffectiveLimits { + if (!override) return { ...classDefaults } + return { + submissionsPerMinute: + override.submissionsPerMinute ?? classDefaults.submissionsPerMinute, + maxConcurrentJobs: + override.maxConcurrentJobs ?? classDefaults.maxConcurrentJobs, + maxSequenceLength: + override.maxSequenceLength ?? classDefaults.maxSequenceLength, + sloSeconds: override.sloSeconds ?? classDefaults.sloSeconds, + } +} + /** * BullMQ job priority per plan — lower integer = higher priority * (drained first). Admitted jobs carry this on both the parent prediction diff --git a/packages/shared/src/types.ts b/packages/shared/src/types.ts index 3f79eba..d4eb450 100644 --- a/packages/shared/src/types.ts +++ b/packages/shared/src/types.ts @@ -1,11 +1,14 @@ import { z } from 'zod' +import type { EffectiveLimits } from './plan.ts' + export type Plan = 'free' | 'pro' | 'enterprise' export interface AuthContext { sub: string email: string plan: Plan + limits: EffectiveLimits method: 'api-key' | 'session' role?: 'admin' | 'user' } diff --git a/services/api-gateway/openapi.v1.json b/services/api-gateway/openapi.v1.json index bf01920..10941f0 100644 --- a/services/api-gateway/openapi.v1.json +++ b/services/api-gateway/openapi.v1.json @@ -33,7 +33,7 @@ "sequence": { "type": "string", "minLength": 1, - "maxLength": 4096, + "maxLength": 16384, "example": "MKTVRQERLK" }, "accession": { @@ -210,7 +210,7 @@ "sequence": { "type": "string", "minLength": 1, - "maxLength": 4096, + "maxLength": 16384, "example": "MKTVRQERLK" }, "accession": { diff --git a/services/api-gateway/scripts/migrate.ts b/services/api-gateway/scripts/migrate.ts index 7c01d03..75e4553 100644 --- a/services/api-gateway/scripts/migrate.ts +++ b/services/api-gateway/scripts/migrate.ts @@ -62,6 +62,20 @@ try { END $$; `) console.log('[migrate] Phase 22: ensured users.role column (D-10).') + + // Per-account quota override (nullable jsonb, no backfill — NULL = plan + // class defaults). Idempotent via pg_attribute existence check. + await pool.query(` + DO $$ BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_attribute + WHERE attrelid = '"user"'::regclass AND attname = 'limits' AND NOT attisdropped + ) THEN + ALTER TABLE "user" ADD COLUMN limits jsonb; + END IF; + END $$; + `) + console.log('[migrate] ensured user.limits column.') } finally { await pool.end() } diff --git a/services/api-gateway/src/admin/limits.test.ts b/services/api-gateway/src/admin/limits.test.ts new file mode 100644 index 0000000..37baa15 --- /dev/null +++ b/services/api-gateway/src/admin/limits.test.ts @@ -0,0 +1,102 @@ +import type { AuthContext, EffectiveLimits } from '@protifer/shared' +import { Hono } from 'hono' +import { describe, expect, it, vi } from 'vitest' + +import { createLimitsAdminRouter } from './limits.ts' +import { createAdminRoleMiddleware } from '../middleware/admin-role.ts' +import type { Variables } from '../types/hono.ts' + +const defaultLimits: EffectiveLimits = { + submissionsPerMinute: 300, + maxConcurrentJobs: 50, + maxSequenceLength: 4096, + sloSeconds: 0, +} + +function makeApp(opts: { + auth?: Partial | null + setLimits?: (userId: string, limits: unknown) => Promise + clearLimits?: (userId: string) => Promise +}) { + const setLimits = vi.fn(opts.setLimits ?? (() => Promise.resolve(1))) + const clearLimits = vi.fn(opts.clearLimits ?? (() => Promise.resolve(1))) + const app = new Hono<{ Variables: Variables }>() + if (opts.auth !== null) { + app.use('*', async (c, next) => { + c.set('auth', { + sub: 'admin1', + email: 'admin@x.com', + plan: 'enterprise', + limits: defaultLimits, + method: 'session', + role: 'admin', + ...opts.auth, + }) + await next() + }) + } + app.use('*', createAdminRoleMiddleware()) + app.route( + '/admin', + createLimitsAdminRouter({ + setLimits: setLimits as never, + clearLimits, + }), + ) + return { app, setLimits, clearLimits } +} + +describe('createLimitsAdminRouter', () => { + it('sets a valid override', async () => { + const { app, setLimits } = makeApp({}) + const res = await app.request('/admin/accounts/u1/limits', { + method: 'PUT', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ maxConcurrentJobs: 25 }), + }) + expect(res.status).toBe(200) + expect(setLimits).toHaveBeenCalledWith('u1', { maxConcurrentJobs: 25 }) + }) + + it('rejects an invalid override', async () => { + const { app, setLimits } = makeApp({}) + const res = await app.request('/admin/accounts/u1/limits', { + method: 'PUT', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ maxConcurrentJobs: -1, bogus: 1 }), + }) + expect(res.status).toBe(400) + expect(setLimits).not.toHaveBeenCalled() + }) + + it('returns 404 when the user does not exist', async () => { + const { app } = makeApp({ setLimits: () => Promise.resolve(0) }) + const res = await app.request('/admin/accounts/missing/limits', { + method: 'PUT', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ maxConcurrentJobs: 25 }), + }) + expect(res.status).toBe(404) + }) + + it('clears an override', async () => { + const { app, clearLimits } = makeApp({}) + const res = await app.request('/admin/accounts/u1/limits', { + method: 'DELETE', + }) + expect(res.status).toBe(200) + expect(clearLimits).toHaveBeenCalledWith('u1') + expect(await res.json()).toEqual({ userId: 'u1', limits: null }) + }) + + it('rejects a non-admin caller', async () => { + const { app, setLimits } = makeApp({ auth: { role: 'user' } }) + const res = await app.request('/admin/accounts/u1/limits', { + method: 'PUT', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ maxConcurrentJobs: 25 }), + }) + expect(res.status).toBe(403) + expect(setLimits).not.toHaveBeenCalled() + }) +}) diff --git a/services/api-gateway/src/admin/limits.ts b/services/api-gateway/src/admin/limits.ts new file mode 100644 index 0000000..88b7ee7 --- /dev/null +++ b/services/api-gateway/src/admin/limits.ts @@ -0,0 +1,67 @@ +import type { AuthContext, LimitsOverride, Logger } from '@protifer/shared' +import { OverrideLimitsSchema } from '@protifer/shared' +import { Hono } from 'hono' + +import type { Variables } from '../types/hono.ts' + +export interface LimitsAdminDeps { + /** Persist a full-replace override; returns affected row count. */ + setLimits: (userId: string, limits: LimitsOverride) => Promise + /** Clear the override (column → NULL); returns affected row count. */ + clearLimits: (userId: string) => Promise + logger?: Logger +} + +function adminIdentity(c: { get: (k: 'auth') => AuthContext }): string { + const auth = c.get('auth') + return auth.email || auth.sub +} + +export function createLimitsAdminRouter(deps: LimitsAdminDeps) { + const { setLimits, clearLimits, logger } = deps + const router = new Hono<{ Variables: Variables }>() + + router.put('/accounts/:userId/limits', async (c) => { + const userId = c.req.param('userId') + let body: unknown + try { + body = await c.req.json() + } catch { + return c.json({ error: 'Invalid JSON', code: 'VALIDATION_ERROR' }, 400) + } + const parsed = OverrideLimitsSchema.safeParse(body) + if (!parsed.success) { + return c.json( + { + error: parsed.error.issues[0]?.message ?? 'Invalid override', + code: 'VALIDATION_ERROR', + }, + 400, + ) + } + const affected = await setLimits(userId, parsed.data) + if (affected === 0) { + return c.json({ error: 'Unknown user', code: 'USER_NOT_FOUND' }, 404) + } + logger?.info( + { targetUserId: userId, adminId: adminIdentity(c), limits: parsed.data }, + 'admin: set account limits override', + ) + return c.json({ userId, limits: parsed.data }, 200) + }) + + router.delete('/accounts/:userId/limits', async (c) => { + const userId = c.req.param('userId') + const affected = await clearLimits(userId) + if (affected === 0) { + return c.json({ error: 'Unknown user', code: 'USER_NOT_FOUND' }, 404) + } + logger?.info( + { targetUserId: userId, adminId: adminIdentity(c) }, + 'admin: cleared account limits override', + ) + return c.json({ userId, limits: null }, 200) + }) + + return router +} diff --git a/services/api-gateway/src/app.ts b/services/api-gateway/src/app.ts index 5916f55..2fe073e 100644 --- a/services/api-gateway/src/app.ts +++ b/services/api-gateway/src/app.ts @@ -3,9 +3,11 @@ import { OpenAPIHono } from '@hono/zod-openapi' import { OpenFeature } from '@openfeature/server-sdk' import type { AuthContext, + EffectiveLimits, FlagOverrideStore, Logger, ObjectStore, + Plan, PredictionSuiteConfig, Redis, } from '@protifer/shared' @@ -13,6 +15,8 @@ import { AppError, CachedFlagOverrideStore, FLAG_REGISTRY, + MAX_SEQUENCE_LENGTH, + PLAN_LIMITS, QUEUE_NAMES, QueueEvents, RedisFlagOverrideStore, @@ -36,6 +40,7 @@ import pino from 'pino' import { createBullBoardRouter } from './admin/bull-board.ts' import { createCleanupAdminRouter } from './admin/cleanup.ts' import { createFlagsAdminRouter, createFlagsProvider } from './admin/flags.ts' +import { createLimitsAdminRouter } from './admin/limits.ts' import { createSheddingAdminRouter } from './admin/shedding.ts' import { createAuth } from './auth/index.ts' import { createUserDirectory } from './auth/user-directory.ts' @@ -280,24 +285,40 @@ export function createApp(overrides?: { sharedPool, ) const userDirectory = createUserDirectory(sharedPool) + const planClassDefaults: Record = { + free: { + submissionsPerMinute: config.rateLimit.submissionsFree, + maxConcurrentJobs: PLAN_LIMITS.free.maxConcurrentJobs, + maxSequenceLength: MAX_SEQUENCE_LENGTH, + sloSeconds: sheddingConfig.sloSeconds.free, + }, + pro: { + submissionsPerMinute: config.rateLimit.submissionsPro, + maxConcurrentJobs: PLAN_LIMITS.pro.maxConcurrentJobs, + maxSequenceLength: MAX_SEQUENCE_LENGTH, + sloSeconds: sheddingConfig.sloSeconds.pro, + }, + enterprise: { + submissionsPerMinute: config.rateLimit.submissionsEnterprise, + maxConcurrentJobs: PLAN_LIMITS.enterprise.maxConcurrentJobs, + maxSequenceLength: MAX_SEQUENCE_LENGTH, + sloSeconds: sheddingConfig.sloSeconds.enterprise, + }, + } const planResolver = new DbPlanResolver({ logger, + classDefaults: planClassDefaults, getUser: async (userId) => { - const result = await sharedPool.query<{ plan?: string }>( - 'SELECT plan FROM "user" WHERE id = $1 LIMIT 1', - [userId], - ) + const result = await sharedPool.query<{ + plan?: string + limits?: unknown + }>('SELECT plan, limits FROM "user" WHERE id = $1 LIMIT 1', [userId]) return result.rows[0] ?? null }, }) const rateLimitConnection = connection as unknown as Redis const submissionRL = createSubmissionRateLimiter({ connection: rateLimitConnection, - submissionsPerMinute: { - free: config.rateLimit.submissionsFree, - pro: config.rateLimit.submissionsPro, - enterprise: config.rateLimit.submissionsEnterprise, - }, }) const pollRL = createPollRateLimiter({ connection: rateLimitConnection }) @@ -418,6 +439,26 @@ export function createApp(overrides?: { '/admin/flags', createFlagsAdminRouter({ registry: FLAG_REGISTRY, store: flagsStore }), ) + app.route( + '/admin', + createLimitsAdminRouter({ + logger, + setLimits: async (userId, limits) => { + const result = await sharedPool.query( + 'UPDATE "user" SET limits = $1::jsonb WHERE id = $2', + [JSON.stringify(limits), userId], + ) + return result.rowCount ?? 0 + }, + clearLimits: async (userId) => { + const result = await sharedPool.query( + 'UPDATE "user" SET limits = NULL WHERE id = $1', + [userId], + ) + return result.rowCount ?? 0 + }, + }), + ) if (serveStatic) { // Bull Board ships its own static asset bundle; only mount when the // serve-static helper is wired in. diff --git a/services/api-gateway/src/contracts/job-contracts.test.ts b/services/api-gateway/src/contracts/job-contracts.test.ts index ea77eb6..5bacff5 100644 --- a/services/api-gateway/src/contracts/job-contracts.test.ts +++ b/services/api-gateway/src/contracts/job-contracts.test.ts @@ -15,7 +15,17 @@ import type { RedisCommands } from '../queue.ts' import { createPredictionsRouter } from '../routes/predictions.ts' import type { Variables } from '../types/hono.ts' -const proResolver: PlanResolver = { resolve: vi.fn().mockResolvedValue('pro') } +const proResolver: PlanResolver = { + resolve: vi.fn().mockResolvedValue({ + plan: 'pro' as const, + limits: { + submissionsPerMinute: 60, + maxConcurrentJobs: 10, + maxSequenceLength: 4096, + sloSeconds: 120, + }, + }), +} const mockAuth = { api: { diff --git a/services/api-gateway/src/middleware/auth/auth.test.ts b/services/api-gateway/src/middleware/auth/auth.test.ts index 580531f..9d4b3eb 100644 --- a/services/api-gateway/src/middleware/auth/auth.test.ts +++ b/services/api-gateway/src/middleware/auth/auth.test.ts @@ -1,3 +1,4 @@ +import type { EffectiveLimits } from '@protifer/shared' import { Hono } from 'hono' import { describe, it, expect, vi } from 'vitest' @@ -6,8 +7,21 @@ import type { Auth } from '../../auth/index.ts' import type { UserDirectory, UserRecord } from '../../auth/user-directory.ts' import type { Variables } from '../../types/hono.ts' -const freeResolver = { resolve: () => Promise.resolve('free' as const) } -const proResolver = { resolve: () => Promise.resolve('pro' as const) } +const limitsFor = (submissionsPerMinute: number): EffectiveLimits => ({ + submissionsPerMinute, + maxConcurrentJobs: 2, + maxSequenceLength: 4096, + sloSeconds: 30, +}) + +const freeResolver = { + resolve: () => + Promise.resolve({ plan: 'free' as const, limits: limitsFor(10) }), +} +const proResolver = { + resolve: () => + Promise.resolve({ plan: 'pro' as const, limits: limitsFor(60) }), +} function makeAuth( session: { user: { id: string; email: string; role?: string } } | null, diff --git a/services/api-gateway/src/middleware/auth/authenticate.ts b/services/api-gateway/src/middleware/auth/authenticate.ts index f928f92..32d5cf8 100644 --- a/services/api-gateway/src/middleware/auth/authenticate.ts +++ b/services/api-gateway/src/middleware/auth/authenticate.ts @@ -47,13 +47,14 @@ export function createAuthenticateMiddleware({ if (!user) { return c.json({ error: 'Unauthorized', code: 'UNAUTHORIZED' }, 401) } - const plan = await resolver.resolve(user.id, user.email) + const { plan, limits } = await resolver.resolve(user.id, user.email) const role = user.role === 'admin' || user.role === 'user' ? user.role : undefined c.set('auth', { sub: user.id, email: user.email, plan, + limits, method: 'api-key', ...(role ? { role } : {}), }) @@ -65,12 +66,16 @@ export function createAuthenticateMiddleware({ if (!session?.user) { return c.json({ error: 'Unauthorized', code: 'UNAUTHORIZED' }, 401) } - const plan = await resolver.resolve(session.user.id, session.user.email) + const { plan, limits } = await resolver.resolve( + session.user.id, + session.user.email, + ) const role = (session.user as { role?: string }).role c.set('auth', { sub: session.user.id, email: session.user.email, plan, + limits, method: 'session', ...(role === 'admin' || role === 'user' ? { role } : {}), }) diff --git a/services/api-gateway/src/middleware/rate-limit.test.ts b/services/api-gateway/src/middleware/rate-limit.test.ts index 64ac98f..5ff349e 100644 --- a/services/api-gateway/src/middleware/rate-limit.test.ts +++ b/services/api-gateway/src/middleware/rate-limit.test.ts @@ -77,10 +77,23 @@ beforeEach(async () => { function makeAuthApp( plan: AuthContext['plan'], rateLimiter: ReturnType, + submissionsPerMinute?: number, ) { + const spm = submissionsPerMinute ?? PLAN_LIMITS[plan].submissionsPerMinute const app = new OpenAPIHono<{ Variables: Variables }>() app.use('*', (c, next) => { - c.set('auth', { sub: 'user-001', email: 'u@test.com', plan }) + c.set('auth', { + sub: 'user-001', + email: 'u@test.com', + plan, + limits: { + submissionsPerMinute: spm, + maxConcurrentJobs: 2, + maxSequenceLength: 4096, + sloSeconds: 30, + }, + method: 'session', + }) return next() }) app.use('*', rateLimiter) @@ -129,13 +142,11 @@ describe('createSubmissionRateLimiter', () => { ) }) - it('honors a configured per-plan submissions ceiling', async () => { + it('honors the resolved per-account submissions ceiling', async () => { const app = makeAuthApp( 'free', - createSubmissionRateLimiter({ - connection, - submissionsPerMinute: { free: 999, pro: 60, enterprise: 300 }, - }), + createSubmissionRateLimiter({ connection }), + 999, ) const res = await app.request('/test') expect(res.headers.get('ratelimit-policy')).toContain('999') diff --git a/services/api-gateway/src/middleware/rate-limit.ts b/services/api-gateway/src/middleware/rate-limit.ts index 197a984..b435330 100644 --- a/services/api-gateway/src/middleware/rate-limit.ts +++ b/services/api-gateway/src/middleware/rate-limit.ts @@ -1,5 +1,4 @@ -import type { Plan, Redis } from '@protifer/shared' -import { PLAN_LIMITS } from '@protifer/shared' +import type { Redis } from '@protifer/shared' import type { Store } from 'hono-rate-limiter' import { rateLimiter } from 'hono-rate-limiter' import { RedisStore } from 'rate-limit-redis' @@ -36,23 +35,10 @@ export interface RateLimitDeps { connection: Redis } -export interface SubmissionRateLimitDeps extends RateLimitDeps { - /** Per-plan submissions/min ceiling. Defaults to PLAN_LIMITS. */ - submissionsPerMinute?: Record -} - -export function createSubmissionRateLimiter({ - connection, - submissionsPerMinute, -}: SubmissionRateLimitDeps) { +export function createSubmissionRateLimiter({ connection }: RateLimitDeps) { return rateLimiter<{ Variables: Variables }>({ windowMs: 60 * 1000, - limit: (c) => { - const plan = c.get('auth').plan - return ( - submissionsPerMinute?.[plan] ?? PLAN_LIMITS[plan].submissionsPerMinute - ) - }, + limit: (c) => c.get('auth').limits.submissionsPerMinute, keyGenerator: (c) => c.get('auth').sub, store: makeRedisStore(connection, 'rl:submit:'), standardHeaders: 'draft-7', diff --git a/services/api-gateway/src/middleware/shedding.test.ts b/services/api-gateway/src/middleware/shedding.test.ts index 5ef192c..7bced0d 100644 --- a/services/api-gateway/src/middleware/shedding.test.ts +++ b/services/api-gateway/src/middleware/shedding.test.ts @@ -83,7 +83,18 @@ function makeApp({ const app = new Hono<{ Variables: Variables }>() if (auth) { app.use('*', async (c, next) => { - c.set('auth', { sub: 'u1', email: 'u1@x.com', plan }) + c.set('auth', { + sub: 'u1', + email: 'u1@x.com', + plan, + limits: { + submissionsPerMinute: 10, + maxConcurrentJobs: 2, + maxSequenceLength: 4096, + sloSeconds: config.sloSeconds[plan], + }, + method: 'session', + }) await next() }) } @@ -276,7 +287,18 @@ describe('createSheddingMiddleware', () => { }) const app = new Hono<{ Variables: Variables }>() app.use('*', async (c, next) => { - c.set('auth', { sub: 'u1', email: 'u1@x.com', plan: 'free' }) + c.set('auth', { + sub: 'u1', + email: 'u1@x.com', + plan: 'free', + limits: { + submissionsPerMinute: 10, + maxConcurrentJobs: 2, + maxSequenceLength: 4096, + sloSeconds: config.sloSeconds.free, + }, + method: 'session', + }) await next() }) app.use('*', middleware) diff --git a/services/api-gateway/src/middleware/shedding.ts b/services/api-gateway/src/middleware/shedding.ts index 9073f6a..f75ff1d 100644 --- a/services/api-gateway/src/middleware/shedding.ts +++ b/services/api-gateway/src/middleware/shedding.ts @@ -132,6 +132,7 @@ export function createSheddingMiddleware(deps: SheddingMiddlewareDeps) { state: snapshot, config, plan: auth.plan, + sloSeconds: auth.limits.sloSeconds, sequenceResidues: residues, nowMs: now(), }) diff --git a/services/api-gateway/src/plan/db-resolver.test.ts b/services/api-gateway/src/plan/db-resolver.test.ts index 48c8c41..097c332 100644 --- a/services/api-gateway/src/plan/db-resolver.test.ts +++ b/services/api-gateway/src/plan/db-resolver.test.ts @@ -1,38 +1,85 @@ +import type { EffectiveLimits, Plan } from '@protifer/shared' import { describe, it, expect, vi } from 'vitest' import { DbPlanResolver } from './db-resolver.ts' +const classDefaults: Record = { + free: { + submissionsPerMinute: 10, + maxConcurrentJobs: 2, + maxSequenceLength: 4096, + sloSeconds: 30, + }, + pro: { + submissionsPerMinute: 60, + maxConcurrentJobs: 10, + maxSequenceLength: 4096, + sloSeconds: 120, + }, + enterprise: { + submissionsPerMinute: 300, + maxConcurrentJobs: 50, + maxSequenceLength: 4096, + sloSeconds: 0, + }, +} + describe('DbPlanResolver', () => { - it('returns the user.plan field verbatim when recognised', async () => { - const getUser = vi - .fn() - .mockResolvedValue({ id: 'u1', plan: 'pro' as const }) - const r = new DbPlanResolver({ getUser }) - await expect(r.resolve('u1', 'a@b.c')).resolves.toBe('pro') + it('returns plan + class defaults when no override is present', async () => { + const getUser = vi.fn().mockResolvedValue({ id: 'u1', plan: 'pro' }) + const r = new DbPlanResolver({ getUser, classDefaults }) + await expect(r.resolve('u1', 'a@b.c')).resolves.toEqual({ + plan: 'pro', + limits: classDefaults.pro, + }) expect(getUser).toHaveBeenCalledWith('u1') }) - it('defaults to free when the plan field is missing', async () => { - const getUser = vi.fn().mockResolvedValue({ id: 'u1' }) - const r = new DbPlanResolver({ getUser }) - await expect(r.resolve('u1', 'a@b.c')).resolves.toBe('free') + it('merges a partial override over class defaults', async () => { + const getUser = vi.fn().mockResolvedValue({ + id: 'u1', + plan: 'enterprise', + limits: { maxConcurrentJobs: 25, submissionsPerMinute: 1000 }, + }) + const r = new DbPlanResolver({ getUser, classDefaults }) + await expect(r.resolve('u1', 'a@b.c')).resolves.toEqual({ + plan: 'enterprise', + limits: { + ...classDefaults.enterprise, + maxConcurrentJobs: 25, + submissionsPerMinute: 1000, + }, + }) }) - it('defaults to free when the user row is missing entirely', async () => { - const getUser = vi.fn().mockResolvedValue(null) - const r = new DbPlanResolver({ getUser }) - await expect(r.resolve('u1', 'a@b.c')).resolves.toBe('free') + it('falls back to class defaults when the override is invalid', async () => { + const getUser = vi.fn().mockResolvedValue({ + id: 'u1', + plan: 'pro', + limits: { maxConcurrentJobs: -5, bogus: 1 }, + }) + const r = new DbPlanResolver({ getUser, classDefaults }) + await expect(r.resolve('u1', 'a@b.c')).resolves.toEqual({ + plan: 'pro', + limits: classDefaults.pro, + }) }) - it('defaults to free when the stored value is unrecognised', async () => { + it('defaults to free + class defaults when the plan is unrecognised', async () => { const getUser = vi.fn().mockResolvedValue({ id: 'u1', plan: 'gold' }) - const r = new DbPlanResolver({ getUser }) - await expect(r.resolve('u1', 'a@b.c')).resolves.toBe('free') + const r = new DbPlanResolver({ getUser, classDefaults }) + await expect(r.resolve('u1', 'a@b.c')).resolves.toEqual({ + plan: 'free', + limits: classDefaults.free, + }) }) - it('defaults to free when getUser throws', async () => { + it('defaults to free + class defaults when getUser throws', async () => { const getUser = vi.fn().mockRejectedValue(new Error('db down')) - const r = new DbPlanResolver({ getUser }) - await expect(r.resolve('u1', 'a@b.c')).resolves.toBe('free') + const r = new DbPlanResolver({ getUser, classDefaults }) + await expect(r.resolve('u1', 'a@b.c')).resolves.toEqual({ + plan: 'free', + limits: classDefaults.free, + }) }) }) diff --git a/services/api-gateway/src/plan/db-resolver.ts b/services/api-gateway/src/plan/db-resolver.ts index f299028..7ae5c79 100644 --- a/services/api-gateway/src/plan/db-resolver.ts +++ b/services/api-gateway/src/plan/db-resolver.ts @@ -1,17 +1,28 @@ -import type { Logger, Plan, PlanResolver } from '@protifer/shared' +import type { + EffectiveLimits, + LimitsOverride, + Logger, + Plan, + PlanResolver, + ResolvedAccount, +} from '@protifer/shared' +import { OverrideLimitsSchema, mergeLimits } from '@protifer/shared' const VALID_PLANS: readonly Plan[] = ['free', 'pro', 'enterprise'] export interface DbPlanResolverDeps { - getUser: (userId: string) => Promise<{ plan?: string } | null> + getUser: ( + userId: string, + ) => Promise<{ plan?: string; limits?: unknown } | null> + classDefaults: Record logger?: Logger } export class DbPlanResolver implements PlanResolver { constructor(private readonly deps: DbPlanResolverDeps) {} - async resolve(userId: string): Promise { - let user: { plan?: string } | null + async resolve(userId: string): Promise { + let user: { plan?: string; limits?: unknown } | null try { user = await this.deps.getUser(userId) } catch (err) { @@ -19,12 +30,31 @@ export class DbPlanResolver implements PlanResolver { { err, userId }, 'DbPlanResolver: getUser failed, defaulting to free', ) - return 'free' + return { plan: 'free', limits: this.deps.classDefaults.free } } + const raw = user?.plan - if (raw && (VALID_PLANS as readonly string[]).includes(raw)) { - return raw as Plan + const plan: Plan = + raw && (VALID_PLANS as readonly string[]).includes(raw) + ? (raw as Plan) + : 'free' + + let override: LimitsOverride | undefined + if (user?.limits != null) { + const parsed = OverrideLimitsSchema.safeParse(user.limits) + if (parsed.success) { + override = parsed.data + } else { + this.deps.logger?.warn( + { userId, issues: parsed.error.issues }, + 'DbPlanResolver: invalid limits override, using class defaults', + ) + } + } + + return { + plan, + limits: mergeLimits(this.deps.classDefaults[plan], override), } - return 'free' } } diff --git a/services/api-gateway/src/routes/_utils.ts b/services/api-gateway/src/routes/_utils.ts index cabf9d8..0c6d600 100644 --- a/services/api-gateway/src/routes/_utils.ts +++ b/services/api-gateway/src/routes/_utils.ts @@ -1,5 +1,4 @@ -import { PLAN_LIMITS } from '@protifer/shared' -import type { Plan } from '@protifer/shared' +import type { EffectiveLimits } from '@protifer/shared' import type { Context } from 'hono' import { ACTIVE_JOBS_KEY } from '../cleanup.ts' @@ -17,10 +16,10 @@ export const DEFAULT_TIMEOUT_MS = 2_000 export async function withinConcurrentJobLimit( c: Context, redis: RedisCommands, - auth: { sub: string; plan: Plan }, + auth: { sub: string; limits: EffectiveLimits }, ): Promise { const concurrentCount = await redis.zcard(ACTIVE_JOBS_KEY(auth.sub)) - const { maxConcurrentJobs } = PLAN_LIMITS[auth.plan] + const { maxConcurrentJobs } = auth.limits if (concurrentCount >= maxConcurrentJobs) { c.header('X-RateLimit-Concurrent', String(concurrentCount)) return false diff --git a/services/api-gateway/src/routes/embeddings.test.ts b/services/api-gateway/src/routes/embeddings.test.ts index 9dcbc4b..9f4bdf6 100644 --- a/services/api-gateway/src/routes/embeddings.test.ts +++ b/services/api-gateway/src/routes/embeddings.test.ts @@ -11,7 +11,17 @@ import { createAuthenticateMiddleware } from '../middleware/auth/index.ts' import type { RedisCommands } from '../queue.ts' import type { Variables } from '../types/hono.ts' -const proResolver: PlanResolver = { resolve: vi.fn().mockResolvedValue('pro') } +const proResolver: PlanResolver = { + resolve: vi.fn().mockResolvedValue({ + plan: 'pro' as const, + limits: { + submissionsPerMinute: 60, + maxConcurrentJobs: 10, + maxSequenceLength: 4096, + sloSeconds: 120, + }, + }), +} const mockAuth = { api: { diff --git a/services/api-gateway/src/routes/embeddings.ts b/services/api-gateway/src/routes/embeddings.ts index 1f5b9de..6f49103 100644 --- a/services/api-gateway/src/routes/embeddings.ts +++ b/services/api-gateway/src/routes/embeddings.ts @@ -112,6 +112,15 @@ export function createEmbeddingsRouter( const { sequence, accession } = c.req.valid('json') const { embeddingModel } = suite const auth = c.get('auth') + if (sequence.length > auth.limits.maxSequenceLength) { + return c.json( + { + error: `sequence must be at most ${String(auth.limits.maxSequenceLength)} residues`, + code: 'VALIDATION_ERROR', + }, + 400, + ) + } const sequenceHash = computeSequenceHash(sequence) const jobId = computeEmbeddingJobId(sequence, embeddingModel) const statusUrl = `/v1/embeddings/${jobId}` diff --git a/services/api-gateway/src/routes/predictions.test.ts b/services/api-gateway/src/routes/predictions.test.ts index 055af71..116155f 100644 --- a/services/api-gateway/src/routes/predictions.test.ts +++ b/services/api-gateway/src/routes/predictions.test.ts @@ -12,7 +12,18 @@ import type { RedisCommands } from '../queue.ts' import { PredictionPollResponseSchema } from '../schemas/predictions.ts' import type { Variables } from '../types/hono.ts' -const proResolver: PlanResolver = { resolve: vi.fn().mockResolvedValue('pro') } +const proAccount = { + plan: 'pro' as const, + limits: { + submissionsPerMinute: 60, + maxConcurrentJobs: 10, + maxSequenceLength: 4096, + sloSeconds: 120, + }, +} +const proResolver: PlanResolver = { + resolve: vi.fn().mockResolvedValue(proAccount), +} const mockAuth = { api: { @@ -204,7 +215,7 @@ describe('GET /v1/predictions/:jobId', () => { session: {}, user: { id: 'user-001', email: 'user@example.com' }, }) - proResolver.resolve = vi.fn().mockResolvedValue('pro') + proResolver.resolve = vi.fn().mockResolvedValue(proAccount) }) it('returns 404 when job does not exist', async () => { diff --git a/services/api-gateway/src/routes/predictions.ts b/services/api-gateway/src/routes/predictions.ts index 629e8d7..22d41ab 100644 --- a/services/api-gateway/src/routes/predictions.ts +++ b/services/api-gateway/src/routes/predictions.ts @@ -115,6 +115,15 @@ export function createPredictionsRouter( const { sequence, accession } = c.req.valid('json') const { embeddingModel, predictionModels } = suite const auth = c.get('auth') + if (sequence.length > auth.limits.maxSequenceLength) { + return c.json( + { + error: `sequence must be at most ${String(auth.limits.maxSequenceLength)} residues`, + code: 'VALIDATION_ERROR', + }, + 400, + ) + } const sequenceHash = computeSequenceHash(sequence) const embJobId = computeEmbeddingJobId(sequence, embeddingModel) const predJobId = computePredictionJobId( diff --git a/services/api-gateway/src/schemas/common.ts b/services/api-gateway/src/schemas/common.ts index 56d4fbb..624254e 100644 --- a/services/api-gateway/src/schemas/common.ts +++ b/services/api-gateway/src/schemas/common.ts @@ -1,14 +1,19 @@ import { z } from '@hono/zod-openapi' -import { MAX_SEQUENCE_LENGTH } from '@protifer/shared' +import { MAX_SEQUENCE_LENGTH_CAP } from '@protifer/shared' -/** Shared submit body (sequence + optional accession). Domains attach their own `.openapi()` label. */ +/** + * Shared submit body (sequence + optional accession). The static `.max()` is + * the absolute hard ceiling; the per-account resolved `maxSequenceLength` is + * enforced per request in the route handlers. Domains attach their own + * `.openapi()` label. + */ export const SubmitBodySchema = z.object({ sequence: z .string() .min(1, 'sequence is required and must be non-empty') .max( - MAX_SEQUENCE_LENGTH, - `sequence must be at most ${String(MAX_SEQUENCE_LENGTH)} residues`, + MAX_SEQUENCE_LENGTH_CAP, + `sequence must be at most ${String(MAX_SEQUENCE_LENGTH_CAP)} residues`, ) .openapi({ example: 'MKTVRQERLK' }), accession: z.string().optional().openapi({ diff --git a/services/api-gateway/src/schemas/schemas.test.ts b/services/api-gateway/src/schemas/schemas.test.ts index 5fda107..f5b5783 100644 --- a/services/api-gateway/src/schemas/schemas.test.ts +++ b/services/api-gateway/src/schemas/schemas.test.ts @@ -1,4 +1,4 @@ -import { MAX_SEQUENCE_LENGTH } from '@protifer/shared' +import { MAX_SEQUENCE_LENGTH_CAP } from '@protifer/shared' import { describe, it, expect } from 'vitest' import { @@ -27,14 +27,14 @@ describe('PredictionSubmitBodySchema', () => { it('accepts a sequence at the max length', () => { expect( PredictionSubmitBodySchema.safeParse({ - sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH), + sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH_CAP), }).success, ).toBe(true) }) it('rejects a sequence longer than the max length', () => { expect( PredictionSubmitBodySchema.safeParse({ - sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH + 1), + sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH_CAP + 1), }).success, ).toBe(false) }) @@ -54,14 +54,14 @@ describe('EmbeddingSubmitBodySchema', () => { it('accepts a sequence at the max length', () => { expect( EmbeddingSubmitBodySchema.safeParse({ - sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH), + sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH_CAP), }).success, ).toBe(true) }) it('rejects a sequence longer than the max length', () => { expect( EmbeddingSubmitBodySchema.safeParse({ - sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH + 1), + sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH_CAP + 1), }).success, ).toBe(false) }) diff --git a/services/api-gateway/src/shedding/decide.ts b/services/api-gateway/src/shedding/decide.ts index dcc6cff..f17e501 100644 --- a/services/api-gateway/src/shedding/decide.ts +++ b/services/api-gateway/src/shedding/decide.ts @@ -15,6 +15,8 @@ export interface DecideInput { state: SheddingStateSnapshot config: SheddingConfig plan: Plan + /** Account's resolved SLO seconds; falls back to `config.sloSeconds[plan]`. */ + sloSeconds?: number sequenceResidues: number nowMs: number jitter?: () => number @@ -79,7 +81,7 @@ export function decideAdmission(input: DecideInput): AdmissionDecision { } } - const slo = config.sloSeconds[plan] + const slo = input.sloSeconds ?? config.sloSeconds[plan] if (slo === 0) { return { admit: true, estimatedWaitSeconds } } diff --git a/tests/backend-e2e/helpers.ts b/tests/backend-e2e/helpers.ts index 66212b1..afb7de7 100644 --- a/tests/backend-e2e/helpers.ts +++ b/tests/backend-e2e/helpers.ts @@ -67,6 +67,16 @@ export async function deleteTestUser(userId: string): Promise { await pool.query('DELETE FROM "user" WHERE id = $1', [userId]) } +export async function setUserLimits( + userId: string, + limits: Record | null, +): Promise { + await pool.query('UPDATE "user" SET limits = $1::jsonb WHERE id = $2', [ + limits === null ? null : JSON.stringify(limits), + userId, + ]) +} + export async function apiRequest( method: string, path: string, diff --git a/tests/backend-e2e/limits-override.test.ts b/tests/backend-e2e/limits-override.test.ts new file mode 100644 index 0000000..bd24c48 --- /dev/null +++ b/tests/backend-e2e/limits-override.test.ts @@ -0,0 +1,55 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest' + +import { + apiRequest, + cleanQueues, + createTestUser, + deleteTestUser, + setUserLimits, + type TestUser, +} from './helpers' + +const SEQUENCE = 'MKTVRQERLKSIVRILERSKEPVSGAQL' // 28 residues + +describe('Per-account limit overrides E2E', () => { + const created: string[] = [] + + beforeEach(async () => { + await cleanQueues() + }) + + afterEach(async () => { + for (const id of created.splice(0)) await deleteTestUser(id) + }) + + async function freeUser(): Promise { + const u = await createTestUser('free') + created.push(u.userId) + return u + } + + it('enforces a per-account maxSequenceLength override below the plan default', async () => { + const u = await freeUser() + await setUserLimits(u.userId, { maxSequenceLength: 10 }) + + const res = await apiRequest('POST', '/v1/embeddings', { + body: { sequence: SEQUENCE }, + bearer: u.key, + }) + + expect(res.status).toBe(400) + const body = (await res.json()) as { code?: string } + expect(body.code).toBe('VALIDATION_ERROR') + }) + + it('admits the same sequence for an account on the plan default', async () => { + const u = await freeUser() + + const res = await apiRequest('POST', '/v1/embeddings', { + body: { sequence: SEQUENCE }, + bearer: u.key, + }) + + expect(res.status).toBe(202) + }) +})