From 64a2a61256825e5b77e44822e2e61f3b1742669d Mon Sep 17 00:00:00 2001
From: Tobias O <tobias.olenyi@tum.de>
Date: Fri, 19 Jun 2026 11:51:44 +0200
Subject: [PATCH] feat(plan): per-account limit overrides resolved at the auth
 seam
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split the `plan` enum into a scheduling class (priority + SLO bucket)
and per-account numeric quota. `PlanResolver.resolve` now returns
`ResolvedAccount { plan, limits }`, merging a sparse `user.limits jsonb`
override over plan-class defaults composed from existing config sources.

- shared: `EffectiveLimits`/`ResolvedAccount`, `OverrideLimitsSchema`
  (strict, capped positive ints), `mergeLimits`, `MAX_SEQUENCE_LENGTH_CAP`
- gateway: `DbPlanResolver` reads `plan, limits` and parses defensively
  (parse failure → class defaults); `AuthContext` carries `limits`
- consumers read `auth.limits.*`: submissions/min, concurrency,
  per-request maxSequenceLength, and shedding's per-account sloSeconds
- additive nullable `user.limits` migration + dev seed example
- admin-only `PUT/DELETE /admin/accounts/{userId}/limits` (full-replace /
  clear), audit-logged, behind the existing admin gate
- backend E2E proving an override enforces limits differing from the plan

Archive the completed change; create the canonical `plan-limits` spec.
Fold the prod shedding-validation details (enforce toggle, configurability
round-trip, per-account sloSeconds knob) into `simplify-load-testing`.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                                     |   3 +-
 infra/postgres/seeds/dev-users-plan.sql       |   5 +
 .../.openspec.yaml                            |   2 +
 .../design.md                                 |  77 +++++++++++++
 .../proposal.md                               |  31 ++++++
 .../specs/plan-limits/spec.md                 |  69 ++++++++++++
 .../tasks.md                                  |  40 +++++++
 .../changes/simplify-load-testing/design.md   |   9 +-
 .../specs/load-testing/spec.md                |  27 ++++-
 .../changes/simplify-load-testing/tasks.md    |   8 +-
 openspec/specs/plan-limits/spec.md            |  75 +++++++++++++
 packages/shared/src/index.ts                  |  11 +-
 packages/shared/src/plan.test.ts              |  55 ++++++++++
 packages/shared/src/plan.ts                   |  60 ++++++++++-
 packages/shared/src/types.ts                  |   3 +
 services/api-gateway/openapi.v1.json          |   4 +-
 services/api-gateway/scripts/migrate.ts       |  14 +++
 services/api-gateway/src/admin/limits.test.ts | 102 ++++++++++++++++++
 services/api-gateway/src/admin/limits.ts      |  67 ++++++++++++
 services/api-gateway/src/app.ts               |  59 ++++++++--
 .../src/contracts/job-contracts.test.ts       |  12 ++-
 .../src/middleware/auth/auth.test.ts          |  18 +++-
 .../src/middleware/auth/authenticate.ts       |   9 +-
 .../src/middleware/rate-limit.test.ts         |  23 ++--
 .../api-gateway/src/middleware/rate-limit.ts  |  20 +---
 .../src/middleware/shedding.test.ts           |  26 ++++-
 .../api-gateway/src/middleware/shedding.ts    |   1 +
 .../api-gateway/src/plan/db-resolver.test.ts  |  87 +++++++++++----
 services/api-gateway/src/plan/db-resolver.ts  |  46 ++++++--
 services/api-gateway/src/routes/_utils.ts     |   7 +-
 .../api-gateway/src/routes/embeddings.test.ts |  12 ++-
 services/api-gateway/src/routes/embeddings.ts |   9 ++
 .../src/routes/predictions.test.ts            |  15 ++-
 .../api-gateway/src/routes/predictions.ts     |   9 ++
 services/api-gateway/src/schemas/common.ts    |  13 ++-
 .../api-gateway/src/schemas/schemas.test.ts   |  10 +-
 services/api-gateway/src/shedding/decide.ts   |   4 +-
 tests/backend-e2e/helpers.ts                  |  10 ++
 tests/backend-e2e/limits-override.test.ts     |  55 ++++++++++
 39 files changed, 1012 insertions(+), 95 deletions(-)
 create mode 100644 openspec/changes/archive/2026-06-19-per-account-limit-overrides/.openspec.yaml
 create mode 100644 openspec/changes/archive/2026-06-19-per-account-limit-overrides/design.md
 create mode 100644 openspec/changes/archive/2026-06-19-per-account-limit-overrides/proposal.md
 create mode 100644 openspec/changes/archive/2026-06-19-per-account-limit-overrides/specs/plan-limits/spec.md
 create mode 100644 openspec/changes/archive/2026-06-19-per-account-limit-overrides/tasks.md
 create mode 100644 openspec/specs/plan-limits/spec.md
 create mode 100644 services/api-gateway/src/admin/limits.test.ts
 create mode 100644 services/api-gateway/src/admin/limits.ts
 create mode 100644 tests/backend-e2e/limits-override.test.ts

diff --git a/CLAUDE.md b/CLAUDE.md
index 3b423e2..3a4a96e 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -79,7 +79,8 @@ Workspaces: `packages/*`, `services/*`, `apps/*`, `infra/*`, `tests/*`, `scripts
 
 - better-auth + GitHub OAuth. Middleware enriches Hono context with the user; frontend `AuthProvider` (`apps/web/src/features/auth/context.tsx`) mirrors session state and sends `credentials: 'include'`.
 - Rate limits (submission + poll tiers) defined in `services/api-gateway/src/middleware/rate-limit.ts`; Redis-backed via `hono-rate-limiter`.
-- Per-plan quotas in `packages/shared/src/plan.ts`.
+- `plan` is the scheduling **class** (queue priority + SLO bucket); numeric **quota** is resolved separately. `PlanResolver.resolve` returns `ResolvedAccount = { plan, limits }` where `limits` is `EffectiveLimits` (`submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, `sloSeconds`) — class defaults merged with a per-account override from the `user.limits jsonb` column (`mergeLimits` + `OverrideLimitsSchema` in `packages/shared/src/plan.ts`; absent/invalid → class default). Consumers read `auth.limits.*` off the auth context, never `PLAN_LIMITS[plan]` at the call site.
+- Overrides are admin-only: `PUT/DELETE /admin/accounts/{userId}/limits` (`services/api-gateway/src/admin/limits.ts`); never user-editable. PUT is full-replace; DELETE clears to class defaults.
 
 ## Errors
 
diff --git a/infra/postgres/seeds/dev-users-plan.sql b/infra/postgres/seeds/dev-users-plan.sql
index c259db4..8faace9 100644
--- a/infra/postgres/seeds/dev-users-plan.sql
+++ b/infra/postgres/seeds/dev-users-plan.sql
@@ -1,3 +1,8 @@
 -- Seed dev-only user plan overrides. Safe to re-run.
 UPDATE "user" SET plan = 'pro'  WHERE email = 'dev-pro@example.com';
 UPDATE "user" SET plan = 'free' WHERE email = 'dev-free@example.com';
+
+-- Example per-account limit override: a pro account with raised quotas.
+UPDATE "user"
+SET limits = '{"submissionsPerMinute": 120, "maxConcurrentJobs": 25}'::jsonb
+WHERE email = 'dev-pro@example.com';
diff --git a/openspec/changes/archive/2026-06-19-per-account-limit-overrides/.openspec.yaml b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/.openspec.yaml
new file mode 100644
index 0000000..95ae5a2
--- /dev/null
+++ b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/.openspec.yaml
@@ -0,0 +1,2 @@
+schema: spec-driven
+created: 2026-06-18
diff --git a/openspec/changes/archive/2026-06-19-per-account-limit-overrides/design.md b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/design.md
new file mode 100644
index 0000000..6df2de4
--- /dev/null
+++ b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/design.md
@@ -0,0 +1,77 @@
+## Context
+
+`DbPlanResolver.resolve(userId)` returns a bare `Plan` enum read from `SELECT plan FROM "user"`. Every quota decision then does its own `PLAN_LIMITS[plan]` lookup:
+
+- `middleware/rate-limit.ts` → `submissionsPerMinute`
+- `routes/_utils.ts` (`withinConcurrentJobLimit`) → `maxConcurrentJobs`
+- submission validation → `MAX_SEQUENCE_LENGTH` (a single global constant, not yet per-plan)
+- shedding → `sloSeconds[plan]` from `loadSheddingConfig`
+
+Because the only DB-derived input is the class name, all accounts on a class share identical quotas. The `fix/shedding-residue-leak` branch added `RATE_LIMIT_SUBMISSIONS_*` env config, but those are global-per-class defaults (load-test driven) — they cannot express two enterprise customers with different agreements. This design introduces per-account overrides resolved at the existing resolver seam.
+
+## Goals / Non-Goals
+
+**Goals:**
+
+- A single resolution seam returns merged `EffectiveLimits`; call sites stop indexing `PLAN_LIMITS` directly for quota.
+- Per-account overrides stored in `user.limits jsonb`, sparse and partial.
+- Override envelope: `submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, `sloSeconds`.
+- Admin-only management surface; never user-editable.
+- Plan enum preserved for priority + SLO class.
+
+**Non-Goals:**
+
+- Enterprise self-serve UI or billing/entitlement-provider integration.
+- Removing or replacing the `RATE_LIMIT_SUBMISSIONS_*` env knobs (they remain the per-class defaults).
+- Per-account _priority_ or _class_ overrides — scheduling stays class-based.
+
+## Decisions
+
+### 1. Resolver returns `EffectiveLimits`, not `Plan`
+
+`PlanResolver.resolve` becomes `resolve(userId): Promise<ResolvedAccount>` where `ResolvedAccount = { plan: Plan; limits: EffectiveLimits }`. `limits` is computed as `mergeLimits(PLAN_LIMITS[plan]+slo, override)`. Call sites read `auth.limits.*`; scheduling reads `auth.plan`.
+
+_Alternative considered:_ keep returning `Plan` and add a parallel `resolveLimits`. Rejected — two lookups invite drift and a second DB round-trip; one resolved object keeps the merge in one place.
+
+### 2. Carry resolved limits on the auth context
+
+The auth context already exposes `{ sub, plan }`. Extend it to `{ sub, plan, limits }` so middleware and route utils read the pre-merged object without re-querying. The DB read already happens during plan resolution; the override travels in the same row (`SELECT plan, limits FROM "user"`), so no extra query.
+
+### 3. jsonb sparse override, validated with Zod
+
+`limits jsonb` (nullable). Stored value is a partial object; a Zod schema (`OverrideLimitsSchema`) validates positive integers per field and `.strict()` rejects unknown keys. jsonb chosen for flexibility — the agreement-shaped envelope is expected to grow and is rarely touched, so typed columns + migrations per field would be churn for little gain. The merge is `{ ...defaults, ...parsed }`; a parse failure is logged and treated as no override (fail-safe to class defaults).
+
+_Alternative considered:_ typed columns. Rejected for now — premature rigidity; revisit if the envelope stabilizes or needs DB-level constraints.
+
+### 4. `maxSequenceLength` becomes plan-aware
+
+Today `MAX_SEQUENCE_LENGTH` is one global constant. The default per-class value seeds from that constant for all classes (no behavior change by default); the override can raise it per account. Submission validation reads `auth.limits.maxSequenceLength`.
+
+### 5. SLO seconds resolved through the same object
+
+`loadSheddingConfig().sloSeconds` provides the class default. The resolver overlays a per-account `sloSeconds` when present. The shedding decision reads the account's resolved value rather than indexing the global config by plan. Default behavior unchanged when no override is set.
+
+### 6. Admin route mirrors flag overrides
+
+`PUT /admin/accounts/{userId}/limits` (validate + persist) and `DELETE` (clear → NULL), guarded by the same admin authorization as `PUT /admin/flags/<name>`. No GET on a user-facing route; an admin GET is optional and in scope for the admin surface only.
+
+**PUT is full-replace**: the request body becomes the entire stored override object (any field not present is dropped, not merged). `DELETE` sets the column NULL to revert to class defaults. Chosen over PATCH/merge for predictability — there is one obvious way to read or reset an account's override, and "clear one field" needs no null-sentinel convention. PATCH deferred unless an operator workflow demands it.
+
+## Risks / Trade-offs
+
+- **Auth-context shape change ripples to every consumer** → Centralize the type in one place (`Variables`/auth type) and let the typechecker enumerate call sites; the compiler turns the ripple into a checklist.
+- **jsonb lets malformed data reach the row out-of-band** (manual SQL, bad seed) → Resolver parses defensively and falls back to class defaults on any parse failure; never throws into the request path.
+- **Override silently widens limits beyond infra capacity** (e.g. someone sets `submissionsPerMinute: 10^6`) → Validation caps with sane `.max()` bounds; shedding remains the backstop since it operates on observed throughput, not declared limits.
+- **Drift between class default sources** (`PLAN_LIMITS`, `RATE_LIMIT_SUBMISSIONS_*` env, SLO config) → Resolver composes the class default from the same sources the gateway already loads at boot; no new default table.
+- **Stale resolved limits within a request** → Limits resolve per request at auth time (same cadence as plan today); an override change takes effect on the next request, consistent with current plan-change behavior.
+
+## Migration Plan
+
+1. Add nullable `user.limits jsonb` column (additive migration; no backfill — NULL = class defaults).
+2. Ship resolver + auth-context change; all defaults equal current behavior, so deploying with zero overrides is a no-op.
+3. Add admin route last; until it exists, overrides can only be set via seed/SQL (used for the dev seed example).
+4. **Rollback:** revert code (resolver falls back to class defaults regardless of column); the nullable column can remain unused with no effect.
+
+## Open Questions
+
+- Do we want an audit-log line on override set/clear? Likely yes (reuse the submission/admin logging idiom), but not blocking.
diff --git a/openspec/changes/archive/2026-06-19-per-account-limit-overrides/proposal.md b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/proposal.md
new file mode 100644
index 0000000..27809b1
--- /dev/null
+++ b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/proposal.md
@@ -0,0 +1,31 @@
+## Why
+
+Today `DbPlanResolver` reads only the `plan` enum name from Postgres, and every downstream consumer does a static `PLAN_LIMITS[plan]` lookup. As a result "enterprise" is just a bigger static tier — a generous pro — and two enterprise customers with different contractual limits are impossible to express. The original intent was that enterprise limits follow the per-customer agreement. This change wires that intent without turning every quota into a deployment-global env knob (which is what the load-test-motivated `RATE_LIMIT_SUBMISSIONS_*` config does, and which cannot differentiate two accounts on the same plan).
+
+## What Changes
+
+- Split the meaning carried by the `plan` enum into two concerns:
+  - **Class** — queue priority and SLO bucket. Stays an enum (`free` | `pro` | `enterprise`); a small ordered set is correct for scheduling.
+  - **Quota** — numeric entitlements (`submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, SLO seconds). Becomes per-account overridable.
+- Add a `limits jsonb` column to the Postgres `user` table holding a sparse, partial override object. Absent column / absent field = fall back to the plan default.
+- Change the plan-resolution seam: the resolver returns **EffectiveLimits** (`account.overrides ?? PLAN_LIMITS[class]`, merged field-by-field) instead of a bare `Plan` enum. The enum is still exposed for priority + SLO-class selection.
+- Update consumers to read resolved limits: `middleware/rate-limit.ts` (submissions/min), `routes/_utils.ts` (concurrency), submission validation (`MAX_SEQUENCE_LENGTH`), and shedding SLO seconds.
+- Add an **admin-only** endpoint to set/clear an account's override (mirrors the existing `PUT /admin/flags/<name>` pattern). No enterprise self-serve — overrides are never user-editable.
+
+## Capabilities
+
+### New Capabilities
+
+- `plan-limits`: Resolution of an account's effective quota limits — merging per-account DB overrides over plan-class defaults — and the admin surface for managing those overrides. Establishes the class-vs-quota boundary that scheduling (priority/SLO) and quota enforcement (rate limit/concurrency/sequence length) both consume.
+
+### Modified Capabilities
+
+<!-- request-shedding lives only in the unarchived fix-shedding-residue-leak change delta; no published spec to amend. SLO-seconds resolution is captured as a requirement in the new plan-limits capability. -->
+
+## Impact
+
+- **DB**: new `user.limits jsonb` column (nullable) + migration; dev seed (`infra/postgres/seeds/dev-users-plan.sql`) gains an example override.
+- **Code**: `packages/shared/src/plan.ts` (`PlanResolver` contract → `EffectiveLimits`, override-merge helper, Zod override schema), `services/api-gateway/src/plan/db-resolver.ts`, `middleware/rate-limit.ts`, `routes/_utils.ts`, shedding SLO lookup, submission length validation, new `admin/limits.ts` route.
+- **API**: new admin route `PUT/DELETE /admin/accounts/{userId}/limits`; no change to public submission contracts (behavior differs only by resolved numbers).
+- **Auth/PII**: override values are non-PII operational numbers; admin route reuses existing admin authorization.
+- **Out of scope**: enterprise self-serve UI, billing integration, the `RATE_LIMIT_SUBMISSIONS_*` env knobs (they remain as global-per-class defaults feeding `PLAN_LIMITS`).
diff --git a/openspec/changes/archive/2026-06-19-per-account-limit-overrides/specs/plan-limits/spec.md b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/specs/plan-limits/spec.md
new file mode 100644
index 0000000..3f6155c
--- /dev/null
+++ b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/specs/plan-limits/spec.md
@@ -0,0 +1,69 @@
+## ADDED Requirements
+
+### Requirement: Plan class versus quota separation
+
+The system SHALL treat an account's `plan` as a scheduling **class** (queue priority and SLO bucket) and SHALL resolve numeric **quota** limits separately, so that two accounts on the same class MAY enforce different quotas.
+
+The quota envelope SHALL consist of: `submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, and `sloSeconds`.
+
+#### Scenario: Class still drives scheduling
+
+- **WHEN** an account's queue priority or SLO bucket is selected
+- **THEN** the system uses the `plan` enum (`free` | `pro` | `enterprise`) and ignores any per-account quota override
+
+#### Scenario: Quota resolved independently of class
+
+- **WHEN** a quota limit is enforced for an account
+- **THEN** the system uses the resolved effective limit, not a direct `PLAN_LIMITS[plan]` lookup at the call site
+
+### Requirement: Effective limit resolution
+
+The plan resolver SHALL return an `EffectiveLimits` object computed by merging a per-account override over the plan-class default on a field-by-field basis. An absent override object, an absent field, or a resolution failure SHALL fall back to the plan-class default for that field.
+
+#### Scenario: No override present
+
+- **WHEN** an account has no `limits` override stored
+- **THEN** every effective limit equals the corresponding `PLAN_LIMITS[plan]` (and SLO default) value
+
+#### Scenario: Partial override merges field-by-field
+
+- **WHEN** an account's override sets only `maxConcurrentJobs`
+- **THEN** `maxConcurrentJobs` uses the override value and all other limits fall back to the plan-class default
+
+#### Scenario: Resolution failure is safe
+
+- **WHEN** the override store read fails or returns an unparseable value
+- **THEN** the resolver logs a warning and returns the plan-class defaults without throwing
+
+### Requirement: Override storage and validation
+
+Per-account overrides SHALL be stored in a nullable `limits jsonb` column on the Postgres `user` table as a sparse partial object. The system SHALL validate override values against a schema before applying them: each present field MUST be a positive integer (or zero where the underlying default permits zero, e.g. an SLO bucket), and unknown fields SHALL be rejected.
+
+#### Scenario: Valid sparse override accepted
+
+- **WHEN** an override `{ "submissionsPerMinute": 1000 }` is validated
+- **THEN** validation passes and only `submissionsPerMinute` is overridden
+
+#### Scenario: Invalid override rejected
+
+- **WHEN** an override contains a negative number, a non-integer, or an unknown field
+- **THEN** validation fails and the override is not persisted
+
+### Requirement: Admin-only override management
+
+The system SHALL expose admin-only endpoints to set and clear an account's limit override, mirroring the existing admin flag-override authorization. Overrides SHALL NOT be settable or viewable through any user-facing (non-admin) route.
+
+#### Scenario: Admin sets an override
+
+- **WHEN** an authenticated admin sends a valid override for an account via the admin route
+- **THEN** the override is persisted and subsequent submissions for that account enforce the merged effective limits
+
+#### Scenario: Admin clears an override
+
+- **WHEN** an admin clears an account's override
+- **THEN** the `limits` column is reset and the account reverts to plan-class defaults
+
+#### Scenario: Non-admin cannot set an override
+
+- **WHEN** a non-admin caller attempts to set or read an account override
+- **THEN** the request is rejected by admin authorization
diff --git a/openspec/changes/archive/2026-06-19-per-account-limit-overrides/tasks.md b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/tasks.md
new file mode 100644
index 0000000..d5409d1
--- /dev/null
+++ b/openspec/changes/archive/2026-06-19-per-account-limit-overrides/tasks.md
@@ -0,0 +1,40 @@
+## 1. Shared resolution layer (`packages/shared`)
+
+- [x] 1.1 Define `EffectiveLimits` type (`submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, `sloSeconds`) and `ResolvedAccount = { plan: Plan; limits: EffectiveLimits }` in `plan.ts`
+- [x] 1.2 Add `OverrideLimitsSchema` (Zod, `.strict()`, positive-int fields with sane `.max()` caps) and an exported `LimitsOverride` type
+- [x] 1.3 Add `mergeLimits(classDefaults, override)` field-by-field merge helper; absent/invalid → class default
+- [x] 1.4 Change `PlanResolver.resolve` contract to return `ResolvedAccount`; export `MAX_SEQUENCE_LENGTH` as the per-class default seed
+- [x] 1.5 Unit-test merge + schema (no override, partial override, invalid/unknown-field rejection)
+
+## 2. DB resolver + auth context (`services/api-gateway`)
+
+- [x] 2.1 Extend the user query to `SELECT plan, limits FROM "user"` and parse `limits` defensively in `DbPlanResolver`
+- [x] 2.2 Resolver composes class defaults from existing config sources (`PLAN_LIMITS`, `RATE_LIMIT_SUBMISSIONS_*`, shedding `sloSeconds`) and merges the override; parse failure logs + falls back
+- [x] 2.3 Extend the auth/`Variables` context type from `{ sub, plan }` to `{ sub, plan, limits }`; populate at resolution
+- [x] 2.4 Update `DbPlanResolver` unit tests (no override, partial override, read failure → defaults)
+
+## 3. Quota consumers read resolved limits
+
+- [x] 3.1 `middleware/rate-limit.ts`: read `auth.limits.submissionsPerMinute` instead of `PLAN_LIMITS[plan]`
+- [x] 3.2 `routes/_utils.ts` (`withinConcurrentJobLimit`): read `auth.limits.maxConcurrentJobs`
+- [x] 3.3 Submission validation: enforce `auth.limits.maxSequenceLength` per request
+- [x] 3.4 Shedding decision: use the account's resolved `sloSeconds` rather than indexing global config by plan
+- [x] 3.5 Update affected unit tests; confirm defaults reproduce current behavior with no override
+
+## 4. Persistence
+
+- [x] 4.1 Add additive migration: nullable `user.limits jsonb` column (no backfill)
+- [x] 4.2 Add an example override to `infra/postgres/seeds/dev-users-plan.sql`
+
+## 5. Admin management surface
+
+- [x] 5.1 Add `admin/limits.ts` route: `PUT /admin/accounts/{userId}/limits` (validate via `OverrideLimitsSchema`, persist) guarded by existing admin authorization
+- [x] 5.2 Add `DELETE /admin/accounts/{userId}/limits` (set column NULL → revert to class defaults)
+- [x] 5.3 Emit an audit log line on set/clear (reuse admin/submission logging idiom)
+- [x] 5.4 Add route tests: valid set, invalid rejected, clear reverts, non-admin rejected
+
+## 6. Verification
+
+- [x] 6.1 `bun run typecheck && bun run lint && bun run test` green
+- [x] 6.2 Backend E2E (`bun run test:int`) covering an account with an override enforcing different limits than its plan default
+- [x] 6.3 Update CLAUDE.md plan/auth notes if the auth-context shape change warrants it
diff --git a/openspec/changes/simplify-load-testing/design.md b/openspec/changes/simplify-load-testing/design.md
index cca9fa0..648248e 100644
--- a/openspec/changes/simplify-load-testing/design.md
+++ b/openspec/changes/simplify-load-testing/design.md
@@ -70,6 +70,13 @@ Emit a `submissions_total{route,plan}` (or equivalently-labeled) counter at the
 - **Why a counter, not just the log line:** the log line is per-event and not aggregable in the metrics pipeline; reconciliation needs a rate-able series next to `bullmq_*`.
 - **Alternative — infer submissions from `http_requests_total{route,status}`:** rejected as the primary signal. HTTP status counts conflate retries, polls, and non-submission routes; an explicit submission counter is unambiguous and plan-labeled.
 
+### Decision 7: The saturation run drives enforce mode and a per-account SLO knob
+
+Shedding defaults to shadow, so `saturate.js` MUST turn enforce on (the `shedding.enforce` flag, effective within the flag cache window, no restart) to exercise the 503 path, assert at least one shed, then revert. The shed threshold is the account's resolved `sloSeconds` (from `per-account-limit-overrides`' `user.limits`), not a static per-plan constant — so the run uses a dedicated saturation account with a tight `sloSeconds` override to trip shedding at low, deterministic load on prod rather than brute-forcing genuine over-cap throughput. This doubles as the configurability proof: flip enforce on → shed, flip off → admit.
+
+- **Alternative — assume prod already runs enforce:** rejected. Prod defaults to shadow; relying on ambient config silently turns the run into a no-op when enforce is off.
+- **Alternative — saturate by raw over-cap throughput only:** rejected for prod. Genuine saturation burns GPU and writes permanent S3; a tight per-account SLO trips the same code path far cheaper and deterministically.
+
 ## Risks / Trade-offs
 
 - **Reads metrics not yet built** → the compute/shed assertions depend on `prediction-latency-observability` and `fix-shedding-residue-leak`. Mitigation: sequence this change after them; land the admission tier + submission counter (both independent) first.
@@ -89,6 +96,6 @@ Rollback: restore `throughput.js` and the old scenario set; the submission count
 
 ## Open Questions
 
-- **Exact pro concurrent-cap value** for sizing `pipeline.js` VUs — read from env with a conservative default; pin once the plan-limits source (`packages/shared/src/plan.ts`) is confirmed during implementation.
+- **Exact pro concurrent-cap value** for sizing `pipeline.js` VUs — _Resolved:_ the cap and SLO are now per-account `EffectiveLimits` (resolved by `per-account-limit-overrides` from `user.limits` over the plan-class default), not a static `PLAN_LIMITS[plan]` constant. `pipeline.js` still reads a VU cap from env (conservative default); `saturate.js` provisions a dedicated account with a `sloSeconds` override as the saturation knob.
 - **`admission.js` structure** — one k6 script with two scenarios (free + pro via `exec`), or keep them as separate VU functions in one file? Default: one file, two scenario functions, to keep the cron a single step.
 - **Submission counter label set** — `{route,plan}` is the proposed minimum; confirm whether `outcome` (accepted/shed/rate-limited) is worth adding here or is already covered by `admitted_residues_total` + `requests_shed_total`. Default: `{route,plan}` only, lean on existing counters for outcomes.
diff --git a/openspec/changes/simplify-load-testing/specs/load-testing/spec.md b/openspec/changes/simplify-load-testing/specs/load-testing/spec.md
index 638b78b..9f02bd9 100644
--- a/openspec/changes/simplify-load-testing/specs/load-testing/spec.md
+++ b/openspec/changes/simplify-load-testing/specs/load-testing/spec.md
@@ -49,12 +49,33 @@ The end-to-end scenario SHALL submit unique sequences and poll each job to a ter
 
 ### Requirement: The saturation scenario is the only shedding/503 coverage
 
-The saturation scenario SHALL drive submissions above sustainable throughput with mixed sequence lengths, tolerate 503 responses, and read `shedding_residues_per_second` for steady-state calibration of the shedding EWMA. It SHALL be the sole scenario that exercises the 503 shed path, and SHALL run on demand only.
+The saturation scenario SHALL drive submissions above sustainable throughput with mixed sequence lengths, tolerate 503 responses during the ramp, and read `shedding_residues_per_second` for steady-state calibration of the shedding EWMA. It SHALL be the sole scenario that exercises the 503 shed path, and SHALL run on demand only.
+
+Because shedding defaults to shadow mode (`SHED_MODE=shadow`), the run SHALL ensure enforce mode is active before asserting the shed path — by setting the `shedding.enforce` flag override (effective within the flag cache window, no restart) — and SHALL revert it on completion. Under sustained over-cap load in enforce mode the run SHALL observe at least one `503` carrying `Retry-After`; a run that produces no `503` SHALL fail rather than pass, so a shadow-mode no-op is never mistaken for shedding coverage.
 
 #### Scenario: Over-cap load surfaces shedding for calibration
 
-- **WHEN** the saturation scenario sustains over-cap load
-- **THEN** 503 responses are tolerated (not counted as failures), real network errors still fail the run, and `shedding_residues_per_second` is readable for calibration during the steady-state window
+- **WHEN** the saturation scenario sustains over-cap load with enforce mode active
+- **THEN** 503 responses are tolerated (not counted as failures) and carry `Retry-After`, real network errors still fail the run, and `shedding_residues_per_second` is readable for calibration during the steady-state window
+
+#### Scenario: A run that never sheds fails
+
+- **WHEN** the saturation run completes without observing any 503 under sustained over-cap enforce-mode load
+- **THEN** the run fails, signalling either that enforce was not active or that the load did not exceed the resolved SLO
+
+### Requirement: Shedding enforcement is runtime-configurable without restart
+
+The shedding control plane SHALL be togglable at runtime: the `shedding.enabled` and `shedding.enforce` flag overrides take effect within the flag cache window with no redeploy, and an account's shed threshold SHALL be the per-account resolved `sloSeconds` (from the `user.limits` override) overlaid on the plan-class default, not a static per-plan constant. The saturation run SHALL exercise this surface: enabling enforce engages shedding and disabling it resumes admission.
+
+#### Scenario: Enforce toggle engages and disengages shedding
+
+- **WHEN** an operator flips `shedding.enforce` on and then off around a saturation run
+- **THEN** over-cap submissions shed (503) while enforce is on and resume admitting (202) within the flag cache window after it is turned off
+
+#### Scenario: Per-account SLO override sets the shed threshold
+
+- **WHEN** the saturation account carries a `sloSeconds` override in `user.limits`
+- **THEN** shedding for that account trips against the override value, allowing a deterministic saturation threshold without changing global config
 
 ### Requirement: Submission accounting is reconcilable end to end
 
diff --git a/openspec/changes/simplify-load-testing/tasks.md b/openspec/changes/simplify-load-testing/tasks.md
index ce82437..a16a135 100644
--- a/openspec/changes/simplify-load-testing/tasks.md
+++ b/openspec/changes/simplify-load-testing/tasks.md
@@ -23,7 +23,7 @@
 
 ## 5. Finalize pipeline.js (after prediction-latency-observability + fix-shedding-residue-leak)
 
-- [ ] 5.1 Remove the concurrent-cap 429 back-off path; size VUs ≤ the pro concurrent-job cap (read from env, conservative default; source the cap from `packages/shared/src/plan.ts`).
+- [ ] 5.1 Remove the concurrent-cap 429 back-off path; size VUs ≤ the pro concurrent-job cap (read from env, conservative default; the cap is now per-account resolved `EffectiveLimits` from `per-account-limit-overrides`, not a static `PLAN_LIMITS[plan]`).
 - [ ] 5.2 Treat an unexpected cap-429 as a failed check ("run mis-sized — lower VUs"), not a tolerated outcome.
 - [ ] 5.3 Trim in-script measurement to client-perceived `e2e_latency` + `submit_latency`; document that per-model latency / drain are read from `/metrics` (`triton_model_infer_duration_seconds`, `shedding_residues_per_second`, queue depth).
 - [ ] 5.4 Header documents this doubles as the `bound-prediction-fanout` tuning harness.
@@ -33,11 +33,15 @@
 - [ ] 6.1 Rename/replace `tests/load/shedding.js` → `tests/load/saturate.js`; keep over-cap ramp + 503 tolerance + mixed lengths.
 - [ ] 6.2 Update the calibration note to read the now-trustworthy `shedding_residues_per_second` aggregate drain rate.
 - [ ] 6.3 Header documents this is the ONLY 503/shedding coverage and is run on demand only (rare tier).
+- [ ] 6.4 Provision a dedicated saturation account on prod with a tight `sloSeconds` override (`user.limits`, via `PUT /admin/accounts/{userId}/limits`) so shedding trips at low, deterministic load; inject its bearer plus an admin bearer via env.
+- [ ] 6.5 In the run, enable enforce via `PUT /admin/flags/shedding.enforce` (effective ≤ flag cache window), assert ≥1 `503`+`Retry-After` under sustained over-cap load, then revert (clear the flag override + the account override); a run with zero 503s fails.
+- [ ] 6.6 Assert the configurability round-trip: with enforce off, the same over-cap load admits (202) within the flag cache window.
 
 ## 7. Verification
 
 - [ ] 7.1 `K6_SMOKE=true` syntax-validate every script (`admission.js`, `pipeline.js`, `saturate.js`).
 - [ ] 7.2 Run `admission.js` against prod: free-plan 429 fires with header, pro cached submits non-5xx, no real inference triggered.
 - [ ] 7.3 Run `pipeline.js` under the cap: jobs reach terminal state, no cap-429, and confirm `submissions_total` ≈ bullmq enqueued ≈ completed (accounting gap closed/visible).
-- [ ] 7.4 Run `saturate.js`: 503s tolerated, `shedding_residues_per_second` readable at steady state.
+- [ ] 7.4 Run `saturate.js` with enforce on: ≥1 `503`+`Retry-After` observed, 503s tolerated (not failures), `shedding_residues_per_second` readable at steady state; enforce reverted on completion.
+- [ ] 7.6 Confirm the per-account `sloSeconds` override drives the shed threshold (the saturation account sheds where a plan-default account would not at the same load).
 - [ ] 7.5 Repo gates: `bun run lint` (k6 scripts are `.js`, eslint-only), `bun run typecheck`/`test` for the gateway counter change.
diff --git a/openspec/specs/plan-limits/spec.md b/openspec/specs/plan-limits/spec.md
new file mode 100644
index 0000000..abd210d
--- /dev/null
+++ b/openspec/specs/plan-limits/spec.md
@@ -0,0 +1,75 @@
+# plan-limits Specification
+
+## Purpose
+
+TBD - created by archiving change per-account-limit-overrides. Update Purpose after archive.
+
+## Requirements
+
+### Requirement: Plan class versus quota separation
+
+The system SHALL treat an account's `plan` as a scheduling **class** (queue priority and SLO bucket) and SHALL resolve numeric **quota** limits separately, so that two accounts on the same class MAY enforce different quotas.
+
+The quota envelope SHALL consist of: `submissionsPerMinute`, `maxConcurrentJobs`, `maxSequenceLength`, and `sloSeconds`.
+
+#### Scenario: Class still drives scheduling
+
+- **WHEN** an account's queue priority or SLO bucket is selected
+- **THEN** the system uses the `plan` enum (`free` | `pro` | `enterprise`) and ignores any per-account quota override
+
+#### Scenario: Quota resolved independently of class
+
+- **WHEN** a quota limit is enforced for an account
+- **THEN** the system uses the resolved effective limit, not a direct `PLAN_LIMITS[plan]` lookup at the call site
+
+### Requirement: Effective limit resolution
+
+The plan resolver SHALL return an `EffectiveLimits` object computed by merging a per-account override over the plan-class default on a field-by-field basis. An absent override object, an absent field, or a resolution failure SHALL fall back to the plan-class default for that field.
+
+#### Scenario: No override present
+
+- **WHEN** an account has no `limits` override stored
+- **THEN** every effective limit equals the corresponding `PLAN_LIMITS[plan]` (and SLO default) value
+
+#### Scenario: Partial override merges field-by-field
+
+- **WHEN** an account's override sets only `maxConcurrentJobs`
+- **THEN** `maxConcurrentJobs` uses the override value and all other limits fall back to the plan-class default
+
+#### Scenario: Resolution failure is safe
+
+- **WHEN** the override store read fails or returns an unparseable value
+- **THEN** the resolver logs a warning and returns the plan-class defaults without throwing
+
+### Requirement: Override storage and validation
+
+Per-account overrides SHALL be stored in a nullable `limits jsonb` column on the Postgres `user` table as a sparse partial object. The system SHALL validate override values against a schema before applying them: each present field MUST be a positive integer (or zero where the underlying default permits zero, e.g. an SLO bucket), and unknown fields SHALL be rejected.
+
+#### Scenario: Valid sparse override accepted
+
+- **WHEN** an override `{ "submissionsPerMinute": 1000 }` is validated
+- **THEN** validation passes and only `submissionsPerMinute` is overridden
+
+#### Scenario: Invalid override rejected
+
+- **WHEN** an override contains a negative number, a non-integer, or an unknown field
+- **THEN** validation fails and the override is not persisted
+
+### Requirement: Admin-only override management
+
+The system SHALL expose admin-only endpoints to set and clear an account's limit override, mirroring the existing admin flag-override authorization. Overrides SHALL NOT be settable or viewable through any user-facing (non-admin) route.
+
+#### Scenario: Admin sets an override
+
+- **WHEN** an authenticated admin sends a valid override for an account via the admin route
+- **THEN** the override is persisted and subsequent submissions for that account enforce the merged effective limits
+
+#### Scenario: Admin clears an override
+
+- **WHEN** an admin clears an account's override
+- **THEN** the `limits` column is reset and the account reverts to plan-class defaults
+
+#### Scenario: Non-admin cannot set an override
+
+- **WHEN** a non-admin caller attempts to set or read an account override
+- **THEN** the request is rejected by admin authorization
diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts
index f7df4d3..7343ab5 100644
--- a/packages/shared/src/index.ts
+++ b/packages/shared/src/index.ts
@@ -37,11 +37,20 @@ export { sweepFilesystemBudget } from './storage-evict.ts'
 export {
   PLAN_LIMITS,
   MAX_SEQUENCE_LENGTH,
+  MAX_SEQUENCE_LENGTH_CAP,
+  OverrideLimitsSchema,
+  mergeLimits,
   DEFAULT_PLAN_PRIORITY,
   SHEDDING_DEFAULTS,
   loadSheddingConfig,
 } from './plan.ts'
-export type { PlanResolver, SheddingConfig } from './plan.ts'
+export type {
+  PlanResolver,
+  SheddingConfig,
+  EffectiveLimits,
+  ResolvedAccount,
+  LimitsOverride,
+} from './plan.ts'
 export * from './queue.ts'
 // sentry.ts: `_resetSentryForTests` stays internal (tested via direct path).
 export { initSentry } from './sentry.ts'
diff --git a/packages/shared/src/plan.test.ts b/packages/shared/src/plan.test.ts
index 862fa05..f550ac5 100644
--- a/packages/shared/src/plan.test.ts
+++ b/packages/shared/src/plan.test.ts
@@ -2,11 +2,16 @@ import { describe, it, expect } from 'vitest'
 
 import {
   DEFAULT_PLAN_PRIORITY,
+  MAX_SEQUENCE_LENGTH,
+  MAX_SEQUENCE_LENGTH_CAP,
+  OverrideLimitsSchema,
   PLAN_LIMITS,
   SHEDDING_DEFAULTS,
   loadSheddingConfig,
+  mergeLimits,
   parseBoolean,
 } from './plan.ts'
+import type { EffectiveLimits } from './plan.ts'
 
 describe('parseBoolean', () => {
   it('returns the fallback when the value is undefined', () => {
@@ -36,6 +41,56 @@ describe('PLAN_LIMITS', () => {
   })
 })
 
+describe('OverrideLimitsSchema', () => {
+  it('accepts a sparse override', () => {
+    const r = OverrideLimitsSchema.safeParse({ maxConcurrentJobs: 25 })
+    expect(r.success).toBe(true)
+  })
+
+  it('rejects negative, non-integer, and over-cap values', () => {
+    expect(
+      OverrideLimitsSchema.safeParse({ maxConcurrentJobs: -1 }).success,
+    ).toBe(false)
+    expect(
+      OverrideLimitsSchema.safeParse({ submissionsPerMinute: 1.5 }).success,
+    ).toBe(false)
+    expect(
+      OverrideLimitsSchema.safeParse({
+        maxSequenceLength: MAX_SEQUENCE_LENGTH_CAP + 1,
+      }).success,
+    ).toBe(false)
+  })
+
+  it('allows sloSeconds of zero', () => {
+    expect(OverrideLimitsSchema.safeParse({ sloSeconds: 0 }).success).toBe(true)
+  })
+
+  it('rejects unknown fields', () => {
+    expect(OverrideLimitsSchema.safeParse({ bogus: 1 }).success).toBe(false)
+  })
+})
+
+describe('mergeLimits', () => {
+  const defaults: EffectiveLimits = {
+    submissionsPerMinute: 10,
+    maxConcurrentJobs: 2,
+    maxSequenceLength: MAX_SEQUENCE_LENGTH,
+    sloSeconds: 30,
+  }
+
+  it('returns defaults when no override', () => {
+    expect(mergeLimits(defaults, null)).toEqual(defaults)
+    expect(mergeLimits(defaults, undefined)).toEqual(defaults)
+  })
+
+  it('merges a partial override field-by-field', () => {
+    expect(mergeLimits(defaults, { maxConcurrentJobs: 25 })).toEqual({
+      ...defaults,
+      maxConcurrentJobs: 25,
+    })
+  })
+})
+
 describe('DEFAULT_PLAN_PRIORITY', () => {
   it('enterprise drains before pro before free (lower = higher)', () => {
     expect(DEFAULT_PLAN_PRIORITY.enterprise).toBeLessThan(
diff --git a/packages/shared/src/plan.ts b/packages/shared/src/plan.ts
index 3ed9c59..86d100a 100644
--- a/packages/shared/src/plan.ts
+++ b/packages/shared/src/plan.ts
@@ -1,8 +1,22 @@
+import { z } from 'zod'
+
 import { zBooleanString } from './config.ts'
 import type { Plan } from './types.ts'
 
+export interface EffectiveLimits {
+  submissionsPerMinute: number
+  maxConcurrentJobs: number
+  maxSequenceLength: number
+  sloSeconds: number
+}
+
+export interface ResolvedAccount {
+  plan: Plan
+  limits: EffectiveLimits
+}
+
 export interface PlanResolver {
-  resolve(userId: string, email: string): Promise<Plan>
+  resolve(userId: string, email: string): Promise<ResolvedAccount>
 }
 
 export const PLAN_LIMITS: Record<
@@ -27,6 +41,50 @@ export const PLAN_LIMITS: Record<
  */
 export const MAX_SEQUENCE_LENGTH = 4096
 
+/**
+ * Absolute upper bound an account override may raise `maxSequenceLength` to.
+ * The static submit schema rejects above this; the per-account resolved limit
+ * (default `MAX_SEQUENCE_LENGTH`) is enforced per request below the cap.
+ */
+export const MAX_SEQUENCE_LENGTH_CAP = 16384
+
+/**
+ * Sparse per-account override of {@link EffectiveLimits}. Each present field
+ * replaces the plan-class default; absent fields fall back. `.strict()`
+ * rejects unknown keys; `.max()` caps guard against widening beyond infra.
+ */
+export const OverrideLimitsSchema = z
+  .object({
+    submissionsPerMinute: z.number().int().positive().max(100_000).optional(),
+    maxConcurrentJobs: z.number().int().positive().max(10_000).optional(),
+    maxSequenceLength: z
+      .number()
+      .int()
+      .positive()
+      .max(MAX_SEQUENCE_LENGTH_CAP)
+      .optional(),
+    sloSeconds: z.number().int().min(0).max(86_400).optional(),
+  })
+  .strict()
+
+export type LimitsOverride = z.infer<typeof OverrideLimitsSchema>
+
+export function mergeLimits(
+  classDefaults: EffectiveLimits,
+  override?: LimitsOverride | null,
+): EffectiveLimits {
+  if (!override) return { ...classDefaults }
+  return {
+    submissionsPerMinute:
+      override.submissionsPerMinute ?? classDefaults.submissionsPerMinute,
+    maxConcurrentJobs:
+      override.maxConcurrentJobs ?? classDefaults.maxConcurrentJobs,
+    maxSequenceLength:
+      override.maxSequenceLength ?? classDefaults.maxSequenceLength,
+    sloSeconds: override.sloSeconds ?? classDefaults.sloSeconds,
+  }
+}
+
 /**
  * BullMQ job priority per plan — lower integer = higher priority
  * (drained first). Admitted jobs carry this on both the parent prediction
diff --git a/packages/shared/src/types.ts b/packages/shared/src/types.ts
index 3f79eba..d4eb450 100644
--- a/packages/shared/src/types.ts
+++ b/packages/shared/src/types.ts
@@ -1,11 +1,14 @@
 import { z } from 'zod'
 
+import type { EffectiveLimits } from './plan.ts'
+
 export type Plan = 'free' | 'pro' | 'enterprise'
 
 export interface AuthContext {
   sub: string
   email: string
   plan: Plan
+  limits: EffectiveLimits
   method: 'api-key' | 'session'
   role?: 'admin' | 'user'
 }
diff --git a/services/api-gateway/openapi.v1.json b/services/api-gateway/openapi.v1.json
index bf01920..10941f0 100644
--- a/services/api-gateway/openapi.v1.json
+++ b/services/api-gateway/openapi.v1.json
@@ -33,7 +33,7 @@
           "sequence": {
             "type": "string",
             "minLength": 1,
-            "maxLength": 4096,
+            "maxLength": 16384,
             "example": "MKTVRQERLK"
           },
           "accession": {
@@ -210,7 +210,7 @@
           "sequence": {
             "type": "string",
             "minLength": 1,
-            "maxLength": 4096,
+            "maxLength": 16384,
             "example": "MKTVRQERLK"
           },
           "accession": {
diff --git a/services/api-gateway/scripts/migrate.ts b/services/api-gateway/scripts/migrate.ts
index 7c01d03..75e4553 100644
--- a/services/api-gateway/scripts/migrate.ts
+++ b/services/api-gateway/scripts/migrate.ts
@@ -62,6 +62,20 @@ try {
     END $$;
   `)
   console.log('[migrate] Phase 22: ensured users.role column (D-10).')
+
+  // Per-account quota override (nullable jsonb, no backfill — NULL = plan
+  // class defaults). Idempotent via pg_attribute existence check.
+  await pool.query(`
+    DO $$ BEGIN
+      IF NOT EXISTS (
+        SELECT 1 FROM pg_attribute
+        WHERE attrelid = '"user"'::regclass AND attname = 'limits' AND NOT attisdropped
+      ) THEN
+        ALTER TABLE "user" ADD COLUMN limits jsonb;
+      END IF;
+    END $$;
+  `)
+  console.log('[migrate] ensured user.limits column.')
 } finally {
   await pool.end()
 }
diff --git a/services/api-gateway/src/admin/limits.test.ts b/services/api-gateway/src/admin/limits.test.ts
new file mode 100644
index 0000000..37baa15
--- /dev/null
+++ b/services/api-gateway/src/admin/limits.test.ts
@@ -0,0 +1,102 @@
+import type { AuthContext, EffectiveLimits } from '@protifer/shared'
+import { Hono } from 'hono'
+import { describe, expect, it, vi } from 'vitest'
+
+import { createLimitsAdminRouter } from './limits.ts'
+import { createAdminRoleMiddleware } from '../middleware/admin-role.ts'
+import type { Variables } from '../types/hono.ts'
+
+const defaultLimits: EffectiveLimits = {
+  submissionsPerMinute: 300,
+  maxConcurrentJobs: 50,
+  maxSequenceLength: 4096,
+  sloSeconds: 0,
+}
+
+function makeApp(opts: {
+  auth?: Partial<AuthContext> | null
+  setLimits?: (userId: string, limits: unknown) => Promise<number>
+  clearLimits?: (userId: string) => Promise<number>
+}) {
+  const setLimits = vi.fn(opts.setLimits ?? (() => Promise.resolve(1)))
+  const clearLimits = vi.fn(opts.clearLimits ?? (() => Promise.resolve(1)))
+  const app = new Hono<{ Variables: Variables }>()
+  if (opts.auth !== null) {
+    app.use('*', async (c, next) => {
+      c.set('auth', {
+        sub: 'admin1',
+        email: 'admin@x.com',
+        plan: 'enterprise',
+        limits: defaultLimits,
+        method: 'session',
+        role: 'admin',
+        ...opts.auth,
+      })
+      await next()
+    })
+  }
+  app.use('*', createAdminRoleMiddleware())
+  app.route(
+    '/admin',
+    createLimitsAdminRouter({
+      setLimits: setLimits as never,
+      clearLimits,
+    }),
+  )
+  return { app, setLimits, clearLimits }
+}
+
+describe('createLimitsAdminRouter', () => {
+  it('sets a valid override', async () => {
+    const { app, setLimits } = makeApp({})
+    const res = await app.request('/admin/accounts/u1/limits', {
+      method: 'PUT',
+      headers: { 'content-type': 'application/json' },
+      body: JSON.stringify({ maxConcurrentJobs: 25 }),
+    })
+    expect(res.status).toBe(200)
+    expect(setLimits).toHaveBeenCalledWith('u1', { maxConcurrentJobs: 25 })
+  })
+
+  it('rejects an invalid override', async () => {
+    const { app, setLimits } = makeApp({})
+    const res = await app.request('/admin/accounts/u1/limits', {
+      method: 'PUT',
+      headers: { 'content-type': 'application/json' },
+      body: JSON.stringify({ maxConcurrentJobs: -1, bogus: 1 }),
+    })
+    expect(res.status).toBe(400)
+    expect(setLimits).not.toHaveBeenCalled()
+  })
+
+  it('returns 404 when the user does not exist', async () => {
+    const { app } = makeApp({ setLimits: () => Promise.resolve(0) })
+    const res = await app.request('/admin/accounts/missing/limits', {
+      method: 'PUT',
+      headers: { 'content-type': 'application/json' },
+      body: JSON.stringify({ maxConcurrentJobs: 25 }),
+    })
+    expect(res.status).toBe(404)
+  })
+
+  it('clears an override', async () => {
+    const { app, clearLimits } = makeApp({})
+    const res = await app.request('/admin/accounts/u1/limits', {
+      method: 'DELETE',
+    })
+    expect(res.status).toBe(200)
+    expect(clearLimits).toHaveBeenCalledWith('u1')
+    expect(await res.json()).toEqual({ userId: 'u1', limits: null })
+  })
+
+  it('rejects a non-admin caller', async () => {
+    const { app, setLimits } = makeApp({ auth: { role: 'user' } })
+    const res = await app.request('/admin/accounts/u1/limits', {
+      method: 'PUT',
+      headers: { 'content-type': 'application/json' },
+      body: JSON.stringify({ maxConcurrentJobs: 25 }),
+    })
+    expect(res.status).toBe(403)
+    expect(setLimits).not.toHaveBeenCalled()
+  })
+})
diff --git a/services/api-gateway/src/admin/limits.ts b/services/api-gateway/src/admin/limits.ts
new file mode 100644
index 0000000..88b7ee7
--- /dev/null
+++ b/services/api-gateway/src/admin/limits.ts
@@ -0,0 +1,67 @@
+import type { AuthContext, LimitsOverride, Logger } from '@protifer/shared'
+import { OverrideLimitsSchema } from '@protifer/shared'
+import { Hono } from 'hono'
+
+import type { Variables } from '../types/hono.ts'
+
+export interface LimitsAdminDeps {
+  /** Persist a full-replace override; returns affected row count. */
+  setLimits: (userId: string, limits: LimitsOverride) => Promise<number>
+  /** Clear the override (column → NULL); returns affected row count. */
+  clearLimits: (userId: string) => Promise<number>
+  logger?: Logger
+}
+
+function adminIdentity(c: { get: (k: 'auth') => AuthContext }): string {
+  const auth = c.get('auth')
+  return auth.email || auth.sub
+}
+
+export function createLimitsAdminRouter(deps: LimitsAdminDeps) {
+  const { setLimits, clearLimits, logger } = deps
+  const router = new Hono<{ Variables: Variables }>()
+
+  router.put('/accounts/:userId/limits', async (c) => {
+    const userId = c.req.param('userId')
+    let body: unknown
+    try {
+      body = await c.req.json()
+    } catch {
+      return c.json({ error: 'Invalid JSON', code: 'VALIDATION_ERROR' }, 400)
+    }
+    const parsed = OverrideLimitsSchema.safeParse(body)
+    if (!parsed.success) {
+      return c.json(
+        {
+          error: parsed.error.issues[0]?.message ?? 'Invalid override',
+          code: 'VALIDATION_ERROR',
+        },
+        400,
+      )
+    }
+    const affected = await setLimits(userId, parsed.data)
+    if (affected === 0) {
+      return c.json({ error: 'Unknown user', code: 'USER_NOT_FOUND' }, 404)
+    }
+    logger?.info(
+      { targetUserId: userId, adminId: adminIdentity(c), limits: parsed.data },
+      'admin: set account limits override',
+    )
+    return c.json({ userId, limits: parsed.data }, 200)
+  })
+
+  router.delete('/accounts/:userId/limits', async (c) => {
+    const userId = c.req.param('userId')
+    const affected = await clearLimits(userId)
+    if (affected === 0) {
+      return c.json({ error: 'Unknown user', code: 'USER_NOT_FOUND' }, 404)
+    }
+    logger?.info(
+      { targetUserId: userId, adminId: adminIdentity(c) },
+      'admin: cleared account limits override',
+    )
+    return c.json({ userId, limits: null }, 200)
+  })
+
+  return router
+}
diff --git a/services/api-gateway/src/app.ts b/services/api-gateway/src/app.ts
index 5916f55..2fe073e 100644
--- a/services/api-gateway/src/app.ts
+++ b/services/api-gateway/src/app.ts
@@ -3,9 +3,11 @@ import { OpenAPIHono } from '@hono/zod-openapi'
 import { OpenFeature } from '@openfeature/server-sdk'
 import type {
   AuthContext,
+  EffectiveLimits,
   FlagOverrideStore,
   Logger,
   ObjectStore,
+  Plan,
   PredictionSuiteConfig,
   Redis,
 } from '@protifer/shared'
@@ -13,6 +15,8 @@ import {
   AppError,
   CachedFlagOverrideStore,
   FLAG_REGISTRY,
+  MAX_SEQUENCE_LENGTH,
+  PLAN_LIMITS,
   QUEUE_NAMES,
   QueueEvents,
   RedisFlagOverrideStore,
@@ -36,6 +40,7 @@ import pino from 'pino'
 import { createBullBoardRouter } from './admin/bull-board.ts'
 import { createCleanupAdminRouter } from './admin/cleanup.ts'
 import { createFlagsAdminRouter, createFlagsProvider } from './admin/flags.ts'
+import { createLimitsAdminRouter } from './admin/limits.ts'
 import { createSheddingAdminRouter } from './admin/shedding.ts'
 import { createAuth } from './auth/index.ts'
 import { createUserDirectory } from './auth/user-directory.ts'
@@ -280,24 +285,40 @@ export function createApp(overrides?: {
     sharedPool,
   )
   const userDirectory = createUserDirectory(sharedPool)
+  const planClassDefaults: Record<Plan, EffectiveLimits> = {
+    free: {
+      submissionsPerMinute: config.rateLimit.submissionsFree,
+      maxConcurrentJobs: PLAN_LIMITS.free.maxConcurrentJobs,
+      maxSequenceLength: MAX_SEQUENCE_LENGTH,
+      sloSeconds: sheddingConfig.sloSeconds.free,
+    },
+    pro: {
+      submissionsPerMinute: config.rateLimit.submissionsPro,
+      maxConcurrentJobs: PLAN_LIMITS.pro.maxConcurrentJobs,
+      maxSequenceLength: MAX_SEQUENCE_LENGTH,
+      sloSeconds: sheddingConfig.sloSeconds.pro,
+    },
+    enterprise: {
+      submissionsPerMinute: config.rateLimit.submissionsEnterprise,
+      maxConcurrentJobs: PLAN_LIMITS.enterprise.maxConcurrentJobs,
+      maxSequenceLength: MAX_SEQUENCE_LENGTH,
+      sloSeconds: sheddingConfig.sloSeconds.enterprise,
+    },
+  }
   const planResolver = new DbPlanResolver({
     logger,
+    classDefaults: planClassDefaults,
     getUser: async (userId) => {
-      const result = await sharedPool.query<{ plan?: string }>(
-        'SELECT plan FROM "user" WHERE id = $1 LIMIT 1',
-        [userId],
-      )
+      const result = await sharedPool.query<{
+        plan?: string
+        limits?: unknown
+      }>('SELECT plan, limits FROM "user" WHERE id = $1 LIMIT 1', [userId])
       return result.rows[0] ?? null
     },
   })
   const rateLimitConnection = connection as unknown as Redis
   const submissionRL = createSubmissionRateLimiter({
     connection: rateLimitConnection,
-    submissionsPerMinute: {
-      free: config.rateLimit.submissionsFree,
-      pro: config.rateLimit.submissionsPro,
-      enterprise: config.rateLimit.submissionsEnterprise,
-    },
   })
   const pollRL = createPollRateLimiter({ connection: rateLimitConnection })
 
@@ -418,6 +439,26 @@ export function createApp(overrides?: {
     '/admin/flags',
     createFlagsAdminRouter({ registry: FLAG_REGISTRY, store: flagsStore }),
   )
+  app.route(
+    '/admin',
+    createLimitsAdminRouter({
+      logger,
+      setLimits: async (userId, limits) => {
+        const result = await sharedPool.query(
+          'UPDATE "user" SET limits = $1::jsonb WHERE id = $2',
+          [JSON.stringify(limits), userId],
+        )
+        return result.rowCount ?? 0
+      },
+      clearLimits: async (userId) => {
+        const result = await sharedPool.query(
+          'UPDATE "user" SET limits = NULL WHERE id = $1',
+          [userId],
+        )
+        return result.rowCount ?? 0
+      },
+    }),
+  )
   if (serveStatic) {
     // Bull Board ships its own static asset bundle; only mount when the
     // serve-static helper is wired in.
diff --git a/services/api-gateway/src/contracts/job-contracts.test.ts b/services/api-gateway/src/contracts/job-contracts.test.ts
index ea77eb6..5bacff5 100644
--- a/services/api-gateway/src/contracts/job-contracts.test.ts
+++ b/services/api-gateway/src/contracts/job-contracts.test.ts
@@ -15,7 +15,17 @@ import type { RedisCommands } from '../queue.ts'
 import { createPredictionsRouter } from '../routes/predictions.ts'
 import type { Variables } from '../types/hono.ts'
 
-const proResolver: PlanResolver = { resolve: vi.fn().mockResolvedValue('pro') }
+const proResolver: PlanResolver = {
+  resolve: vi.fn().mockResolvedValue({
+    plan: 'pro' as const,
+    limits: {
+      submissionsPerMinute: 60,
+      maxConcurrentJobs: 10,
+      maxSequenceLength: 4096,
+      sloSeconds: 120,
+    },
+  }),
+}
 
 const mockAuth = {
   api: {
diff --git a/services/api-gateway/src/middleware/auth/auth.test.ts b/services/api-gateway/src/middleware/auth/auth.test.ts
index 580531f..9d4b3eb 100644
--- a/services/api-gateway/src/middleware/auth/auth.test.ts
+++ b/services/api-gateway/src/middleware/auth/auth.test.ts
@@ -1,3 +1,4 @@
+import type { EffectiveLimits } from '@protifer/shared'
 import { Hono } from 'hono'
 import { describe, it, expect, vi } from 'vitest'
 
@@ -6,8 +7,21 @@ import type { Auth } from '../../auth/index.ts'
 import type { UserDirectory, UserRecord } from '../../auth/user-directory.ts'
 import type { Variables } from '../../types/hono.ts'
 
-const freeResolver = { resolve: () => Promise.resolve('free' as const) }
-const proResolver = { resolve: () => Promise.resolve('pro' as const) }
+const limitsFor = (submissionsPerMinute: number): EffectiveLimits => ({
+  submissionsPerMinute,
+  maxConcurrentJobs: 2,
+  maxSequenceLength: 4096,
+  sloSeconds: 30,
+})
+
+const freeResolver = {
+  resolve: () =>
+    Promise.resolve({ plan: 'free' as const, limits: limitsFor(10) }),
+}
+const proResolver = {
+  resolve: () =>
+    Promise.resolve({ plan: 'pro' as const, limits: limitsFor(60) }),
+}
 
 function makeAuth(
   session: { user: { id: string; email: string; role?: string } } | null,
diff --git a/services/api-gateway/src/middleware/auth/authenticate.ts b/services/api-gateway/src/middleware/auth/authenticate.ts
index f928f92..32d5cf8 100644
--- a/services/api-gateway/src/middleware/auth/authenticate.ts
+++ b/services/api-gateway/src/middleware/auth/authenticate.ts
@@ -47,13 +47,14 @@ export function createAuthenticateMiddleware({
       if (!user) {
         return c.json({ error: 'Unauthorized', code: 'UNAUTHORIZED' }, 401)
       }
-      const plan = await resolver.resolve(user.id, user.email)
+      const { plan, limits } = await resolver.resolve(user.id, user.email)
       const role =
         user.role === 'admin' || user.role === 'user' ? user.role : undefined
       c.set('auth', {
         sub: user.id,
         email: user.email,
         plan,
+        limits,
         method: 'api-key',
         ...(role ? { role } : {}),
       })
@@ -65,12 +66,16 @@ export function createAuthenticateMiddleware({
     if (!session?.user) {
       return c.json({ error: 'Unauthorized', code: 'UNAUTHORIZED' }, 401)
     }
-    const plan = await resolver.resolve(session.user.id, session.user.email)
+    const { plan, limits } = await resolver.resolve(
+      session.user.id,
+      session.user.email,
+    )
     const role = (session.user as { role?: string }).role
     c.set('auth', {
       sub: session.user.id,
       email: session.user.email,
       plan,
+      limits,
       method: 'session',
       ...(role === 'admin' || role === 'user' ? { role } : {}),
     })
diff --git a/services/api-gateway/src/middleware/rate-limit.test.ts b/services/api-gateway/src/middleware/rate-limit.test.ts
index 64ac98f..5ff349e 100644
--- a/services/api-gateway/src/middleware/rate-limit.test.ts
+++ b/services/api-gateway/src/middleware/rate-limit.test.ts
@@ -77,10 +77,23 @@ beforeEach(async () => {
 function makeAuthApp(
   plan: AuthContext['plan'],
   rateLimiter: ReturnType<typeof createSubmissionRateLimiter>,
+  submissionsPerMinute?: number,
 ) {
+  const spm = submissionsPerMinute ?? PLAN_LIMITS[plan].submissionsPerMinute
   const app = new OpenAPIHono<{ Variables: Variables }>()
   app.use('*', (c, next) => {
-    c.set('auth', { sub: 'user-001', email: 'u@test.com', plan })
+    c.set('auth', {
+      sub: 'user-001',
+      email: 'u@test.com',
+      plan,
+      limits: {
+        submissionsPerMinute: spm,
+        maxConcurrentJobs: 2,
+        maxSequenceLength: 4096,
+        sloSeconds: 30,
+      },
+      method: 'session',
+    })
     return next()
   })
   app.use('*', rateLimiter)
@@ -129,13 +142,11 @@ describe('createSubmissionRateLimiter', () => {
     )
   })
 
-  it('honors a configured per-plan submissions ceiling', async () => {
+  it('honors the resolved per-account submissions ceiling', async () => {
     const app = makeAuthApp(
       'free',
-      createSubmissionRateLimiter({
-        connection,
-        submissionsPerMinute: { free: 999, pro: 60, enterprise: 300 },
-      }),
+      createSubmissionRateLimiter({ connection }),
+      999,
     )
     const res = await app.request('/test')
     expect(res.headers.get('ratelimit-policy')).toContain('999')
diff --git a/services/api-gateway/src/middleware/rate-limit.ts b/services/api-gateway/src/middleware/rate-limit.ts
index 197a984..b435330 100644
--- a/services/api-gateway/src/middleware/rate-limit.ts
+++ b/services/api-gateway/src/middleware/rate-limit.ts
@@ -1,5 +1,4 @@
-import type { Plan, Redis } from '@protifer/shared'
-import { PLAN_LIMITS } from '@protifer/shared'
+import type { Redis } from '@protifer/shared'
 import type { Store } from 'hono-rate-limiter'
 import { rateLimiter } from 'hono-rate-limiter'
 import { RedisStore } from 'rate-limit-redis'
@@ -36,23 +35,10 @@ export interface RateLimitDeps {
   connection: Redis
 }
 
-export interface SubmissionRateLimitDeps extends RateLimitDeps {
-  /** Per-plan submissions/min ceiling. Defaults to PLAN_LIMITS. */
-  submissionsPerMinute?: Record<Plan, number>
-}
-
-export function createSubmissionRateLimiter({
-  connection,
-  submissionsPerMinute,
-}: SubmissionRateLimitDeps) {
+export function createSubmissionRateLimiter({ connection }: RateLimitDeps) {
   return rateLimiter<{ Variables: Variables }>({
     windowMs: 60 * 1000,
-    limit: (c) => {
-      const plan = c.get('auth').plan
-      return (
-        submissionsPerMinute?.[plan] ?? PLAN_LIMITS[plan].submissionsPerMinute
-      )
-    },
+    limit: (c) => c.get('auth').limits.submissionsPerMinute,
     keyGenerator: (c) => c.get('auth').sub,
     store: makeRedisStore(connection, 'rl:submit:'),
     standardHeaders: 'draft-7',
diff --git a/services/api-gateway/src/middleware/shedding.test.ts b/services/api-gateway/src/middleware/shedding.test.ts
index 5ef192c..7bced0d 100644
--- a/services/api-gateway/src/middleware/shedding.test.ts
+++ b/services/api-gateway/src/middleware/shedding.test.ts
@@ -83,7 +83,18 @@ function makeApp({
   const app = new Hono<{ Variables: Variables }>()
   if (auth) {
     app.use('*', async (c, next) => {
-      c.set('auth', { sub: 'u1', email: 'u1@x.com', plan })
+      c.set('auth', {
+        sub: 'u1',
+        email: 'u1@x.com',
+        plan,
+        limits: {
+          submissionsPerMinute: 10,
+          maxConcurrentJobs: 2,
+          maxSequenceLength: 4096,
+          sloSeconds: config.sloSeconds[plan],
+        },
+        method: 'session',
+      })
       await next()
     })
   }
@@ -276,7 +287,18 @@ describe('createSheddingMiddleware', () => {
     })
     const app = new Hono<{ Variables: Variables }>()
     app.use('*', async (c, next) => {
-      c.set('auth', { sub: 'u1', email: 'u1@x.com', plan: 'free' })
+      c.set('auth', {
+        sub: 'u1',
+        email: 'u1@x.com',
+        plan: 'free',
+        limits: {
+          submissionsPerMinute: 10,
+          maxConcurrentJobs: 2,
+          maxSequenceLength: 4096,
+          sloSeconds: config.sloSeconds.free,
+        },
+        method: 'session',
+      })
       await next()
     })
     app.use('*', middleware)
diff --git a/services/api-gateway/src/middleware/shedding.ts b/services/api-gateway/src/middleware/shedding.ts
index 9073f6a..f75ff1d 100644
--- a/services/api-gateway/src/middleware/shedding.ts
+++ b/services/api-gateway/src/middleware/shedding.ts
@@ -132,6 +132,7 @@ export function createSheddingMiddleware(deps: SheddingMiddlewareDeps) {
       state: snapshot,
       config,
       plan: auth.plan,
+      sloSeconds: auth.limits.sloSeconds,
       sequenceResidues: residues,
       nowMs: now(),
     })
diff --git a/services/api-gateway/src/plan/db-resolver.test.ts b/services/api-gateway/src/plan/db-resolver.test.ts
index 48c8c41..097c332 100644
--- a/services/api-gateway/src/plan/db-resolver.test.ts
+++ b/services/api-gateway/src/plan/db-resolver.test.ts
@@ -1,38 +1,85 @@
+import type { EffectiveLimits, Plan } from '@protifer/shared'
 import { describe, it, expect, vi } from 'vitest'
 
 import { DbPlanResolver } from './db-resolver.ts'
 
+const classDefaults: Record<Plan, EffectiveLimits> = {
+  free: {
+    submissionsPerMinute: 10,
+    maxConcurrentJobs: 2,
+    maxSequenceLength: 4096,
+    sloSeconds: 30,
+  },
+  pro: {
+    submissionsPerMinute: 60,
+    maxConcurrentJobs: 10,
+    maxSequenceLength: 4096,
+    sloSeconds: 120,
+  },
+  enterprise: {
+    submissionsPerMinute: 300,
+    maxConcurrentJobs: 50,
+    maxSequenceLength: 4096,
+    sloSeconds: 0,
+  },
+}
+
 describe('DbPlanResolver', () => {
-  it('returns the user.plan field verbatim when recognised', async () => {
-    const getUser = vi
-      .fn()
-      .mockResolvedValue({ id: 'u1', plan: 'pro' as const })
-    const r = new DbPlanResolver({ getUser })
-    await expect(r.resolve('u1', 'a@b.c')).resolves.toBe('pro')
+  it('returns plan + class defaults when no override is present', async () => {
+    const getUser = vi.fn().mockResolvedValue({ id: 'u1', plan: 'pro' })
+    const r = new DbPlanResolver({ getUser, classDefaults })
+    await expect(r.resolve('u1', 'a@b.c')).resolves.toEqual({
+      plan: 'pro',
+      limits: classDefaults.pro,
+    })
     expect(getUser).toHaveBeenCalledWith('u1')
   })
 
-  it('defaults to free when the plan field is missing', async () => {
-    const getUser = vi.fn().mockResolvedValue({ id: 'u1' })
-    const r = new DbPlanResolver({ getUser })
-    await expect(r.resolve('u1', 'a@b.c')).resolves.toBe('free')
+  it('merges a partial override over class defaults', async () => {
+    const getUser = vi.fn().mockResolvedValue({
+      id: 'u1',
+      plan: 'enterprise',
+      limits: { maxConcurrentJobs: 25, submissionsPerMinute: 1000 },
+    })
+    const r = new DbPlanResolver({ getUser, classDefaults })
+    await expect(r.resolve('u1', 'a@b.c')).resolves.toEqual({
+      plan: 'enterprise',
+      limits: {
+        ...classDefaults.enterprise,
+        maxConcurrentJobs: 25,
+        submissionsPerMinute: 1000,
+      },
+    })
   })
 
-  it('defaults to free when the user row is missing entirely', async () => {
-    const getUser = vi.fn().mockResolvedValue(null)
-    const r = new DbPlanResolver({ getUser })
-    await expect(r.resolve('u1', 'a@b.c')).resolves.toBe('free')
+  it('falls back to class defaults when the override is invalid', async () => {
+    const getUser = vi.fn().mockResolvedValue({
+      id: 'u1',
+      plan: 'pro',
+      limits: { maxConcurrentJobs: -5, bogus: 1 },
+    })
+    const r = new DbPlanResolver({ getUser, classDefaults })
+    await expect(r.resolve('u1', 'a@b.c')).resolves.toEqual({
+      plan: 'pro',
+      limits: classDefaults.pro,
+    })
   })
 
-  it('defaults to free when the stored value is unrecognised', async () => {
+  it('defaults to free + class defaults when the plan is unrecognised', async () => {
     const getUser = vi.fn().mockResolvedValue({ id: 'u1', plan: 'gold' })
-    const r = new DbPlanResolver({ getUser })
-    await expect(r.resolve('u1', 'a@b.c')).resolves.toBe('free')
+    const r = new DbPlanResolver({ getUser, classDefaults })
+    await expect(r.resolve('u1', 'a@b.c')).resolves.toEqual({
+      plan: 'free',
+      limits: classDefaults.free,
+    })
   })
 
-  it('defaults to free when getUser throws', async () => {
+  it('defaults to free + class defaults when getUser throws', async () => {
     const getUser = vi.fn().mockRejectedValue(new Error('db down'))
-    const r = new DbPlanResolver({ getUser })
-    await expect(r.resolve('u1', 'a@b.c')).resolves.toBe('free')
+    const r = new DbPlanResolver({ getUser, classDefaults })
+    await expect(r.resolve('u1', 'a@b.c')).resolves.toEqual({
+      plan: 'free',
+      limits: classDefaults.free,
+    })
   })
 })
diff --git a/services/api-gateway/src/plan/db-resolver.ts b/services/api-gateway/src/plan/db-resolver.ts
index f299028..7ae5c79 100644
--- a/services/api-gateway/src/plan/db-resolver.ts
+++ b/services/api-gateway/src/plan/db-resolver.ts
@@ -1,17 +1,28 @@
-import type { Logger, Plan, PlanResolver } from '@protifer/shared'
+import type {
+  EffectiveLimits,
+  LimitsOverride,
+  Logger,
+  Plan,
+  PlanResolver,
+  ResolvedAccount,
+} from '@protifer/shared'
+import { OverrideLimitsSchema, mergeLimits } from '@protifer/shared'
 
 const VALID_PLANS: readonly Plan[] = ['free', 'pro', 'enterprise']
 
 export interface DbPlanResolverDeps {
-  getUser: (userId: string) => Promise<{ plan?: string } | null>
+  getUser: (
+    userId: string,
+  ) => Promise<{ plan?: string; limits?: unknown } | null>
+  classDefaults: Record<Plan, EffectiveLimits>
   logger?: Logger
 }
 
 export class DbPlanResolver implements PlanResolver {
   constructor(private readonly deps: DbPlanResolverDeps) {}
 
-  async resolve(userId: string): Promise<Plan> {
-    let user: { plan?: string } | null
+  async resolve(userId: string): Promise<ResolvedAccount> {
+    let user: { plan?: string; limits?: unknown } | null
     try {
       user = await this.deps.getUser(userId)
     } catch (err) {
@@ -19,12 +30,31 @@ export class DbPlanResolver implements PlanResolver {
         { err, userId },
         'DbPlanResolver: getUser failed, defaulting to free',
       )
-      return 'free'
+      return { plan: 'free', limits: this.deps.classDefaults.free }
     }
+
     const raw = user?.plan
-    if (raw && (VALID_PLANS as readonly string[]).includes(raw)) {
-      return raw as Plan
+    const plan: Plan =
+      raw && (VALID_PLANS as readonly string[]).includes(raw)
+        ? (raw as Plan)
+        : 'free'
+
+    let override: LimitsOverride | undefined
+    if (user?.limits != null) {
+      const parsed = OverrideLimitsSchema.safeParse(user.limits)
+      if (parsed.success) {
+        override = parsed.data
+      } else {
+        this.deps.logger?.warn(
+          { userId, issues: parsed.error.issues },
+          'DbPlanResolver: invalid limits override, using class defaults',
+        )
+      }
+    }
+
+    return {
+      plan,
+      limits: mergeLimits(this.deps.classDefaults[plan], override),
     }
-    return 'free'
   }
 }
diff --git a/services/api-gateway/src/routes/_utils.ts b/services/api-gateway/src/routes/_utils.ts
index cabf9d8..0c6d600 100644
--- a/services/api-gateway/src/routes/_utils.ts
+++ b/services/api-gateway/src/routes/_utils.ts
@@ -1,5 +1,4 @@
-import { PLAN_LIMITS } from '@protifer/shared'
-import type { Plan } from '@protifer/shared'
+import type { EffectiveLimits } from '@protifer/shared'
 import type { Context } from 'hono'
 
 import { ACTIVE_JOBS_KEY } from '../cleanup.ts'
@@ -17,10 +16,10 @@ export const DEFAULT_TIMEOUT_MS = 2_000
 export async function withinConcurrentJobLimit(
   c: Context,
   redis: RedisCommands,
-  auth: { sub: string; plan: Plan },
+  auth: { sub: string; limits: EffectiveLimits },
 ): Promise<boolean> {
   const concurrentCount = await redis.zcard(ACTIVE_JOBS_KEY(auth.sub))
-  const { maxConcurrentJobs } = PLAN_LIMITS[auth.plan]
+  const { maxConcurrentJobs } = auth.limits
   if (concurrentCount >= maxConcurrentJobs) {
     c.header('X-RateLimit-Concurrent', String(concurrentCount))
     return false
diff --git a/services/api-gateway/src/routes/embeddings.test.ts b/services/api-gateway/src/routes/embeddings.test.ts
index 9dcbc4b..9f4bdf6 100644
--- a/services/api-gateway/src/routes/embeddings.test.ts
+++ b/services/api-gateway/src/routes/embeddings.test.ts
@@ -11,7 +11,17 @@ import { createAuthenticateMiddleware } from '../middleware/auth/index.ts'
 import type { RedisCommands } from '../queue.ts'
 import type { Variables } from '../types/hono.ts'
 
-const proResolver: PlanResolver = { resolve: vi.fn().mockResolvedValue('pro') }
+const proResolver: PlanResolver = {
+  resolve: vi.fn().mockResolvedValue({
+    plan: 'pro' as const,
+    limits: {
+      submissionsPerMinute: 60,
+      maxConcurrentJobs: 10,
+      maxSequenceLength: 4096,
+      sloSeconds: 120,
+    },
+  }),
+}
 
 const mockAuth = {
   api: {
diff --git a/services/api-gateway/src/routes/embeddings.ts b/services/api-gateway/src/routes/embeddings.ts
index 1f5b9de..6f49103 100644
--- a/services/api-gateway/src/routes/embeddings.ts
+++ b/services/api-gateway/src/routes/embeddings.ts
@@ -112,6 +112,15 @@ export function createEmbeddingsRouter(
     const { sequence, accession } = c.req.valid('json')
     const { embeddingModel } = suite
     const auth = c.get('auth')
+    if (sequence.length > auth.limits.maxSequenceLength) {
+      return c.json(
+        {
+          error: `sequence must be at most ${String(auth.limits.maxSequenceLength)} residues`,
+          code: 'VALIDATION_ERROR',
+        },
+        400,
+      )
+    }
     const sequenceHash = computeSequenceHash(sequence)
     const jobId = computeEmbeddingJobId(sequence, embeddingModel)
     const statusUrl = `/v1/embeddings/${jobId}`
diff --git a/services/api-gateway/src/routes/predictions.test.ts b/services/api-gateway/src/routes/predictions.test.ts
index 055af71..116155f 100644
--- a/services/api-gateway/src/routes/predictions.test.ts
+++ b/services/api-gateway/src/routes/predictions.test.ts
@@ -12,7 +12,18 @@ import type { RedisCommands } from '../queue.ts'
 import { PredictionPollResponseSchema } from '../schemas/predictions.ts'
 import type { Variables } from '../types/hono.ts'
 
-const proResolver: PlanResolver = { resolve: vi.fn().mockResolvedValue('pro') }
+const proAccount = {
+  plan: 'pro' as const,
+  limits: {
+    submissionsPerMinute: 60,
+    maxConcurrentJobs: 10,
+    maxSequenceLength: 4096,
+    sloSeconds: 120,
+  },
+}
+const proResolver: PlanResolver = {
+  resolve: vi.fn().mockResolvedValue(proAccount),
+}
 
 const mockAuth = {
   api: {
@@ -204,7 +215,7 @@ describe('GET /v1/predictions/:jobId', () => {
       session: {},
       user: { id: 'user-001', email: 'user@example.com' },
     })
-    proResolver.resolve = vi.fn().mockResolvedValue('pro')
+    proResolver.resolve = vi.fn().mockResolvedValue(proAccount)
   })
 
   it('returns 404 when job does not exist', async () => {
diff --git a/services/api-gateway/src/routes/predictions.ts b/services/api-gateway/src/routes/predictions.ts
index 629e8d7..22d41ab 100644
--- a/services/api-gateway/src/routes/predictions.ts
+++ b/services/api-gateway/src/routes/predictions.ts
@@ -115,6 +115,15 @@ export function createPredictionsRouter(
     const { sequence, accession } = c.req.valid('json')
     const { embeddingModel, predictionModels } = suite
     const auth = c.get('auth')
+    if (sequence.length > auth.limits.maxSequenceLength) {
+      return c.json(
+        {
+          error: `sequence must be at most ${String(auth.limits.maxSequenceLength)} residues`,
+          code: 'VALIDATION_ERROR',
+        },
+        400,
+      )
+    }
     const sequenceHash = computeSequenceHash(sequence)
     const embJobId = computeEmbeddingJobId(sequence, embeddingModel)
     const predJobId = computePredictionJobId(
diff --git a/services/api-gateway/src/schemas/common.ts b/services/api-gateway/src/schemas/common.ts
index 56d4fbb..624254e 100644
--- a/services/api-gateway/src/schemas/common.ts
+++ b/services/api-gateway/src/schemas/common.ts
@@ -1,14 +1,19 @@
 import { z } from '@hono/zod-openapi'
-import { MAX_SEQUENCE_LENGTH } from '@protifer/shared'
+import { MAX_SEQUENCE_LENGTH_CAP } from '@protifer/shared'
 
-/** Shared submit body (sequence + optional accession). Domains attach their own `.openapi()` label. */
+/**
+ * Shared submit body (sequence + optional accession). The static `.max()` is
+ * the absolute hard ceiling; the per-account resolved `maxSequenceLength` is
+ * enforced per request in the route handlers. Domains attach their own
+ * `.openapi()` label.
+ */
 export const SubmitBodySchema = z.object({
   sequence: z
     .string()
     .min(1, 'sequence is required and must be non-empty')
     .max(
-      MAX_SEQUENCE_LENGTH,
-      `sequence must be at most ${String(MAX_SEQUENCE_LENGTH)} residues`,
+      MAX_SEQUENCE_LENGTH_CAP,
+      `sequence must be at most ${String(MAX_SEQUENCE_LENGTH_CAP)} residues`,
     )
     .openapi({ example: 'MKTVRQERLK' }),
   accession: z.string().optional().openapi({
diff --git a/services/api-gateway/src/schemas/schemas.test.ts b/services/api-gateway/src/schemas/schemas.test.ts
index 5fda107..f5b5783 100644
--- a/services/api-gateway/src/schemas/schemas.test.ts
+++ b/services/api-gateway/src/schemas/schemas.test.ts
@@ -1,4 +1,4 @@
-import { MAX_SEQUENCE_LENGTH } from '@protifer/shared'
+import { MAX_SEQUENCE_LENGTH_CAP } from '@protifer/shared'
 import { describe, it, expect } from 'vitest'
 
 import {
@@ -27,14 +27,14 @@ describe('PredictionSubmitBodySchema', () => {
   it('accepts a sequence at the max length', () => {
     expect(
       PredictionSubmitBodySchema.safeParse({
-        sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH),
+        sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH_CAP),
       }).success,
     ).toBe(true)
   })
   it('rejects a sequence longer than the max length', () => {
     expect(
       PredictionSubmitBodySchema.safeParse({
-        sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH + 1),
+        sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH_CAP + 1),
       }).success,
     ).toBe(false)
   })
@@ -54,14 +54,14 @@ describe('EmbeddingSubmitBodySchema', () => {
   it('accepts a sequence at the max length', () => {
     expect(
       EmbeddingSubmitBodySchema.safeParse({
-        sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH),
+        sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH_CAP),
       }).success,
     ).toBe(true)
   })
   it('rejects a sequence longer than the max length', () => {
     expect(
       EmbeddingSubmitBodySchema.safeParse({
-        sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH + 1),
+        sequence: 'A'.repeat(MAX_SEQUENCE_LENGTH_CAP + 1),
       }).success,
     ).toBe(false)
   })
diff --git a/services/api-gateway/src/shedding/decide.ts b/services/api-gateway/src/shedding/decide.ts
index dcc6cff..f17e501 100644
--- a/services/api-gateway/src/shedding/decide.ts
+++ b/services/api-gateway/src/shedding/decide.ts
@@ -15,6 +15,8 @@ export interface DecideInput {
   state: SheddingStateSnapshot
   config: SheddingConfig
   plan: Plan
+  /** Account's resolved SLO seconds; falls back to `config.sloSeconds[plan]`. */
+  sloSeconds?: number
   sequenceResidues: number
   nowMs: number
   jitter?: () => number
@@ -79,7 +81,7 @@ export function decideAdmission(input: DecideInput): AdmissionDecision {
     }
   }
 
-  const slo = config.sloSeconds[plan]
+  const slo = input.sloSeconds ?? config.sloSeconds[plan]
   if (slo === 0) {
     return { admit: true, estimatedWaitSeconds }
   }
diff --git a/tests/backend-e2e/helpers.ts b/tests/backend-e2e/helpers.ts
index 66212b1..afb7de7 100644
--- a/tests/backend-e2e/helpers.ts
+++ b/tests/backend-e2e/helpers.ts
@@ -67,6 +67,16 @@ export async function deleteTestUser(userId: string): Promise<void> {
   await pool.query('DELETE FROM "user" WHERE id = $1', [userId])
 }
 
+export async function setUserLimits(
+  userId: string,
+  limits: Record<string, number> | null,
+): Promise<void> {
+  await pool.query('UPDATE "user" SET limits = $1::jsonb WHERE id = $2', [
+    limits === null ? null : JSON.stringify(limits),
+    userId,
+  ])
+}
+
 export async function apiRequest(
   method: string,
   path: string,
diff --git a/tests/backend-e2e/limits-override.test.ts b/tests/backend-e2e/limits-override.test.ts
new file mode 100644
index 0000000..bd24c48
--- /dev/null
+++ b/tests/backend-e2e/limits-override.test.ts
@@ -0,0 +1,55 @@
+import { afterEach, beforeEach, describe, expect, it } from 'vitest'
+
+import {
+  apiRequest,
+  cleanQueues,
+  createTestUser,
+  deleteTestUser,
+  setUserLimits,
+  type TestUser,
+} from './helpers'
+
+const SEQUENCE = 'MKTVRQERLKSIVRILERSKEPVSGAQL' // 28 residues
+
+describe('Per-account limit overrides E2E', () => {
+  const created: string[] = []
+
+  beforeEach(async () => {
+    await cleanQueues()
+  })
+
+  afterEach(async () => {
+    for (const id of created.splice(0)) await deleteTestUser(id)
+  })
+
+  async function freeUser(): Promise<TestUser> {
+    const u = await createTestUser('free')
+    created.push(u.userId)
+    return u
+  }
+
+  it('enforces a per-account maxSequenceLength override below the plan default', async () => {
+    const u = await freeUser()
+    await setUserLimits(u.userId, { maxSequenceLength: 10 })
+
+    const res = await apiRequest('POST', '/v1/embeddings', {
+      body: { sequence: SEQUENCE },
+      bearer: u.key,
+    })
+
+    expect(res.status).toBe(400)
+    const body = (await res.json()) as { code?: string }
+    expect(body.code).toBe('VALIDATION_ERROR')
+  })
+
+  it('admits the same sequence for an account on the plan default', async () => {
+    const u = await freeUser()
+
+    const res = await apiRequest('POST', '/v1/embeddings', {
+      body: { sequence: SEQUENCE },
+      bearer: u.key,
+    })
+
+    expect(res.status).toBe(202)
+  })
+})