From dc0122b6f42c61fbbed13545ebfef4c4bd8bc7f5 Mon Sep 17 00:00:00 2001 From: Tale Agent Date: Tue, 23 Jun 2026 10:40:32 +0000 Subject: [PATCH] fix(platform): keep zero-cost fallback models after a credit-exhausted credential (#1454) --- .../convex/lib/agent_chat/internal_actions.ts | 3 + .../convex/providers/failure_scope.test.ts | 97 +++++++++++++++++-- .../convex/providers/failure_scope.ts | 65 +++++++++++-- 3 files changed, 152 insertions(+), 13 deletions(-) diff --git a/services/platform/convex/lib/agent_chat/internal_actions.ts b/services/platform/convex/lib/agent_chat/internal_actions.ts index fa95e74ef..5ecad7318 100644 --- a/services/platform/convex/lib/agent_chat/internal_actions.ts +++ b/services/platform/convex/lib/agent_chat/internal_actions.ts @@ -509,6 +509,9 @@ export async function runGenerationCore( // later model sharing that resource is skipped instead of waiting on a // doomed request. Keying by credential (not provider name) means a sibling // model with its own `secretsEnv` key is still tried after another key dies. + // An out-of-funds (credit) failure is the one exception that does NOT doom + // every sibling: a zero-cost model on the same credential (`:free` / priced + // at 0) draws no credits, so `isModelScopeRetired` still attempts it (#1454). 
let lastFallbackError: unknown; const deadScopes = new Set(); diff --git a/services/platform/convex/providers/failure_scope.test.ts b/services/platform/convex/providers/failure_scope.test.ts index 3cf098b47..b166070d1 100644 --- a/services/platform/convex/providers/failure_scope.test.ts +++ b/services/platform/convex/providers/failure_scope.test.ts @@ -1,15 +1,24 @@ import { describe, expect, it } from 'vitest'; import { + creditScopeKey, credentialScopeKey, endpointScopeKey, + isFreeModel, isModelScopeRetired, modelScopeKeys, retiredScopeKey, } from './failure_scope'; const model = ( - over: Partial<{ providerName: string; apiKey: string; baseUrl: string }> = {}, + over: Partial<{ + providerName: string; + apiKey: string; + baseUrl: string; + modelId: string; + inputCentsPerMillion: number; + outputCentsPerMillion: number; + }> = {}, ) => ({ providerName: 'openrouter', apiKey: 'key-A', @@ -47,15 +56,18 @@ describe('endpointScopeKey', () => { }); describe('retiredScopeKey', () => { - it('retires the CREDENTIAL for funds and auth failures', () => { - expect(retiredScopeKey('credit_exhausted', model())).toBe( - credentialScopeKey(model()), - ); + it('retires the AUTH credential for an auth failure', () => { expect(retiredScopeKey('auth_error', model())).toBe( credentialScopeKey(model()), ); }); + it('retires the CREDIT credential for an out-of-funds failure', () => { + expect(retiredScopeKey('credit_exhausted', model())).toBe( + creditScopeKey(model()), + ); + }); + it('retires the ENDPOINT for an unreachable host', () => { expect(retiredScopeKey('provider_unreachable', model())).toBe( endpointScopeKey(model()), @@ -93,10 +105,83 @@ describe('isModelScopeRetired', () => { expect(isModelScopeRetired(model(), new Set())).toBe(false); }); - it('exposes both scopes a model belongs to', () => { + it('exposes both unconditional scopes a model belongs to', () => { expect(modelScopeKeys(model())).toEqual([ credentialScopeKey(model()), endpointScopeKey(model()), ]); }); + + 
describe('credit retirement spares zero-cost siblings (#1454)', () => { + it('still skips a PAID model on a credit-dead credential', () => { + const dead = new Set([creditScopeKey(model())]); + expect( + isModelScopeRetired( + model({ modelId: 'openai/gpt-4o', inputCentsPerMillion: 250 }), + dead, + ), + ).toBe(true); + }); + + it('does NOT skip a `:free` sibling on a credit-dead credential', () => { + const dead = new Set([creditScopeKey(model())]); + expect( + isModelScopeRetired( + model({ modelId: 'meta-llama/llama-3.3-70b-instruct:free' }), + dead, + ), + ).toBe(false); + }); + + it('does NOT skip a zero-priced sibling on a credit-dead credential', () => { + const dead = new Set([creditScopeKey(model())]); + expect( + isModelScopeRetired( + model({ inputCentsPerMillion: 0, outputCentsPerMillion: 0 }), + dead, + ), + ).toBe(false); + }); + + it('STILL skips a free model when the credential died from AUTH', () => { + // A bad/expired key kills free models too — only credit exhaustion spares them. 
+ const dead = new Set([credentialScopeKey(model())]); + expect(isModelScopeRetired(model({ modelId: 'x/y:free' }), dead)).toBe( + true, + ); + }); + + it('STILL skips a free model when the endpoint is unreachable', () => { + const dead = new Set([endpointScopeKey(model())]); + expect(isModelScopeRetired(model({ modelId: 'x/y:free' }), dead)).toBe( + true, + ); + }); + }); +}); + +describe('isFreeModel', () => { + it('treats the OpenRouter `:free` suffix as free', () => { + expect(isFreeModel(model({ modelId: 'deepseek/deepseek-r1:free' }))).toBe( + true, + ); + }); + + it('treats explicit zero token pricing on both sides as free', () => { + expect( + isFreeModel(model({ inputCentsPerMillion: 0, outputCentsPerMillion: 0 })), + ).toBe(true); + }); + + it('does NOT treat unconfigured pricing as free', () => { + expect(isFreeModel(model({ modelId: 'openai/gpt-4o' }))).toBe(false); + }); + + it('does NOT treat a paid model as free', () => { + expect( + isFreeModel( + model({ inputCentsPerMillion: 250, outputCentsPerMillion: 1000 }), + ), + ).toBe(false); + }); }); diff --git a/services/platform/convex/providers/failure_scope.ts b/services/platform/convex/providers/failure_scope.ts index ee87e9dcc..3a5740c5c 100644 --- a/services/platform/convex/providers/failure_scope.ts +++ b/services/platform/convex/providers/failure_scope.ts @@ -28,6 +28,33 @@ interface ScopeModelData { apiKey?: string; /** Endpoint base URL; absent when the provider default is used (treated as ''). */ baseUrl?: string; + /** Model identifier, e.g. `meta-llama/llama-3.3-70b-instruct:free`. */ + modelId?: string; + /** Per-million-token input price in cents; `0` marks a no-cost model. */ + inputCentsPerMillion?: number; + /** Per-million-token output price in cents; `0` marks a no-cost model. */ + outputCentsPerMillion?: number; +} + +/** + * Whether a model draws on NO provider credits — so an out-of-funds failure on a + * sibling that shares its credential must NOT retire it. 
Two independent signals: + * + * - the OpenRouter `:free` id suffix (its free variants never bill the account); + * - explicit zero token pricing on BOTH sides (a deliberately free/local model). + * + * Unconfigured pricing (`undefined`) is intentionally NOT treated as free — only + * an explicit `0` counts, so a model whose cost simply wasn't filled in stays + * subject to credit retirement. + */ +export function isFreeModel(data: ScopeModelData): boolean { + if ( + typeof data.modelId === 'string' && + data.modelId.toLowerCase().includes(':free') + ) { + return true; + } + return data.inputCentsPerMillion === 0 && data.outputCentsPerMillion === 0; } /** FNV-1a 32-bit → 8-char hex. Stable, non-reversible bucket id for an API key. */ @@ -40,17 +67,34 @@ function fingerprint(value: string): string { return (hash >>> 0).toString(16).padStart(8, '0'); } -/** Credential identity: provider + API key (funds / auth are key-scoped). */ +/** Credential identity for AUTH failures: provider + API key. */ export function credentialScopeKey(data: ScopeModelData): string { return `cred:${data.providerName}:${fingerprint(data.apiKey ?? '')}`; } +/** + * Credential identity for CREDIT (out-of-funds) failures: provider + API key. + * + * Distinct namespace from {@link credentialScopeKey} so the dead-set records WHY + * the credential died. An out-of-funds failure must spare zero-cost siblings on + * the same key (they don't draw credits); an auth failure must not. Same inputs, + * different prefix — they never collide. + */ +export function creditScopeKey(data: ScopeModelData): string { + return `credit:${data.providerName}:${fingerprint(data.apiKey ?? '')}`; +} + /** Endpoint identity: provider + baseUrl (reachability is endpoint-scoped). */ export function endpointScopeKey(data: ScopeModelData): string { return `host:${data.providerName}:${data.baseUrl ?? ''}`; } -/** Every scope a model belongs to — for dead-set membership tests. 
*/ +/** + * The UNCONDITIONAL scopes a model belongs to — auth credential + endpoint. + * A dead entry in either retires the model regardless of its pricing. Credit + * retirement is conditional (free models are exempt) and handled separately in + * {@link isModelScopeRetired}. + */ export function modelScopeKeys(data: ScopeModelData): readonly string[] { return [credentialScopeKey(data), endpointScopeKey(data)]; } @@ -65,16 +109,23 @@ export function retiredScopeKey( data: ScopeModelData, ): string | null { if (code === 'provider_unreachable') return endpointScopeKey(data); - if (code === 'credit_exhausted' || code === 'auth_error') { - return credentialScopeKey(data); - } + if (code === 'auth_error') return credentialScopeKey(data); + if (code === 'credit_exhausted') return creditScopeKey(data); return null; } -/** True if any of the model's scopes has already been retired this turn. */ +/** + * True if any of the model's scopes has already been retired this turn. + * + * Auth-credential and endpoint deaths retire every model on the resource. A + * CREDIT (out-of-funds) death retires only the paid models on the credential — + * a zero-cost sibling ({@link isFreeModel}) is still attempted, because the + * account being out of funds doesn't stop a model that costs nothing. + */ export function isModelScopeRetired( data: ScopeModelData, deadScopes: ReadonlySet<string>, ): boolean { - return modelScopeKeys(data).some((key) => deadScopes.has(key)); + if (modelScopeKeys(data).some((key) => deadScopes.has(key))) return true; + return deadScopes.has(creditScopeKey(data)) && !isFreeModel(data); }