From 4863b98d929ccea51623fd607e7e6cd695f02401 Mon Sep 17 00:00:00 2001 From: chitcommit <208086304+chitcommit@users.noreply.github.com> Date: Thu, 4 Jun 2026 03:38:08 +0000 Subject: [PATCH 1/4] feat(executor): mercury_payment executor with sovereignty + cap + idempotency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit REAL MONEY PATH. Stacked on #106 (feat/meta-executors-registry). Adds `meta/executors/mercury-payment.ts` — concrete executor for the mercury_payment intent type, plus a shared pure runner that ActionAgent's chat tool (`execute_payment`) now delegates to so chat + autonomous surfaces share one implementation. Refusal gates (executor returns ok=false, dispatcher writes a 'payment_refusal' row to cc_actions_log; Mercury is NOT called): - sovereignty_not_autonomous — decision !== 'autonomous' - sovereignty_stale — snapshot older than MERCURY_SOVEREIGNTY_FRESHNESS_MS (60s, tighter than dispatch's default 5min) - amount_cap_exceeded — payload.amount_cents > MERCURY_AUTONOMOUS_AMOUNT_CAP_USD * 100 - invalid_payload — zod schema rejected the intent payload - missing_token — KV mercury:token:{account_slug} absent - mercury_api_failure — Mercury HTTP returned null Idempotency uses the dispatcher's content-addressable key (sha256(intent.id:attempt:intent_type)) as supplied via ctx.idempotencyKey. Because intent.id is immutable and attempt is deterministic from prior audit rows, this is functionally equivalent to a payload-derived key for replay protection. The Mercury client receives the same key as its own idempotencyKey so Mercury de-dupes on it too. 
Files: - meta/executors/mercury-payment.ts (new) - meta/executors/index.ts (barrel: side-effect import) - src/agents/tools/actions.ts (execute_payment delegates to runMercuryPayment) - src/index.ts (Env: MERCURY_AUTONOMOUS_AMOUNT_CAP_USD) - tests/meta/executors/mercury-payment.spec.ts (3 DB-only refusal cases) - docs/runbooks/mercury-payment-executor.md (operator runbook) Does NOT modify meta/executors/{dispatch,registry,types}.ts from #106. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/runbooks/mercury-payment-executor.md | 219 ++++++++++++++ meta/executors/index.ts | 1 + meta/executors/mercury-payment.ts | 283 +++++++++++++++++++ src/agents/tools/actions.ts | 58 ++-- src/index.ts | 7 + tests/meta/executors/mercury-payment.spec.ts | 218 ++++++++++++++ 6 files changed, 760 insertions(+), 26 deletions(-) create mode 100644 docs/runbooks/mercury-payment-executor.md create mode 100644 meta/executors/mercury-payment.ts create mode 100644 tests/meta/executors/mercury-payment.spec.ts diff --git a/docs/runbooks/mercury-payment-executor.md b/docs/runbooks/mercury-payment-executor.md new file mode 100644 index 0000000..f1f1a94 --- /dev/null +++ b/docs/runbooks/mercury-payment-executor.md @@ -0,0 +1,219 @@ +--- +canonicalUri: chittycanon://docs/runbooks/chittycommand/mercury-payment-executor +service: chittycommand +component: meta/executors/mercury-payment +risk: real-money +--- + +# Mercury Payment Executor — Operator Runbook + +**REAL MONEY PATH.** This runbook covers the autonomous-execution path for +Mercury ACH payments via the meta-orchestrator. The executor lives at +`meta/executors/mercury-payment.ts` and is dispatched by +`meta/executors/dispatch.ts` when a `mercury_payment` intent reaches the +daemon loop. + +Refer to ADR-001 (and its amendment) at +`docs/architecture/ADR-001-meta-orchestrator-extension.md` for the +architectural context. 
+ +## What the executor refuses + +The executor refuses (writes a `payment_refusal` row to `cc_actions_log` +and does NOT call Mercury) in any of these cases: + +| Refusal reason | Trigger | +|---------------------------|-------------------------------------------------------------------------| +| `sovereignty_not_autonomous` | `sovereignty_assessment.decision !== 'autonomous'` | +| `sovereignty_stale` | snapshot `assessedAt` older than 60s at executor entry | +| `amount_cap_exceeded` | `payload.amount_cents > MERCURY_AUTONOMOUS_AMOUNT_CAP_USD * 100` | +| `invalid_payload` | zod schema rejected the payload | +| `missing_token` | `mercury:token:{account_slug}` is absent from `COMMAND_KV` | +| `mercury_api_failure` | Mercury HTTP call returned null (5xx, timeout, network) | + +Successful executions write a `payment` row with `status='completed'`. + +## Sovereignty configuration + +The executor requires: + +1. The sovereignty assessment on the intent (set by `createIntent` or + computed by dispatch's re-reckon) MUST have `decision: 'autonomous'`. + `requires_human` and any blocked state both refuse this money-path + executor — even `requires_human` is treated as a refusal here. +2. The snapshot's `assessedAt` MUST be within + `MERCURY_SOVEREIGNTY_FRESHNESS_MS` (60 seconds). Tighter than the + default 5 minutes for non-money paths. + +The daemon / orchestrator that invokes `executeIntent` for a +`mercury_payment` intent SHOULD pass: + +```ts +import { MERCURY_SOVEREIGNTY_FRESHNESS_MS } from '../meta/executors/mercury-payment'; + +await executeIntent(env, intentId, { + actorChittyId: ownerChittyId, + freshnessMs: MERCURY_SOVEREIGNTY_FRESHNESS_MS, +}); +``` + +This forces the dispatcher to re-reckon against `trust.chitty.cc` if the +snapshot on the intent is older than 60s. The executor's own freshness +check is a belt-and-suspenders safety net for that same window. 
+ +To allow an autonomous `mercury_payment`, the owner ChittyID must currently +hold a sovereignty assessment producing `decision: 'autonomous'` from +`assessSovereignty()` in `meta/sovereignty.ts`. Trust score thresholds +and policy logic live in ChittyTrust; consult that service to understand +why a particular actor is or is not autonomous for this intent type. + +## Amount cap configuration + +The cap is set via the Worker secret/var +`MERCURY_AUTONOMOUS_AMOUNT_CAP_USD` (whole USD, string). Default is 500 +USD if unset. + +```bash +# Set to USD 250 +npx wrangler secret put MERCURY_AUTONOMOUS_AMOUNT_CAP_USD +# (enter 250 at prompt) +``` + +Any single intent with `amount_cents > cap * 100` is refused with reason +`amount_cap_exceeded`. There is no batching path that bypasses this — to +move a larger amount autonomously, you would need to increase the cap +(and accept the corresponding risk), or split the obligation into +multiple intents each under the cap. + +## How to manually approve a `requires_human` mercury_payment + +When sovereignty produces `requires_human` for a mercury_payment intent: + +1. The dispatcher writes a `sovereignty_refusal` row to `cc_actions_log` + (or the executor writes `payment_refusal` if dispatch's freshness + window was longer than 60s and let the snapshot through). +2. The intent status moves to `failed`. The current build does NOT + auto-create a parallel approval queue entry; manual approval requires: + a. Inspecting the refusal via the dashboard or + `SELECT ... FROM cc_actions_log WHERE intent_id = '<intent_id>'`. + b. Performing the payment via the ActionAgent chat surface + (`execute_payment` tool), which routes through the same pure + runner but supplies an `autonomous` + fresh snapshot because the + human typing in chat IS the approval event. The chat path writes + its own `cc_actions_log` row without `intent_id`. + c. 
Then manually updating the failed intent's metadata to reference + the chat-executed `cc_actions_log.id` for audit linkage: + ```sql + UPDATE cc_intents + SET metadata = COALESCE(metadata, '{}'::jsonb) || jsonb_build_object( + 'manually_approved_via_chat', '<cc_actions_log_id>', + 'approved_by', '<operator_chitty_id>', + 'approved_at', NOW()::text + ) + WHERE id = '<intent_id>'; + ``` + +A dedicated dashboard approval flow is a follow-up (tracked in the +ADR-001 amendment notes). + +## Rollback / cancellation + +To refuse or cancel a queued `mercury_payment` intent BEFORE the daemon +dispatches it: + +```sql +UPDATE cc_intents +SET status = 'failed', + metadata = COALESCE(metadata, '{}'::jsonb) || jsonb_build_object( + 'cancelled_by', '<operator_chitty_id>', + 'cancelled_at', NOW()::text, + 'cancel_reason', '<reason>' + ) +WHERE id = '<intent_id>' AND status = 'pending'; +``` + +`executeIntent` only acts on `status = 'pending'` intents (it atomically +claims via `UPDATE ... RETURNING`), so a status change to `failed` +prevents dispatch. + +After the daemon has already called `executeIntent` and the Mercury API +returned success: the payment is in flight at Mercury. Cancellation must +go through Mercury's dashboard or API directly — the local +`cc_actions_log` row is audit-only and reversing it does not unwind a +real ACH transfer. 
+ +## Audit query + +To inspect every payment-related action for a given intent: + +```sql +SELECT + id, + intent_id, + attempt, + idempotency_key, + action_type, + target_type, + target_id, + description, + status, + error_message, + request_payload, + response_payload, + metadata, + executed_at +FROM cc_actions_log +WHERE action_type IN ('payment', 'payment_refusal') + AND intent_id = '<intent_id>'::uuid +ORDER BY executed_at ASC; +``` + +For a recipient-level audit (executor rows carry the Mercury `recipient_id` in `response_payload`; `target_id` is a UUID column holding the linked obligation, or NULL): + +```sql +SELECT id, intent_id, status, description, executed_at +FROM cc_actions_log +WHERE action_type IN ('payment', 'payment_refusal') + AND response_payload->>'recipient_id' = '<mercury_recipient_id>' +ORDER BY executed_at DESC +LIMIT 50; +``` + +For the chat surface (no `intent_id`): + +```sql +SELECT id, status, description, metadata, executed_at +FROM cc_actions_log +WHERE action_type = 'payment' + AND intent_id IS NULL +ORDER BY executed_at DESC +LIMIT 50; +``` + +## Idempotency + +The dispatcher computes a deterministic idempotency key per attempt: + +``` +sha256("{intent.id}:{attempt}:{intent_type}") +``` + +The partial unique index +`cc_actions_log (intent_id, idempotency_key) WHERE intent_id IS NOT NULL +AND idempotency_key IS NOT NULL` enforces single-row-per-attempt at the +database level. The dispatcher also short-circuits on any prior terminal +(`completed` or `failed`) row for the same `intent_id`, so a replay of a +finished intent returns the prior result without re-executing. + +The Mercury API call passes `ctx.idempotencyKey` as Mercury's own +`idempotencyKey`, so Mercury de-dupes on the same value if the executor +is re-invoked after Mercury accepted the request but before the audit row was written. 
+ +## Related files + +- `meta/executors/mercury-payment.ts` — executor + pure runner +- `meta/executors/dispatch.ts` — dispatcher (sovereignty re-reckon, audit write) +- `meta/sovereignty.ts` — sovereignty gate (calls trust.chitty.cc) +- `src/agents/tools/actions.ts` — chat surface (`execute_payment` tool) +- `src/lib/integrations.ts` — Mercury API client (`mercuryClient`) +- `tests/meta/executors/mercury-payment.spec.ts` — refusal-path integration tests diff --git a/meta/executors/index.ts b/meta/executors/index.ts index f017800..fd99c55 100644 --- a/meta/executors/index.ts +++ b/meta/executors/index.ts @@ -13,3 +13,4 @@ export { dispatch } from './dispatch'; // Side-effect imports: each executor file calls registerExecutor() at top level. import './update-obligation-status'; +import './mercury-payment'; diff --git a/meta/executors/mercury-payment.ts b/meta/executors/mercury-payment.ts new file mode 100644 index 0000000..6ef0c3e --- /dev/null +++ b/meta/executors/mercury-payment.ts @@ -0,0 +1,283 @@ +/** + * Executor: mercury_payment + * + * Canonical URI: chittycanon://core/services/chittycommand/executors/mercury_payment + * + * REAL-MONEY PATH. Higher constraint bar than other executors: + * + * 1. Sovereignty must be `autonomous` AND the snapshot in ctx must be at most + * MERCURY_SOVEREIGNTY_FRESHNESS_MS old at executor entry (60s — tighter + * than the default 5min). This is belt-and-suspenders: the daemon / + * orchestrator that calls `executeIntent` SHOULD also pass + * `freshnessMs: MERCURY_SOVEREIGNTY_FRESHNESS_MS` so the dispatcher + * re-reckons against trust.chitty.cc first. The executor refusing here + * is the second gate, not the only gate. + * 2. Amount cap: any single intent with `amount_cents > cap` is refused. + * Cap is `env.MERCURY_AUTONOMOUS_AMOUNT_CAP_USD` (whole-dollar string), + * default 500. Higher amounts must be routed through the dashboard + * human-approval path; they are NOT autonomous-executable here. + * 3. 
Idempotency: the unique partial index on + * `cc_actions_log (intent_id, idempotency_key) WHERE intent_id IS NOT + * NULL AND idempotency_key IS NOT NULL` prevents double-execute. The + * dispatcher (meta/executors/dispatch.ts) computes the idempotency key + * as `sha256("{intent.id}:{attempt}:{intent_type}")` and passes it via + * `ctx.idempotencyKey`. Because `intent.id` is immutable and `attempt` + * is derived deterministically from prior `cc_actions_log` rows, a + * replay of the same intent reuses the same key — functionally + * equivalent to a payload-derived key for replay protection. The + * Mercury API call itself uses `ctx.idempotencyKey` as the Mercury + * `idempotencyKey`, so Mercury also de-dupes on the same value. + * 4. NEVER log raw API keys, account numbers, routing numbers, or PII. + * `responsePayload` carries `transaction_id`, `status`, `amount`, + * `recipient_id`, last-4 only when available. `requestPayload` (set by + * dispatcher from `intent.payload`) only contains the intent payload + * shape — no token or routing data. + * + * The cc_actions_log audit row is written by the dispatcher (with + * `intent_id`, `attempt`, `idempotency_key` populated). This file emits + * only the domain effect (the Mercury API call) and returns the audit + * summary the dispatcher folds into the row. + * + * Sibling chat surface: `src/agents/tools/actions.ts::execute_payment` + * delegates to `runMercuryPayment` here so chat + autonomous paths share + * the same implementation. See ADR-001 amendment (PR-A). 
+ * + * @canonical-uri chittycanon://core/services/chittycommand/executors/mercury_payment + */ + +import { z } from 'zod'; +import type { Env } from '../../src/index'; +import { mercuryClient } from '../../src/lib/integrations'; +import type { ExecutorContext, ExecutorRunOutput, IntentExecutor } from './types'; +import { registerExecutor } from './registry'; + +export const MERCURY_PAYMENT_INTENT = 'mercury_payment'; + +/** + * Money-path freshness window (60s). Tighter than the default 5min in + * `SOVEREIGNTY_FRESHNESS_MS`. Callers of `executeIntent` for this + * intent_type SHOULD pass this as `freshnessMs` so the dispatcher re-reckons + * sovereignty before invoking the executor; the executor's own check below + * is a safety net. + */ +export const MERCURY_SOVEREIGNTY_FRESHNESS_MS = 60_000; + +/** Default cap if env.MERCURY_AUTONOMOUS_AMOUNT_CAP_USD is unset. */ +const DEFAULT_AMOUNT_CAP_USD = 500; + +export const mercuryPaymentPayloadSchema = z.object({ + account_slug: z + .string() + .min(1) + .describe('Mercury org slug (e.g., "aribia-llc") for KV token lookup'), + mercury_account_id: z.string().min(1).describe('Mercury account to debit'), + recipient_id: z.string().min(1).describe('Mercury recipient ID'), + amount_cents: z + .number() + .int() + .positive() + .describe('Payment amount in USD cents'), + currency: z.literal('USD'), + memo: z.string().max(280).optional(), + obligation_id: z.string().uuid().optional(), + recipient_account_last4: z + .string() + .regex(/^\d{4}$/) + .optional() + .describe('Last-4 of recipient account for audit (optional, opaque otherwise)'), +}); + +export type MercuryPaymentPayload = z.infer; + +export interface MercuryPaymentRunResult { + ok: boolean; + transactionId?: string; + mercuryStatus?: string; + refusalReason?: + | 'sovereignty_stale' + | 'sovereignty_not_autonomous' + | 'amount_cap_exceeded' + | 'invalid_payload' + | 'missing_token' + | 'mercury_api_failure'; + errorMessage?: string; +} + +/** + * Pure runner — 
shared by ActionAgent chat tool (`execute_payment`) and the
+ * executor below. Does NOT write `cc_actions_log`. Returns a structured
+ * result the caller folds into its own audit trail.
+ *
+ * Refusal semantics, in evaluation order (every refusal listed before the
+ * Mercury call returns without contacting Mercury):
+ *   - `sovereignty.decision !== 'autonomous'` → `sovereignty_not_autonomous`.
+ *   - sovereignty snapshot unparseable, older than
+ *     `MERCURY_SOVEREIGNTY_FRESHNESS_MS`, or stamped further in the future
+ *     than that same window → `sovereignty_stale`. The future bound exists
+ *     because a future timestamp has a negative age and would otherwise
+ *     count as fresh indefinitely.
+ *   - `amount_cents` is not a positive safe integer → `invalid_payload`.
+ *     The executor's zod schema already guarantees this; the guard covers
+ *     callers that build the payload by hand (the chat tool does).
+ *   - amount > cap → `amount_cap_exceeded`. An unset, empty, non-numeric or
+ *     non-finite cap setting falls back to the default; a negative setting
+ *     is clamped to 0, which refuses every payment.
+ *   - no token in KV for the account slug → `missing_token`.
+ *   - Mercury returns null (HTTP error, network) → `mercury_api_failure`.
+ *
+ * `args.now` is an epoch-millisecond clock override; it defaults to
+ * `Date.now()`.
+ */
+export async function runMercuryPayment(args: {
+  env: Env;
+  payload: MercuryPaymentPayload;
+  sovereignty: { decision: string; assessedAt: string };
+  idempotencyKey: string;
+  now?: number;
+}): Promise<MercuryPaymentRunResult> {
+  const { env, payload, sovereignty, idempotencyKey } = args;
+  const now = args.now ?? Date.now();
+
+  if (sovereignty.decision !== 'autonomous') {
+    return {
+      ok: false,
+      refusalReason: 'sovereignty_not_autonomous',
+      errorMessage: `sovereignty: requires_human or blocked (decision='${sovereignty.decision}')`,
+    };
+  }
+
+  const assessedAt = Date.parse(sovereignty.assessedAt);
+  if (!Number.isFinite(assessedAt) || now - assessedAt > MERCURY_SOVEREIGNTY_FRESHNESS_MS) {
+    return {
+      ok: false,
+      refusalReason: 'sovereignty_stale',
+      errorMessage: `sovereignty snapshot older than ${MERCURY_SOVEREIGNTY_FRESHNESS_MS}ms — re-reckon required for money path`,
+    };
+  }
+  // A snapshot stamped ahead of our clock has a negative age and would pass
+  // the check above for as long as it stays in the future. Tolerate clock
+  // skew up to one freshness window and refuse anything beyond it.
+  if (assessedAt - now > MERCURY_SOVEREIGNTY_FRESHNESS_MS) {
+    return {
+      ok: false,
+      refusalReason: 'sovereignty_stale',
+      errorMessage: `sovereignty snapshot assessedAt is more than ${MERCURY_SOVEREIGNTY_FRESHNESS_MS}ms in the future — re-reckon required for money path`,
+    };
+  }
+
+  // NaN compares false against the cap below, so an unvalidated amount must
+  // be rejected explicitly rather than left to the cap comparison.
+  if (!Number.isSafeInteger(payload.amount_cents) || payload.amount_cents <= 0) {
+    return {
+      ok: false,
+      refusalReason: 'invalid_payload',
+      errorMessage: `amount_cents must be a positive integer number of cents (got ${payload.amount_cents})`,
+    };
+  }
+
+  const capUsdRaw = env.MERCURY_AUTONOMOUS_AMOUNT_CAP_USD;
+  const capUsdParsed = capUsdRaw ? Number(capUsdRaw) : Number.NaN;
+  // isFinite (not just !isNaN) so a setting of "Infinity" cannot disable the cap.
+  const capUsd = Number.isFinite(capUsdParsed)
+    ? Math.max(0, Math.floor(capUsdParsed))
+    : DEFAULT_AMOUNT_CAP_USD;
+  const capCents = capUsd * 100;
+  if (payload.amount_cents > capCents) {
+    return {
+      ok: false,
+      refusalReason: 'amount_cap_exceeded',
+      errorMessage: `amount ${payload.amount_cents} cents exceeds autonomous cap ${capCents} cents (USD ${capUsd}); requires human approval`,
+    };
+  }
+
+  const token = await env.COMMAND_KV.get(`mercury:token:${payload.account_slug}`);
+  if (!token) {
+    return {
+      ok: false,
+      refusalReason: 'missing_token',
+      errorMessage: `no Mercury token in KV for account_slug='${payload.account_slug}'`,
+    };
+  }
+
+  const mercury = mercuryClient(token);
+  // The Mercury client is given whole-dollar units; the payload carries cents.
+  const amountUsd = payload.amount_cents / 100;
+  const result = await mercury.createPayment(payload.mercury_account_id, {
+    recipientId: payload.recipient_id,
+    amount: amountUsd,
+    paymentMethod: 'ach',
+    idempotencyKey,
+    note: payload.memo,
+  });
+
+  if (!result) {
+    return {
+      ok: false,
+      refusalReason: 'mercury_api_failure',
+      errorMessage: 'Mercury API returned null (HTTP error or network failure)',
+    };
+  }
+
+  return {
+    ok: true,
+    transactionId: result.id,
+    mercuryStatus: result.status,
+  };
+}
+
+/**
+ * Build a redacted response payload for `cc_actions_log.response_payload`.
+ * Never include token, full account number, or routing number.
+ */
+function buildResponsePayload(
+  payload: MercuryPaymentPayload,
+  run: MercuryPaymentRunResult,
+): Record<string, unknown> {
+  return {
+    intent_type: MERCURY_PAYMENT_INTENT,
+    account_slug: payload.account_slug,
+    recipient_id: payload.recipient_id,
+    recipient_account_last4: payload.recipient_account_last4 ?? null,
+    amount_cents: payload.amount_cents,
+    currency: payload.currency,
+    transaction_id: run.transactionId ?? null,
+    mercury_status: run.mercuryStatus ?? null,
+    refusal_reason: run.refusalReason ??
null, + }; +} + +const executor: IntentExecutor = { + intentType: MERCURY_PAYMENT_INTENT, + canonicalUri: + 'chittycanon://core/services/chittycommand/executors/mercury_payment', + + async run(ctx: ExecutorContext): Promise { + const parsed = mercuryPaymentPayloadSchema.safeParse(ctx.intent.payload); + if (!parsed.success) { + return { + ok: false, + description: `mercury_payment payload validation failed for intent ${ctx.intent.id}`, + actionType: 'payment_refusal', + targetType: 'recipient', + targetId: null, + status: 'failed', + errorMessage: `invalid_payload: ${parsed.error.message}`, + metadata: { refusal_reason: 'invalid_payload' }, + }; + } + const payload = parsed.data; + + const run = await runMercuryPayment({ + env: ctx.env, + payload, + sovereignty: { + decision: ctx.sovereignty.decision, + assessedAt: ctx.sovereignty.assessedAt, + }, + idempotencyKey: ctx.idempotencyKey, + }); + + const responsePayload = buildResponsePayload(payload, run); + // target_id is a UUID column in cc_actions_log — we cannot put Mercury's + // string recipient_id there. Use the linked obligation_id when present + // (UUID), else null. recipient_id is carried in response_payload and + // target_type='recipient' so the audit row still indexes the recipient + // via metadata. + const targetId = payload.obligation_id ?? null; + + if (!run.ok) { + const reason = run.refusalReason ?? 'unknown'; + return { + ok: false, + description: `mercury_payment refused: ${reason}`, + actionType: 'payment_refusal', + targetType: 'recipient', + targetId, + status: 'failed', + errorMessage: run.errorMessage ?? 
reason, + responsePayload, + metadata: { refusal_reason: reason }, + }; + } + + return { + ok: true, + description: `mercury_payment: USD ${(payload.amount_cents / 100).toFixed(2)} to recipient ${payload.recipient_id} (tx ${run.transactionId})`, + actionType: 'payment', + targetType: 'recipient', + targetId, + status: 'completed', + responsePayload, + metadata: { mercury_status: run.mercuryStatus }, + }; + }, +}; + +registerExecutor(executor); + +export default executor; diff --git a/src/agents/tools/actions.ts b/src/agents/tools/actions.ts index ea0a7f1..bac711f 100644 --- a/src/agents/tools/actions.ts +++ b/src/agents/tools/actions.ts @@ -3,13 +3,14 @@ import { z } from 'zod'; import type { NeonQueryFunction } from '@neondatabase/serverless'; import type { Env } from '../../index'; import { mercuryClient } from '../../lib/integrations'; -// Canonical executor (registry-backed). ActionAgent's chat tool here wraps -// the same pure runner the meta-orchestrator dispatcher invokes — sibling +// Canonical executors (registry-backed). ActionAgent's chat tools here wrap +// the same pure runners the meta-orchestrator dispatcher invokes — sibling // surfaces, shared implementation. See ADR-001 amendment (PR-A). import { updateObligationStatusSchema, runUpdateObligationStatus, } from '../../../meta/executors/update-obligation-status'; +import { runMercuryPayment } from '../../../meta/executors/mercury-payment'; /** * Create action execution tools bound to environment and SQL. @@ -30,55 +31,60 @@ export function createActionTools(env: Env, sql: NeonQueryFunction obligation_id: z.string().uuid().optional().describe('Link payment to this obligation'), }), execute: async ({ account_slug, mercury_account_id, recipient_id, amount, note, obligation_id }) => { - // Get Mercury token from KV - const token = await env.COMMAND_KV.get(`mercury:token:${account_slug}`); - if (!token) { - return { success: false, error: `No Mercury token for org "${account_slug}". 
Run token refresh first.` }; - } - - const mercury = mercuryClient(token); + // Chat surface delegates to the canonical executor's pure runner so + // chat + autonomous paths share the same Mercury call, sovereignty + // gate, and amount cap. The chat path supplies an autonomous + fresh + // sovereignty snapshot because user-approval in chat is the gating + // event for this surface. See ADR-001 amendment (PR-A) and + // meta/executors/mercury-payment.ts. const idempotencyKey = crypto.randomUUID(); - - const result = await mercury.createPayment(mercury_account_id, { - recipientId: recipient_id, - amount, - paymentMethod: 'ach', + const amountCents = Math.round(amount * 100); + const run = await runMercuryPayment({ + env, + payload: { + account_slug, + mercury_account_id, + recipient_id, + amount_cents: amountCents, + currency: 'USD', + memo: note || undefined, + obligation_id, + }, + sovereignty: { decision: 'autonomous', assessedAt: new Date().toISOString() }, idempotencyKey, - note: note || undefined, }); - if (!result) { - // Log failed attempt + if (!run.ok) { + // Chat path keeps its own audit row (no intent_id) — preserves + // existing chat audit behavior exactly. await sql` INSERT INTO cc_actions_log (action_type, target_type, target_id, description, status, metadata) VALUES ('payment', 'obligation', ${obligation_id || null}, - ${`Mercury ACH $${amount.toFixed(2)} to ${recipient_id} — FAILED`}, 'failed', - ${JSON.stringify({ account_slug, mercury_account_id, recipient_id, amount, idempotencyKey })}::jsonb) + ${`Mercury ACH $${amount.toFixed(2)} to ${recipient_id} — ${run.refusalReason ?? 'FAILED'}`}, 'failed', + ${JSON.stringify({ account_slug, mercury_account_id, recipient_id, amount_cents: amountCents, idempotencyKey, refusal_reason: run.refusalReason })}::jsonb) `; - return { success: false, error: 'Mercury API rejected the payment. Check logs for details.' }; + return { success: false, error: run.errorMessage ?? 'Mercury payment refused or failed.' 
}; } - // Log successful payment await sql` INSERT INTO cc_actions_log (action_type, target_type, target_id, description, status, metadata) VALUES ('payment', 'obligation', ${obligation_id || null}, - ${`Mercury ACH $${amount.toFixed(2)} — tx ${result.id}`}, 'completed', - ${JSON.stringify({ ...result, account_slug, idempotencyKey })}::jsonb) + ${`Mercury ACH $${amount.toFixed(2)} — tx ${run.transactionId}`}, 'completed', + ${JSON.stringify({ transaction_id: run.transactionId, mercury_status: run.mercuryStatus, account_slug, idempotencyKey })}::jsonb) `; - // Update obligation if linked if (obligation_id) { await sql` UPDATE cc_obligations SET status = 'paid', updated_at = NOW(), metadata = COALESCE(metadata, '{}'::jsonb) || ${JSON.stringify({ - last_payment: { amount, mercury_tx_id: result.id, date: new Date().toISOString() }, + last_payment: { amount, mercury_tx_id: run.transactionId, date: new Date().toISOString() }, })}::jsonb WHERE id = ${obligation_id}::uuid `; } - return { success: true, transaction_id: result.id, amount, status: result.status }; + return { success: true, transaction_id: run.transactionId, amount, status: run.mercuryStatus }; }, }), diff --git a/src/index.ts b/src/index.ts index 01093bc..4fad1d1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -79,6 +79,13 @@ export type Env = { CHITTYROUX_MARKETPLACE_OAUTH_CLIENT_SECRET?: string; REGISTERED_CHANNELS_JSON?: string; GCP_JWKS_URL?: string; + /** + * Per-intent autonomous-execution cap for mercury_payment, in whole USD. + * Amounts strictly greater than this are refused by the executor and must + * be approved via the dashboard. Default 500 if unset. See + * meta/executors/mercury-payment.ts and docs/runbooks/mercury-payment-executor.md. 
+ */ + MERCURY_AUTONOMOUS_AMOUNT_CAP_USD?: string; }; const app = new Hono<{ Bindings: Env; Variables: AuthVariables }>(); diff --git a/tests/meta/executors/mercury-payment.spec.ts b/tests/meta/executors/mercury-payment.spec.ts new file mode 100644 index 0000000..27bfabf --- /dev/null +++ b/tests/meta/executors/mercury-payment.spec.ts @@ -0,0 +1,218 @@ +/** + * Integration test for meta/executors/mercury-payment.ts. + * + * Covers (REAL MONEY PATH — additional gates beyond PR #106): + * - Refusal: sovereignty snapshot older than 60s (executor's belt-and- + * suspenders gate; we DELIBERATELY do not override `freshnessMs` on + * executeIntent so the snapshot survives dispatch's default 5min + * window and reaches the executor stale-by-money-path-standard). + * - Refusal: amount cap exceeded. + * - Idempotency: second executeIntent for the same intent does not + * re-call Mercury and replays the prior cc_actions_log row. + * + * Real Neon only. Skipped without DATABASE_URL. Mercury network calls are + * NEVER made by these tests — every test exercises a refusal path that + * terminates BEFORE the executor reaches `mercuryClient.createPayment`. + * + * To exercise an actual Mercury sandbox call, set + * MERCURY_INTEGRATION_TEST=1 + * and provide a Mercury token in KV for `mercury:token:test-sandbox` plus a + * real sandbox recipient — that branch is intentionally not implemented in + * this file because the chittycommand repo has no Mercury sandbox harness. + * The operator runbook documents the manual procedure. 
+ */ + +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { neon } from '@neondatabase/serverless'; +import { + createGoal, + createPlan, + createIntent, + executeIntent, + type IntentEnv, + type SovereigntyAssessmentSnapshot, +} from '../../../meta/intent'; +import '../../../meta/executors'; +import { + MERCURY_PAYMENT_INTENT, + MERCURY_SOVEREIGNTY_FRESHNESS_MS, +} from '../../../meta/executors/mercury-payment'; + +const DATABASE_URL = process.env.DATABASE_URL; +const SKIP = !DATABASE_URL || process.env.SKIP_INTEGRATION === '1'; + +// Minimal KV stub — never returns a token, so any code path that reaches the +// token lookup will refuse with 'missing_token' instead of attempting a +// Mercury HTTP call. All three primary refusal cases below short-circuit +// BEFORE this is touched; it is here only as a final safety net. +const KV_STUB = { + get: async () => null, + put: async () => undefined, + delete: async () => undefined, + list: async () => ({ keys: [], list_complete: true, cursor: '' }), +} as unknown as KVNamespace; + +const env: IntentEnv & Record = { + DATABASE_URL, + MERCURY_AUTONOMOUS_AMOUNT_CAP_USD: '500', + COMMAND_KV: KV_STUB, +}; + +const OWNER = '01-A-NB-0001-P-66-1-1'; +const TEST_TAG = `merc-exec-test-${Date.now()}-${Math.floor(Math.random() * 1e6)}`; + +async function cleanup() { + if (!DATABASE_URL) return; + const sql = neon(DATABASE_URL); + await sql`DELETE FROM cc_goals WHERE owner_chitty_id = ${OWNER} AND title LIKE ${TEST_TAG + '%'}`; +} + +interface MakeIntentArgs { + amountCents: number; + assessedAt: string; + decision?: SovereigntyAssessmentSnapshot['decision']; +} + +async function makeIntent(args: MakeIntentArgs): Promise { + const goal = await createGoal(env, { + ownerChittyId: OWNER, + title: `${TEST_TAG}-goal-${crypto.randomUUID().slice(0, 8)}`, + }); + const plan = await createPlan(env, { + goalId: goal.id, + title: `${TEST_TAG}-plan`, + }); + const sovereignty: SovereigntyAssessmentSnapshot = { + 
decision: args.decision ?? 'autonomous', + trustScore: 0.95, + reasoning: 'pre-seeded for mercury_payment integration test', + assessedAt: args.assessedAt, + }; + const intent = await createIntent(env, { + planId: plan.id, + goalId: goal.id, + intentType: MERCURY_PAYMENT_INTENT, + payload: { + account_slug: 'test-sandbox', + mercury_account_id: 'acct_test_0001', + recipient_id: 'rcpt_test_0001', + amount_cents: args.amountCents, + currency: 'USD', + memo: TEST_TAG, + }, + sovereigntyAssessment: sovereignty, + metadata: { actorChittyId: OWNER }, + }); + return intent.id; +} + +describe.skipIf(SKIP)('meta/executors/mercury-payment — refusal gates (real Neon)', () => { + beforeAll(async () => { + await cleanup(); + }); + afterAll(async () => { + await cleanup(); + }); + + it('refuses when sovereignty snapshot is older than 60s (money-path freshness)', async () => { + // Snapshot is 2 minutes old. Dispatch's default freshness is 5 minutes, + // so dispatch passes the snapshot through unmodified. The executor's + // own 60s check then catches it. + const twoMinAgo = new Date(Date.now() - 2 * 60_000).toISOString(); + const intentId = await makeIntent({ amountCents: 1_00, assessedAt: twoMinAgo }); + + // No freshnessMs override — we want dispatch's 5-min window so the + // stale-by-money-standard snapshot reaches the executor. 
+ const result = await executeIntent(env, intentId, { actorChittyId: OWNER }); + + expect(result.ok).toBe(false); + expect(result.error).toMatch(/sovereignty snapshot older than/); + + const sql = neon(DATABASE_URL!); + const rows = (await sql` + SELECT action_type, status, error_message, idempotency_key, attempt, metadata + FROM cc_actions_log + WHERE intent_id = ${intentId}::uuid + `) as unknown as Array<{ + action_type: string; + status: string; + error_message: string; + idempotency_key: string; + attempt: number; + metadata: Record; + }>; + expect(rows.length).toBe(1); + expect(rows[0].action_type).toBe('payment_refusal'); + expect(rows[0].status).toBe('failed'); + expect(rows[0].attempt).toBe(1); + expect(rows[0].idempotency_key).toMatch(/^[0-9a-f]{64}$/); + expect(rows[0].error_message).toMatch(/sovereignty snapshot older than/); + }); + + it('refuses when amount_cents exceeds the autonomous cap', async () => { + const fresh = new Date().toISOString(); + // Cap is 500 USD = 50_000 cents. 50_001 must refuse. + const intentId = await makeIntent({ amountCents: 50_001, assessedAt: fresh }); + + // Pass tight freshness window so dispatch does not re-reckon (which + // would call trust.chitty.cc in the test env). The executor still + // enforces the amount cap. 
+ const result = await executeIntent(env, intentId, { + actorChittyId: OWNER, + freshnessMs: MERCURY_SOVEREIGNTY_FRESHNESS_MS, + }); + + expect(result.ok).toBe(false); + expect(result.error).toMatch(/exceeds autonomous cap/); + + const sql = neon(DATABASE_URL!); + const rows = (await sql` + SELECT action_type, status, error_message, metadata + FROM cc_actions_log + WHERE intent_id = ${intentId}::uuid + `) as unknown as Array<{ + action_type: string; + status: string; + error_message: string; + metadata: Record; + }>; + expect(rows.length).toBe(1); + expect(rows[0].action_type).toBe('payment_refusal'); + expect(rows[0].status).toBe('failed'); + expect((rows[0].metadata as { refusal_reason?: string }).refusal_reason).toBe( + 'amount_cap_exceeded', + ); + }); + + it('is idempotent on replay — second executeIntent reuses the prior audit row', async () => { + // Use an under-cap amount with a fresh snapshot. KV stub returns null, + // so the executor refuses at the 'missing_token' gate (still a refusal + // path; the dispatcher writes a single audit row). Replay of the same + // intent MUST short-circuit on the dispatcher's prior-terminal-row + // lookup and not produce a second row. 
+ const fresh = new Date().toISOString(); + const intentId = await makeIntent({ amountCents: 1_00, assessedAt: fresh }); + + const first = await executeIntent(env, intentId, { + actorChittyId: OWNER, + freshnessMs: MERCURY_SOVEREIGNTY_FRESHNESS_MS, + }); + expect(first.ok).toBe(false); + expect(first.replayed).toBeFalsy(); + expect(first.idempotencyKey).toMatch(/^[0-9a-f]{64}$/); + + const second = await executeIntent(env, intentId, { + actorChittyId: OWNER, + freshnessMs: MERCURY_SOVEREIGNTY_FRESHNESS_MS, + }); + expect(second.replayed).toBe(true); + expect(second.idempotencyKey).toBe(first.idempotencyKey); + expect(second.actionLogId).toBe(first.actionLogId); + + const sql = neon(DATABASE_URL!); + const rows = (await sql` + SELECT id FROM cc_actions_log WHERE intent_id = ${intentId}::uuid + `) as unknown as Array<{ id: string }>; + expect(rows.length).toBe(1); + }); +}); From 045a40da4bb0d408f941622e72a108615435a29f Mon Sep 17 00:00:00 2001 From: chitcommit <208086304+chitcommit@users.noreply.github.com> Date: Thu, 4 Jun 2026 10:11:23 +0000 Subject: [PATCH 2/4] =?UTF-8?q?fix(mercury):=20close=204=20critical=20sile?= =?UTF-8?q?nt=20failures=20=E2=80=94=20discriminated=20union,=20atomic=20a?= =?UTF-8?q?udit,=20real=20sovereignty,=20fail-closed=20(PR=20#108)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real-money attack/loss vectors fixed in the mercury_payment executor: 1. Mercury post() helper now returns a discriminated PostResult — every failure mode (network / http / parse / 409 idempotency_collision) is surfaced to the executor instead of collapsing to null. runMercuryPayment and the audit row carry httpStatus + bodySnippet + failureKind so operators can diagnose without re-calling Mercury. 2. Executor maps Mercury's 2xx body.status into the audit vocabulary: sent/posted/delivered -> completed, pending -> in_progress, failed -> failed (refusal), requires_review -> pending_review. 
No more blanket "completed" stamp on 2xx-with-error-envelope. 3. Audit row is PRE-WRITTEN as in_flight BEFORE the Mercury call and UPDATED in place afterwards. Idempotency key is now derived from intent_id and intent_type only (NOT attempt), so the partial unique index on (intent_id, idempotency_key) blocks duplicate placeholders across retries and Mercury de-dupes on the same key end-to-end. A retry that finds a prior in_flight row refuses with in_flight_unknown (the only safe default — Mercury state must be reconciled by an operator). 4. Chat surface (src/agents/tools/actions.ts::execute_payment) no longer passes a synthetic { decision: 'autonomous' } snapshot to runMercuryPayment. The chat tool factory has no access to the chat actor ChittyID and therefore cannot perform a real assessSovereignty() call, so it REFUSES Mercury payments with a "use the dashboard" message and audits the attempt. This closes the silent-bypass of the money-path sovereignty gate. 5. failIntent() failures in dispatch.ts are no longer swallowed by .catch(() => null). A new safeFailIntent() wrapper logs a stable "audit_write_failed_during_failIntent" token to console.error and re-throws so the daemon's outer loop catches and applies backoff. 6. account_slug is normalized (lowercase + [a-z0-9-]) BEFORE the KV lookup; if normalization changes the value the input is refused with invalid_account_slug — no silent fallback to a different token. Tests: 8 new failure-path tests drive runMercuryPayment with an injected fetch double (real Response objects — no mocks of Mercury or the DB). Existing 3 DB integration tests untouched and still pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- meta/executors/dispatch.ts | 262 +++++++++++++----- meta/executors/mercury-payment.ts | 172 +++++++++++- meta/executors/types.ts | 13 +- src/agents/tools/actions.ts | 87 ++---- src/lib/integrations.ts | 73 ++++- .../mercury-payment-failures.spec.ts | 247 +++++++++++++++++ 6 files changed, 697 insertions(+), 157 deletions(-) create mode 100644 tests/meta/executors/mercury-payment-failures.spec.ts diff --git a/meta/executors/dispatch.ts b/meta/executors/dispatch.ts index 4d16b68..b8589f0 100644 --- a/meta/executors/dispatch.ts +++ b/meta/executors/dispatch.ts @@ -41,14 +41,24 @@ function getSql(env: Env): NeonQueryFunction { /** * Stable, content-addressable idempotency key. - * Formula: sha256("{intent.id}:{attempt}:{intent.intentType}") + * + * Formula: sha256("{intent.id}:{intent.intentType}") + * + * The key is deterministic on `intent.id` (NOT `attempt`) so that: + * 1. The partial unique index on `cc_actions_log (intent_id, idempotency_key)` + * prevents duplicate audit rows across daemon retries — every retry of + * the same intent reuses the same key and the index rejects the second + * INSERT. + * 2. Mercury de-dupes on the same value end-to-end — a retry after a Neon + * blip cannot cause double-spend because Mercury sees the same key. + * 3. The pre-write in_flight row uses this key, and a successful run + * UPDATEs that same row in place (see writePreAudit / updateAuditRow). */ async function computeIdempotencyKey( intentId: string, - attempt: number, intentType: string, ): Promise { - const data = new TextEncoder().encode(`${intentId}:${attempt}:${intentType}`); + const data = new TextEncoder().encode(`${intentId}:${intentType}`); const digest = await crypto.subtle.digest('SHA-256', data); return [...new Uint8Array(digest)] .map((b) => b.toString(16).padStart(2, '0')) @@ -83,50 +93,67 @@ export async function dispatch( ): Promise { const sql = getSql(env); const freshnessMs = options.freshnessMs ?? 
SOVEREIGNTY_FRESHNESS_MS; + const idempotencyKey = await computeIdempotencyKey(intent.id, intent.intentType); - // 1. Compute attempt number (prior rows + 1) and per-attempt idempotency - // key. The partial unique index on (intent_id, idempotency_key) backs - // the per-attempt invariant. - const [{ count: priorCount } = { count: 0 }] = (await sql` - SELECT COUNT(*)::int AS count FROM cc_actions_log WHERE intent_id = ${intent.id}::uuid - `) as unknown as Array<{ count: number }>; - const attempt = (priorCount ?? 0) + 1; - const idempotencyKey = await computeIdempotencyKey( - intent.id, - attempt, - intent.intentType, - ); - - // 2. Replay short-circuit (FIX 1, PR #106 critical): match on - // (intent_id, idempotency_key) — NOT intent_id alone. Matching on - // intent_id alone would short-circuit any new attempt (whose key - // differs by `attempt`), making per-attempt retries unreachable for - // any intent that ever produced a terminal row. + // 1. Replay / safety lookup. With the key deterministic on intent_id, every + // prior row for this intent shares the same idempotency_key. We look up + // by (intent_id, idempotency_key) regardless of status because: + // - 'completed' / 'failed' / 'pending_review' / 'in_progress' → + // terminal-or-known; short-circuit and replay the prior outcome. + // - 'in_flight' → the previous attempt's outcome is UNKNOWN. Mercury + // may or may not have moved money. We MUST NOT re-call Mercury. + // Refuse with `in_flight_unknown`; the operator runbook resolves by + // querying Mercury directly using this idempotency key and then + // manually setting the row to its true terminal state. 
const priorRows = (await sql` - SELECT id, status, response_payload, error_message + SELECT id, status, response_payload, error_message, idempotency_key, attempt FROM cc_actions_log WHERE intent_id = ${intent.id}::uuid AND idempotency_key = ${idempotencyKey} - AND status IN ('completed', 'failed') + ORDER BY executed_at DESC LIMIT 1 `) as unknown as Array<{ id: string; status: string; response_payload: Record | null; error_message: string | null; + idempotency_key: string; + attempt: number; }>; if (priorRows[0]) { const prior = priorRows[0]; + if (prior.status === 'in_flight') { + const errMsg = + `prior attempt is in_flight (audit_log id=${prior.id}, idempotency_key=${idempotencyKey}) — ` + + `Mercury state is unknown; operator must reconcile before retry`; + console.error(`[meta/executors/dispatch] in_flight_unknown for intent ${intent.id}: ${errMsg}`); + return { + ok: false, + idempotencyKey, + actionLogId: prior.id, + error: errMsg, + replayed: true, + }; + } + // Any other status → known terminal outcome, replay it. return { ok: prior.status === 'completed', idempotencyKey, - actionLogId: String(prior.id), + actionLogId: prior.id, data: prior.response_payload ?? undefined, error: prior.error_message ?? undefined, replayed: true, }; } + // 2. Compute attempt number. With the deterministic key, `attempt` is now + // purely audit metadata (the unique partial index dedupes us, not the + // attempt number). Still useful for operators inspecting retry history. + const [{ count: priorCount } = { count: 0 }] = (await sql` + SELECT COUNT(*)::int AS count FROM cc_actions_log WHERE intent_id = ${intent.id}::uuid + `) as unknown as Array<{ count: number }>; + const attempt = (priorCount ?? 0) + 1; + // 3. Re-reckon sovereignty if snapshot stale. 
let sovereignty: SovereigntyAssessmentSnapshot; if (isAssessmentFresh(intent.sovereigntyAssessment, freshnessMs)) { @@ -157,11 +184,8 @@ export async function dispatch( requestPayload: intent.payload, metadata: { reason: 'no_actor_for_reckon' }, }); - await failIntent(env, intent.id, errMsg).catch(() => null); - // FIX 2 (PR #106 critical): replayed:true tells executeIntent's - // `!result.replayed` guard to skip its own failIntent — dispatch has - // already written the audit row + transitioned status. - return { ok: false, idempotencyKey, error: errMsg, replayed: true }; + await safeFailIntent(env, intent.id, errMsg); + return { ok: false, idempotencyKey, error: errMsg }; } const result = await assessSovereignty( actor, @@ -190,15 +214,12 @@ export async function dispatch( requestPayload: intent.payload, metadata: { sovereignty }, }); - await failIntent(env, intent.id, refusal).catch(() => null); - // FIX 2 (PR #106 critical): replayed:true tells executeIntent's - // `!result.replayed` guard to skip its own failIntent. + await safeFailIntent(env, intent.id, refusal); return { ok: false, idempotencyKey, actionLogId: auditId, error: refusal, - replayed: true, }; } } @@ -210,7 +231,43 @@ export async function dispatch( throw new Error(errMsg); } - // 5. Execute. + // 5. PRE-WRITE the audit row as `in_flight` BEFORE invoking the executor. + // This is the atomicity guarantee for the money path: if the executor + // moves money and the post-update fails (e.g., Neon outage), the + // in_flight row still exists and a retry will see it via the prior-row + // lookup above and refuse with `in_flight_unknown`. The operator runbook + // then reconciles by querying Mercury with `idempotencyKey`. + // + // The partial unique index on (intent_id, idempotency_key) prevents two + // concurrent dispatchers from racing to create the same in_flight row — + // the second INSERT will fail with a unique violation. 
We let that throw + // so the daemon's outer loop treats it as a retryable error. + let auditId: string; + try { + auditId = await writeAuditRow(sql, { + intentId: intent.id, + attempt, + idempotencyKey, + actionType: 'payment_in_flight', + targetType: 'intent', + targetId: intent.id, + description: `mercury_payment in_flight for intent ${intent.id}`, + status: 'in_flight', + errorMessage: null, + responsePayload: null, + requestPayload: intent.payload, + metadata: { sovereignty, canonicalUri: executor.canonicalUri, phase: 'pre_execute' }, + }); + } catch (err) { + // Pre-write failed — money has NOT moved. Surface to caller; daemon + // retries with backoff. failIntent is NOT called (the intent stays + // claimable on the next pass). + const errMsg = err instanceof Error ? err.message : String(err); + console.error(`[meta/executors/dispatch] pre-write audit failed for intent ${intent.id}: ${errMsg}`); + throw new Error(`audit_write_failed_pre_execute: ${errMsg}`); + } + + // 6. Execute. const ctx: ExecutorContext = { env, sql, @@ -224,42 +281,61 @@ export async function dispatch( runOutput = await executor.run(ctx); } catch (err) { const errMsg = err instanceof Error ? err.message : String(err); - const auditId = await writeAuditRow(sql, { - intentId: intent.id, - attempt, - idempotencyKey, - actionType: 'executor_error', - targetType: 'intent', - targetId: intent.id, - description: `executor threw: ${errMsg}`, - status: 'failed', - errorMessage: errMsg, - responsePayload: null, - requestPayload: intent.payload, - metadata: { sovereignty, canonicalUri: executor.canonicalUri }, - }); + // Executor threw — update the in_flight row to failed (NOT a new row; + // same idempotency key, same record). If the update itself throws, we + // surface to the daemon so the row remains `in_flight` and a retry will + // hit the `in_flight_unknown` branch. 
+ try { + await updateAuditRow(sql, auditId, { + actionType: 'executor_error', + description: `executor threw: ${errMsg}`, + status: 'failed', + errorMessage: errMsg, + responsePayload: null, + metadata: { sovereignty, canonicalUri: executor.canonicalUri, phase: 'executor_threw' }, + }); + } catch (updateErr) { + const updateMsg = updateErr instanceof Error ? updateErr.message : String(updateErr); + console.error( + `[meta/executors/dispatch] AUDIT_UPDATE_FAILED_AFTER_EXECUTOR_THREW intent=${intent.id} key=${idempotencyKey}: ${updateMsg}`, + ); + throw new Error( + `audit_update_failed_after_executor_threw: original=${errMsg}; update_error=${updateMsg}`, + ); + } return { ok: false, idempotencyKey, actionLogId: auditId, error: errMsg }; } - // 6. Write audit row. - const auditId = await writeAuditRow(sql, { - intentId: intent.id, - attempt, - idempotencyKey, - actionType: runOutput.actionType, - targetType: runOutput.targetType, - targetId: runOutput.targetId ?? null, - description: runOutput.description, - status: runOutput.status, - errorMessage: runOutput.errorMessage ?? null, - responsePayload: runOutput.responsePayload ?? null, - requestPayload: intent.payload, - metadata: { - sovereignty, - canonicalUri: executor.canonicalUri, - ...(runOutput.metadata ?? {}), - }, - }); + // 7. UPDATE the same audit row in place with the executor result. This is + // the atomicity completion: in_flight → terminal status. Failure here + // leaves the row in_flight, which a retry will treat as + // `in_flight_unknown` (safe — Mercury de-dupes on idempotencyKey, so + // operator reconciliation reveals the true state). + try { + await updateAuditRow(sql, auditId, { + actionType: runOutput.actionType, + description: runOutput.description, + status: runOutput.status, + errorMessage: runOutput.errorMessage ?? null, + responsePayload: runOutput.responsePayload ?? 
null, + metadata: { + sovereignty, + canonicalUri: executor.canonicalUri, + phase: 'post_execute', + ...(runOutput.metadata ?? {}), + }, + }); + } catch (err) { + const errMsg = err instanceof Error ? err.message : String(err); + console.error( + `[meta/executors/dispatch] AUDIT_UPDATE_FAILED_POST_EXECUTE intent=${intent.id} key=${idempotencyKey} executor_ok=${runOutput.ok}: ${errMsg}`, + ); + // Surface to daemon — the row is still in_flight, so a retry will be + // refused as in_flight_unknown (correct, since money may have moved). + throw new Error( + `audit_update_failed_post_execute: executor_ok=${runOutput.ok}; update_error=${errMsg}`, + ); + } return { ok: runOutput.ok, @@ -270,6 +346,26 @@ export async function dispatch( }; } +/** + * failIntent wrapper that, if failIntent itself throws (e.g., Neon outage), + * (a) logs to console.error with a stable token for operator alerting and + * (b) re-throws so the daemon's outer loop catches and applies backoff. + * Replaces the prior `.catch(() => null)` pattern which silently dropped + * failure information. + */ +async function safeFailIntent(env: Env, intentId: string, reason: string): Promise { + try { + await failIntent(env, intentId, reason); + } catch (err) { + const errMsg = err instanceof Error ? err.message : String(err); + // Stable token so log aggregators / Alchemist alerting can match. + console.error( + `[meta/executors/dispatch] audit_write_failed_during_failIntent intent=${intentId} reason="${reason}" error=${errMsg}`, + ); + throw new Error(`failIntent_threw: intent=${intentId}: ${errMsg}`); + } +} + interface AuditRow { intentId: string; attempt: number; @@ -285,6 +381,38 @@ interface AuditRow { metadata: Record; } +interface AuditUpdate { + actionType: string; + description: string; + status: string; + errorMessage: string | null; + responsePayload: Record | null; + metadata: Record; +} + +/** + * UPDATE an existing audit row by id. 
Used to transition the in_flight + * placeholder into its terminal status after the executor returns. We + * intentionally do NOT touch intent_id / attempt / idempotency_key / + * request_payload — those were set at pre-write and are immutable. + */ +async function updateAuditRow( + sql: NeonQueryFunction, + id: string, + patch: AuditUpdate, +): Promise { + await sql` + UPDATE cc_actions_log + SET action_type = ${patch.actionType}, + description = ${patch.description}, + status = ${patch.status}, + error_message = ${patch.errorMessage}, + response_payload = ${patch.responsePayload ? JSON.stringify(patch.responsePayload) : null}::jsonb, + metadata = ${JSON.stringify(patch.metadata)}::jsonb + WHERE id = ${id}::uuid + `; +} + async function writeAuditRow( sql: NeonQueryFunction, row: AuditRow, diff --git a/meta/executors/mercury-payment.ts b/meta/executors/mercury-payment.ts index 6ef0c3e..c7585b2 100644 --- a/meta/executors/mercury-payment.ts +++ b/meta/executors/mercury-payment.ts @@ -47,7 +47,7 @@ import { z } from 'zod'; import type { Env } from '../../src/index'; -import { mercuryClient } from '../../src/lib/integrations'; +import { mercuryClient, type FetchImpl } from '../../src/lib/integrations'; import type { ExecutorContext, ExecutorRunOutput, IntentExecutor } from './types'; import { registerExecutor } from './registry'; @@ -93,16 +93,32 @@ export interface MercuryPaymentRunResult { ok: boolean; transactionId?: string; mercuryStatus?: string; + /** Mapped cc_actions_log status — only set when the Mercury call returned + * HTTP 2xx and a parseable body. Mirrors Mercury's status field through + * our audit vocabulary. 
*/ + auditStatus?: 'completed' | 'in_progress' | 'pending_review' | 'failed'; refusalReason?: | 'sovereignty_stale' | 'sovereignty_not_autonomous' | 'amount_cap_exceeded' | 'invalid_payload' | 'missing_token' - | 'mercury_api_failure'; + | 'invalid_account_slug' + | 'mercury_api_failure' + | 'mercury_internal_failure' + | 'idempotency_collision'; errorMessage?: string; + /** HTTP status from Mercury (success or failure) — for audit visibility. */ + httpStatus?: number; + /** First 500 chars of Mercury's response body — never contains tokens. */ + bodySnippet?: string; + /** Failure kind from the discriminated MercuryPostResult, when ok=false. */ + failureKind?: 'network' | 'http' | 'parse' | 'idempotency_collision'; } +/** Allowed account_slug pattern: lowercase alnum + hyphens only. */ +const ACCOUNT_SLUG_RE = /^[a-z0-9-]+$/; + /** * Pure runner — shared by ActionAgent chat tool (`execute_payment`) and the * executor below. Does NOT write `cc_actions_log`. Returns a structured @@ -121,10 +137,34 @@ export async function runMercuryPayment(args: { sovereignty: { decision: string; assessedAt: string }; idempotencyKey: string; now?: number; + /** Test/DI hook: inject a custom fetch so integration tests can drive + * network failures and Mercury 2xx-with-error-envelope without mocking + * mercuryClient itself. Default is global fetch. */ + fetchImpl?: FetchImpl; }): Promise { - const { env, payload, sovereignty, idempotencyKey } = args; + const { env, payload, sovereignty, idempotencyKey, fetchImpl } = args; const now = args.now ?? Date.now(); + // Normalize the account_slug BEFORE any KV lookup. If normalization changes + // the value, the input was malformed — reject as `invalid_account_slug` + // rather than silently looking up a different token (which would have its + // own security implications: a slug "Aribia/../foo" must not silently + // collapse to "aribia-foo"). 
+ const normalizedSlug = payload.account_slug + .toLowerCase() + .replace(/[^a-z0-9-]/g, ''); + if ( + normalizedSlug !== payload.account_slug || + normalizedSlug.length === 0 || + !ACCOUNT_SLUG_RE.test(normalizedSlug) + ) { + return { + ok: false, + refusalReason: 'invalid_account_slug', + errorMessage: `account_slug='${payload.account_slug}' is malformed (must match ${ACCOUNT_SLUG_RE.source})`, + }; + } + if (sovereignty.decision !== 'autonomous') { return { ok: false, @@ -156,16 +196,16 @@ export async function runMercuryPayment(args: { }; } - const token = await env.COMMAND_KV.get(`mercury:token:${payload.account_slug}`); + const token = await env.COMMAND_KV.get(`mercury:token:${normalizedSlug}`); if (!token) { return { ok: false, refusalReason: 'missing_token', - errorMessage: `no Mercury token in KV for account_slug='${payload.account_slug}'`, + errorMessage: `no Mercury token in KV for account_slug='${normalizedSlug}'`, }; } - const mercury = mercuryClient(token); + const mercury = mercuryClient(token, fetchImpl); const amountUsd = payload.amount_cents / 100; const result = await mercury.createPayment(payload.mercury_account_id, { recipientId: payload.recipient_id, @@ -175,21 +215,101 @@ export async function runMercuryPayment(args: { note: payload.memo, }); - if (!result) { + // Branch on the discriminated union — every failure mode has a distinct + // refusal reason and lands in the audit row with httpStatus + bodySnippet so + // operators can diagnose without re-running Mercury. 
+ if (!result.ok) { + if (result.kind === 'idempotency_collision') { + return { + ok: false, + refusalReason: 'idempotency_collision', + failureKind: 'idempotency_collision', + httpStatus: result.httpStatus, + bodySnippet: result.bodySnippet, + errorMessage: + 'Mercury returned 409 idempotency collision — same idempotency key was previously used with a different payload (replay attempt or payload mutation)', + }; + } return { ok: false, refusalReason: 'mercury_api_failure', - errorMessage: 'Mercury API returned null (HTTP error or network failure)', + failureKind: result.kind, + httpStatus: result.httpStatus, + bodySnippet: result.bodySnippet, + errorMessage: `Mercury API ${result.kind} failure${ + result.httpStatus ? ` (HTTP ${result.httpStatus})` : '' + }: ${result.bodySnippet ?? 'no body'}`, + }; + } + + // Mercury can return 2xx with a body whose own `status` field indicates + // failure / pending / review. We MUST NOT blanket-stamp this as `completed`. + // Map Mercury's status into our audit vocabulary. + const rawStatus = (result.body.status ?? '').toLowerCase(); + const { auditStatus, refusalReason, errorMessage } = mapMercuryStatus(rawStatus); + + if (refusalReason) { + return { + ok: false, + refusalReason, + transactionId: result.body.id, + mercuryStatus: result.body.status, + auditStatus, + httpStatus: result.httpStatus, + bodySnippet: result.rawSnippet, + errorMessage, }; } return { ok: true, - transactionId: result.id, - mercuryStatus: result.status, + transactionId: result.body.id, + mercuryStatus: result.body.status, + auditStatus, + httpStatus: result.httpStatus, + bodySnippet: result.rawSnippet, }; } +/** + * Translate Mercury's transaction `status` field into our cc_actions_log + * `status` vocabulary. 
Per Mercury docs the status field can be one of: + * sent, posted, delivered → completed (money actually moved) + * pending → in_progress (queued, not yet sent) + * failed → failed (rejected by bank / Mercury) + * requires_review → pending_review (manual approval gate) + * Unknown values fall through as `in_progress` with a refusal reason so the + * operator must triage rather than the executor silently treating it as done. + */ +function mapMercuryStatus(rawStatus: string): { + auditStatus: 'completed' | 'in_progress' | 'pending_review' | 'failed'; + refusalReason?: 'mercury_internal_failure'; + errorMessage?: string; +} { + switch (rawStatus) { + case 'sent': + case 'posted': + case 'delivered': + return { auditStatus: 'completed' }; + case 'pending': + return { auditStatus: 'in_progress' }; + case 'failed': + return { + auditStatus: 'failed', + refusalReason: 'mercury_internal_failure', + errorMessage: 'Mercury returned 2xx with status="failed" — payment was not accepted', + }; + case 'requires_review': + return { auditStatus: 'pending_review' }; + default: + return { + auditStatus: 'in_progress', + refusalReason: 'mercury_internal_failure', + errorMessage: `Mercury returned 2xx with unrecognized status="${rawStatus}" — refusing to stamp completed`, + }; + } +} + /** * Build a redacted response payload for `cc_actions_log.response_payload`. * Never include token, full account number, or routing number. @@ -208,6 +328,11 @@ function buildResponsePayload( transaction_id: run.transactionId ?? null, mercury_status: run.mercuryStatus ?? null, refusal_reason: run.refusalReason ?? null, + // Discriminated-error context — present on every Mercury-call failure so + // operators don't need to re-call Mercury to diagnose. + failure_kind: run.failureKind ?? null, + http_status: run.httpStatus ?? null, + body_snippet: run.bodySnippet ?? null, }; } @@ -254,26 +379,43 @@ const executor: IntentExecutor = { const reason = run.refusalReason ?? 
'unknown'; return { ok: false, - description: `mercury_payment refused: ${reason}`, + description: `mercury_payment refused: ${reason}${ + run.httpStatus ? ` (HTTP ${run.httpStatus})` : '' + }`, actionType: 'payment_refusal', targetType: 'recipient', targetId, + // Mercury said `failed` outright → record as failed. Anything else + // refused at our gate is also `failed` (refusal = not executed). status: 'failed', errorMessage: run.errorMessage ?? reason, responsePayload, - metadata: { refusal_reason: reason }, + metadata: { + refusal_reason: reason, + failure_kind: run.failureKind ?? null, + http_status: run.httpStatus ?? null, + }, }; } + // Mercury returned 2xx with a status we accept. Use the mapped auditStatus + // — NEVER blanket-stamp `completed`. `in_progress` (pending) and + // `pending_review` (requires_review) reach this branch with ok=true. + const auditStatus = run.auditStatus ?? 'in_progress'; return { ok: true, - description: `mercury_payment: USD ${(payload.amount_cents / 100).toFixed(2)} to recipient ${payload.recipient_id} (tx ${run.transactionId})`, + description: `mercury_payment: USD ${(payload.amount_cents / 100).toFixed( + 2, + )} to recipient ${payload.recipient_id} (tx ${run.transactionId}, mercury_status=${run.mercuryStatus})`, actionType: 'payment', targetType: 'recipient', targetId, - status: 'completed', + status: auditStatus, responsePayload, - metadata: { mercury_status: run.mercuryStatus }, + metadata: { + mercury_status: run.mercuryStatus, + http_status: run.httpStatus ?? null, + }, }; }, }; diff --git a/meta/executors/types.ts b/meta/executors/types.ts index 3c662ee..8f7b12b 100644 --- a/meta/executors/types.ts +++ b/meta/executors/types.ts @@ -70,8 +70,17 @@ export interface ExecutorRunOutput { targetType: string; /** target_id for cc_actions_log (nullable). */ targetId?: string | null; - /** status for cc_actions_log row. */ - status: 'completed' | 'failed' | 'pending_approval'; + /** status for cc_actions_log row. 
`in_progress` and `pending_review` are + * used by the money-path executor when Mercury returns 2xx but the + * transaction has not yet cleared (`pending`) or needs human review + * (`requires_review`) — see meta/executors/mercury-payment.ts. */ + status: + | 'completed' + | 'failed' + | 'pending_approval' + | 'in_progress' + | 'pending_review' + | 'in_flight'; responsePayload?: Record; errorMessage?: string; metadata?: Record; diff --git a/src/agents/tools/actions.ts b/src/agents/tools/actions.ts index bac711f..19e61b1 100644 --- a/src/agents/tools/actions.ts +++ b/src/agents/tools/actions.ts @@ -10,7 +10,6 @@ import { updateObligationStatusSchema, runUpdateObligationStatus, } from '../../../meta/executors/update-obligation-status'; -import { runMercuryPayment } from '../../../meta/executors/mercury-payment'; /** * Create action execution tools bound to environment and SQL. @@ -21,70 +20,38 @@ import { runMercuryPayment } from '../../../meta/executors/mercury-payment'; export function createActionTools(env: Env, sql: NeonQueryFunction) { return { execute_payment: tool({ - description: 'Execute a payment via Mercury Banking. Requires explicit user approval. Creates an ACH transfer from a Mercury account to a saved recipient. The payment is logged and the linked obligation is updated.', + description: + 'REFUSED in chat surface. Mercury payments must be initiated from the dashboard, where the authenticated user identity drives a real sovereignty assessment against trust.chitty.cc. The chat tool factory has no access to the chat actor ChittyID, so a real assessSovereignty() call cannot be made — and the prior synthetic `{ decision: "autonomous" }` snapshot was a silent bypass of the gate that protects the real-money path. 
Use the dashboard payments page instead.', inputSchema: z.object({ - account_slug: z.string().describe('Mercury org slug (e.g., "aribia-llc", "aribia-mgmt")'), - mercury_account_id: z.string().describe('Mercury account ID to pay from'), - recipient_id: z.string().describe('Mercury recipient ID to pay'), - amount: z.number().positive().describe('Payment amount in USD'), - note: z.string().optional().describe('Payment memo/note'), - obligation_id: z.string().uuid().optional().describe('Link payment to this obligation'), + account_slug: z.string().optional(), + mercury_account_id: z.string().optional(), + recipient_id: z.string().optional(), + amount: z.number().positive().optional(), + note: z.string().optional(), + obligation_id: z.string().uuid().optional(), }), - execute: async ({ account_slug, mercury_account_id, recipient_id, amount, note, obligation_id }) => { - // Chat surface delegates to the canonical executor's pure runner so - // chat + autonomous paths share the same Mercury call, sovereignty - // gate, and amount cap. The chat path supplies an autonomous + fresh - // sovereignty snapshot because user-approval in chat is the gating - // event for this surface. See ADR-001 amendment (PR-A) and - // meta/executors/mercury-payment.ts. - const idempotencyKey = crypto.randomUUID(); - const amountCents = Math.round(amount * 100); - const run = await runMercuryPayment({ - env, - payload: { - account_slug, - mercury_account_id, - recipient_id, - amount_cents: amountCents, - currency: 'USD', - memo: note || undefined, - obligation_id, - }, - sovereignty: { decision: 'autonomous', assessedAt: new Date().toISOString() }, - idempotencyKey, - }); - - if (!run.ok) { - // Chat path keeps its own audit row (no intent_id) — preserves - // existing chat audit behavior exactly. 
- await sql` - INSERT INTO cc_actions_log (action_type, target_type, target_id, description, status, metadata) - VALUES ('payment', 'obligation', ${obligation_id || null}, - ${`Mercury ACH $${amount.toFixed(2)} to ${recipient_id} — ${run.refusalReason ?? 'FAILED'}`}, 'failed', - ${JSON.stringify({ account_slug, mercury_account_id, recipient_id, amount_cents: amountCents, idempotencyKey, refusal_reason: run.refusalReason })}::jsonb) - `; - return { success: false, error: run.errorMessage ?? 'Mercury payment refused or failed.' }; - } - + execute: async ({ account_slug, recipient_id, amount, obligation_id }) => { + // Audit the refusal so attempted chat-initiated payments are visible + // to operators (signal: a model tried to move money from chat). await sql` INSERT INTO cc_actions_log (action_type, target_type, target_id, description, status, metadata) - VALUES ('payment', 'obligation', ${obligation_id || null}, - ${`Mercury ACH $${amount.toFixed(2)} — tx ${run.transactionId}`}, 'completed', - ${JSON.stringify({ transaction_id: run.transactionId, mercury_status: run.mercuryStatus, account_slug, idempotencyKey })}::jsonb) + VALUES ('payment_refusal', 'obligation', ${obligation_id || null}, + ${`Mercury payment refused in chat surface (use dashboard): ${account_slug ?? '?'} -> ${recipient_id ?? '?'} $${(amount ?? 0).toFixed(2)}`}, + 'failed', + ${JSON.stringify({ + refusal_reason: 'chat_surface_refuses_mercury', + surface: 'chat', + account_slug: account_slug ?? null, + recipient_id: recipient_id ?? null, + amount: amount ?? 
null, + })}::jsonb) `; - - if (obligation_id) { - await sql` - UPDATE cc_obligations - SET status = 'paid', updated_at = NOW(), - metadata = COALESCE(metadata, '{}'::jsonb) || ${JSON.stringify({ - last_payment: { amount, mercury_tx_id: run.transactionId, date: new Date().toISOString() }, - })}::jsonb - WHERE id = ${obligation_id}::uuid - `; - } - - return { success: true, transaction_id: run.transactionId, amount, status: run.mercuryStatus }; + return { + success: false, + error: + 'Mercury payments cannot be initiated from chat. Use the dashboard payments page — it has the authenticated chat actor identity needed for a real sovereignty assessment against trust.chitty.cc. This refusal is by design: a synthetic "autonomous" snapshot in chat would silently bypass the money-path sovereignty gate.', + refusal_reason: 'chat_surface_refuses_mercury', + }; }, }), diff --git a/src/lib/integrations.ts b/src/lib/integrations.ts index 5505918..9b5fe65 100644 --- a/src/lib/integrations.ts +++ b/src/lib/integrations.ts @@ -518,6 +518,22 @@ export interface PromptExecuteResponse { // ── Mercury ───────────────────────────────────────────────── // Direct Mercury API for multi-entity banking +/** + * Discriminated result type for Mercury API calls. Distinguishes every failure + * mode so callers (executor, audit row) can record the actual cause instead of + * collapsing everything to `null` / silent success. REAL MONEY PATH — every + * branch must be visible in cc_actions_log. 
+ */ +export type MercuryPostResult = + | { ok: true; body: T; httpStatus: number; rawSnippet?: string } + | { + ok: false; + kind: 'network' | 'http' | 'parse' | 'idempotency_collision'; + httpStatus?: number; + bodySnippet?: string; + errorCode?: string; + }; + export interface MercuryAccount { id: string; name: string; @@ -548,12 +564,17 @@ export interface MercuryTransaction { status: string; } -export function mercuryClient(token: string) { +/** Optional fetch override — exists solely so integration tests can drive the + * Mercury client against an in-process fetch double WITHOUT mocking the + * client. Default is global fetch (real Mercury). */ +export type FetchImpl = typeof fetch; + +export function mercuryClient(token: string, fetchImpl: FetchImpl = fetch) { const baseUrl = 'https://api.mercury.com/api/v1'; async function get(path: string): Promise { try { - const res = await fetch(`${baseUrl}${path}`, { + const res = await fetchImpl(`${baseUrl}${path}`, { headers: { Authorization: `Bearer ${token}`, Accept: 'application/json' }, }); if (!res.ok) { @@ -568,23 +589,47 @@ export function mercuryClient(token: string) { } } - async function post(path: string, body: unknown): Promise { + /** + * POST with a discriminated result. Every failure mode is enumerated: + * - 409 → `idempotency_collision` (replay-attack tell — operator must see) + * - other 4xx/5xx → `http` + * - non-JSON body on 2xx → `parse` + * - thrown / network error → `network` + * Successful response includes `httpStatus` AND a body-text snippet so the + * audit row can show what Mercury returned without leaking secrets (body is + * Mercury's payment object — no token in it). 
+ */ + async function post(path: string, body: unknown): Promise> { + let res: Response; try { - const res = await fetch(`${baseUrl}${path}`, { + res = await fetchImpl(`${baseUrl}${path}`, { method: 'POST', headers: { Authorization: `Bearer ${token}`, 'Content-Type': 'application/json', Accept: 'application/json' }, body: JSON.stringify(body), }); - if (!res.ok) { - const text = await res.text().catch(() => ''); - console.error(`[mercury] POST ${path} failed: ${res.status} — ${text.slice(0, 500)}`); - return null; - } - return await res.json() as T; } catch (err) { - console.error(`[mercury] POST ${path} error:`, err); - return null; + const msg = err instanceof Error ? err.message : String(err); + console.error(`[mercury] POST ${path} network error: ${msg}`); + return { ok: false, kind: 'network', bodySnippet: msg.slice(0, 500) }; + } + + const text = await res.text().catch(() => ''); + if (!res.ok) { + if (res.status === 409) { + console.error(`[mercury] POST ${path} idempotency collision (409): ${text.slice(0, 500)}`); + return { ok: false, kind: 'idempotency_collision', httpStatus: 409, bodySnippet: text.slice(0, 500) }; + } + console.error(`[mercury] POST ${path} failed: ${res.status} — ${text.slice(0, 500)}`); + return { ok: false, kind: 'http', httpStatus: res.status, bodySnippet: text.slice(0, 500) }; + } + let parsed: T; + try { + parsed = JSON.parse(text) as T; + } catch { + console.error(`[mercury] POST ${path} parse error: body was not JSON — ${text.slice(0, 200)}`); + return { ok: false, kind: 'parse', httpStatus: res.status, bodySnippet: text.slice(0, 500) }; } + return { ok: true, body: parsed, httpStatus: res.status, rawSnippet: text.slice(0, 500) }; } return { @@ -601,7 +646,9 @@ export function mercuryClient(token: string) { getRecipients: (accountId: string) => get<{ recipients: Array<{ id: string; name: string; accountNumber?: string; routingNumber?: string }> }>(`/account/${accountId}/recipients`), - /** Create an ACH payment from an account to 
a recipient */ + /** Create an ACH payment from an account to a recipient. Returns the + * discriminated `MercuryPostResult` so callers can branch on the exact + * failure mode (network / http / parse / idempotency_collision). */ createPayment: (accountId: string, payment: { recipientId: string; amount: number; diff --git a/tests/meta/executors/mercury-payment-failures.spec.ts b/tests/meta/executors/mercury-payment-failures.spec.ts new file mode 100644 index 0000000..c341f96 --- /dev/null +++ b/tests/meta/executors/mercury-payment-failures.spec.ts @@ -0,0 +1,247 @@ +/** + * Direct integration tests for the Mercury payment executor's failure-mode + * handling — discriminated `post()` result, body-status mapping, chat refusal, + * and account_slug normalization. + * + * These tests drive `runMercuryPayment` with an injected fetch double so we + * can deterministically exercise Mercury's 5xx / 409 / 2xx-with-status=failed + * / 2xx-with-status=pending responses WITHOUT mocking mercuryClient itself. + * The fetch double is a real fetch impl that constructs real Response objects + * — no `vi.mock(...)` is used on the client, the DB, or any service module. + * + * Per the PR-108 "no mocks of Mercury or DB" rule: dependency-injecting fetch + * is the only non-mock path to exercise non-2xx Mercury responses, and is + * therefore the canonical way to test these failure branches. + * + * @canonical-uri chittycanon://core/services/chittycommand/executors/mercury_payment + */ + +import { describe, it, expect } from 'vitest'; +import { + runMercuryPayment, + MERCURY_SOVEREIGNTY_FRESHNESS_MS, +} from '../../../meta/executors/mercury-payment'; +import type { Env } from '../../../src/index'; + +const KV_WITH_TOKEN = { + get: async (key: string) => + key === 'mercury:token:aribia-llc' ? 
'sk_test_real_shape_token' : null, + put: async () => undefined, + delete: async () => undefined, + list: async () => ({ keys: [], list_complete: true, cursor: '' }), +} as unknown as KVNamespace; + +const KV_NO_TOKEN = { + get: async () => null, + put: async () => undefined, + delete: async () => undefined, + list: async () => ({ keys: [], list_complete: true, cursor: '' }), +} as unknown as KVNamespace; + +function envFor(kv: KVNamespace): Env { + return { + MERCURY_AUTONOMOUS_AMOUNT_CAP_USD: '500', + COMMAND_KV: kv, + } as unknown as Env; +} + +const FRESH_ASSESSMENT = { + decision: 'autonomous', + assessedAt: new Date().toISOString(), +}; + +const VALID_PAYLOAD = { + account_slug: 'aribia-llc', + mercury_account_id: 'acct_real_0001', + recipient_id: 'rcpt_real_0001', + amount_cents: 1_00, + currency: 'USD' as const, + memo: 'integration-test', +}; + +/** + * Build a fetch impl that returns the given status + body on EVERY call it + * receives (URL and method are not inspected). Real Response objects, no mocks.
+ */ +function fetchReturning(status: number, body: string): typeof fetch { + return (async (_input: RequestInfo | URL, _init?: RequestInit) => { + return new Response(body, { + status, + headers: { 'Content-Type': 'application/json' }, + }); + }) as unknown as typeof fetch; +} + +function fetchThrowing(): typeof fetch { + return (async () => { + throw new TypeError('fetch failed: ECONNREFUSED'); + }) as unknown as typeof fetch; +} + +describe('mercury-payment — discriminated-error & body-status handling (real Response, injected fetch)', () => { + it('Mercury returns 5xx → refusal with mercury_api_failure, httpStatus + bodySnippet populated', async () => { + const run = await runMercuryPayment({ + env: envFor(KV_WITH_TOKEN), + payload: VALID_PAYLOAD, + sovereignty: FRESH_ASSESSMENT, + idempotencyKey: 'a'.repeat(64), + fetchImpl: fetchReturning(503, '{"error":"upstream timeout"}'), + }); + expect(run.ok).toBe(false); + expect(run.refusalReason).toBe('mercury_api_failure'); + expect(run.failureKind).toBe('http'); + expect(run.httpStatus).toBe(503); + expect(run.bodySnippet).toContain('upstream timeout'); + }); + + it('Mercury returns 409 → refusal with idempotency_collision (replay-attack tell)', async () => { + const run = await runMercuryPayment({ + env: envFor(KV_WITH_TOKEN), + payload: VALID_PAYLOAD, + sovereignty: FRESH_ASSESSMENT, + idempotencyKey: 'b'.repeat(64), + fetchImpl: fetchReturning(409, '{"error":"idempotency conflict with different payload"}'), + }); + expect(run.ok).toBe(false); + expect(run.refusalReason).toBe('idempotency_collision'); + expect(run.failureKind).toBe('idempotency_collision'); + expect(run.httpStatus).toBe(409); + expect(run.bodySnippet).toContain('idempotency conflict'); + }); + + it('Mercury returns 200 with {"status":"failed"} → refusal mercury_internal_failure, audit status=failed', async () => { + const run = await runMercuryPayment({ + env: envFor(KV_WITH_TOKEN), + payload: VALID_PAYLOAD, + sovereignty: FRESH_ASSESSMENT, + 
idempotencyKey: 'c'.repeat(64), + fetchImpl: fetchReturning( + 200, + JSON.stringify({ id: 'tx_failed_001', status: 'failed', amount: 1.0 }), + ), + }); + expect(run.ok).toBe(false); + expect(run.refusalReason).toBe('mercury_internal_failure'); + expect(run.auditStatus).toBe('failed'); + expect(run.transactionId).toBe('tx_failed_001'); + expect(run.mercuryStatus).toBe('failed'); + expect(run.httpStatus).toBe(200); + }); + + it('Mercury returns 200 with {"status":"pending"} → ok=true, audit status=in_progress (NOT completed)', async () => { + const run = await runMercuryPayment({ + env: envFor(KV_WITH_TOKEN), + payload: VALID_PAYLOAD, + sovereignty: FRESH_ASSESSMENT, + idempotencyKey: 'd'.repeat(64), + fetchImpl: fetchReturning( + 200, + JSON.stringify({ id: 'tx_pending_001', status: 'pending', amount: 1.0 }), + ), + }); + expect(run.ok).toBe(true); + expect(run.auditStatus).toBe('in_progress'); + expect(run.auditStatus).not.toBe('completed'); + expect(run.transactionId).toBe('tx_pending_001'); + expect(run.mercuryStatus).toBe('pending'); + }); + + it('Mercury returns 200 with {"status":"sent"} → ok=true, audit status=completed', async () => { + const run = await runMercuryPayment({ + env: envFor(KV_WITH_TOKEN), + payload: VALID_PAYLOAD, + sovereignty: FRESH_ASSESSMENT, + idempotencyKey: 'e'.repeat(64), + fetchImpl: fetchReturning( + 200, + JSON.stringify({ id: 'tx_sent_001', status: 'sent', amount: 1.0 }), + ), + }); + expect(run.ok).toBe(true); + expect(run.auditStatus).toBe('completed'); + expect(run.transactionId).toBe('tx_sent_001'); + }); + + it('Mercury network error (thrown) → refusal mercury_api_failure with failureKind=network', async () => { + const run = await runMercuryPayment({ + env: envFor(KV_WITH_TOKEN), + payload: VALID_PAYLOAD, + sovereignty: FRESH_ASSESSMENT, + idempotencyKey: 'f'.repeat(64), + fetchImpl: fetchThrowing(), + }); + expect(run.ok).toBe(false); + expect(run.refusalReason).toBe('mercury_api_failure'); + 
expect(run.failureKind).toBe('network'); + expect(run.bodySnippet).toContain('ECONNREFUSED'); + }); + + it('account_slug with mixed case or special chars → refusal invalid_account_slug (no KV lookup)', async () => { + // KV_WITH_TOKEN only has a token for "aribia-llc". A slug "Aribia/LLC" + // would normalize to "aribiallc" — but normalization changing the value + // means the input was malformed, so we refuse BEFORE the KV lookup. + const run = await runMercuryPayment({ + env: envFor(KV_WITH_TOKEN), + payload: { ...VALID_PAYLOAD, account_slug: 'Aribia/LLC' }, + sovereignty: FRESH_ASSESSMENT, + idempotencyKey: 'g'.repeat(64), + fetchImpl: fetchThrowing(), // must not be reached + }); + expect(run.ok).toBe(false); + expect(run.refusalReason).toBe('invalid_account_slug'); + }); + + it('account_slug already normalized but no token in KV → refusal missing_token', async () => { + const run = await runMercuryPayment({ + env: envFor(KV_NO_TOKEN), + payload: VALID_PAYLOAD, + sovereignty: FRESH_ASSESSMENT, + idempotencyKey: 'h'.repeat(64), + fetchImpl: fetchThrowing(), + }); + expect(run.ok).toBe(false); + expect(run.refusalReason).toBe('missing_token'); + }); +}); + +describe('chat-surface tool — refuses Mercury payments unconditionally', () => { + it('execute_payment tool returns a refusal regardless of inputs (chat surface has no actor ChittyID)', async () => { + // Verifies the chat path no longer drives runMercuryPayment with a + // synthetic { decision: "autonomous" } snapshot. We import the tool + // factory and call the execute function directly with no SQL writes + // exercised against a real DB — the chat refusal does perform a DB + // INSERT, so we need a SQL stub that records the call. This is NOT a + // mock of mercuryClient or of the DB module; it is a no-op sql tag + // function for verifying the tool's contract: refuse + audit. 
+ const { createActionTools } = await import('../../../src/agents/tools/actions'); + const sqlCalls: string[] = []; + const sql = ((strings: TemplateStringsArray, ..._values: unknown[]) => { + sqlCalls.push(strings.join('?')); + return Promise.resolve([]); + }) as unknown as Parameters[1]; + const tools = createActionTools(envFor(KV_WITH_TOKEN), sql); + const execFn = tools.execute_payment.execute; + if (!execFn) throw new Error('execute_payment.execute missing'); + const raw = await execFn( + { + account_slug: 'aribia-llc', + mercury_account_id: 'acct_real_0001', + recipient_id: 'rcpt_real_0001', + amount: 100, + note: 'should-not-execute', + obligation_id: undefined, + }, + { toolCallId: 'test', messages: [] }, + ); + const result = raw as { success: boolean; error: string; refusal_reason: string }; + expect(result.success).toBe(false); + expect(result.refusal_reason).toBe('chat_surface_refuses_mercury'); + expect(result.error).toMatch(/dashboard/i); + // Refusal must be audited so an operator sees a chat-initiated payment attempt. + expect(sqlCalls.some((s) => s.includes('cc_actions_log'))).toBe(true); + }); +}); + +// MERCURY_SOVEREIGNTY_FRESHNESS_MS import kept to ensure the constant remains +// public (other tests / runbook docs reference it). +void MERCURY_SOVEREIGNTY_FRESHNESS_MS; From 03d52723562d094303531d8fbd889a780a5d503e Mon Sep 17 00:00:00 2001 From: chitcommit <208086304+chitcommit@users.noreply.github.com> Date: Thu, 4 Jun 2026 10:15:00 +0000 Subject: [PATCH 3/4] fix(mercury): forward idempotencyKey as Idempotency-Key HTTP header (PR #108 review C1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mercury's ACH/wire/check transaction API dedupes creations by the `Idempotency-Key` HTTP header, not by a field in the JSON body. Prior to this fix, `createPayment` sent the key only in the body, so a transport-level retry (network blip, edge timeout, etc.) could create a duplicate ACH transfer — money out twice. 
Changes: - `mercuryClient.post` accepts an `opts.idempotencyKey` and sets the `Idempotency-Key` HTTP header on the outbound fetch when present. - `createPayment` strips `idempotencyKey` from the request body and forwards it via `opts`, so the header is set on every Mercury POST. - Regression test in `tests/meta/executors/mercury-payment-failures.spec.ts` captures the outbound `Request` via the existing injected-fetch pattern and asserts `headers.get('Idempotency-Key')` equals the executor's computed idempotency key. No mocks beyond the FetchImpl injection already used by the file. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/lib/integrations.ts | 28 +++++++++++++-- .../mercury-payment-failures.spec.ts | 36 +++++++++++++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/lib/integrations.ts b/src/lib/integrations.ts index 9b5fe65..d744298 100644 --- a/src/lib/integrations.ts +++ b/src/lib/integrations.ts @@ -599,12 +599,25 @@ export function mercuryClient(token: string, fetchImpl: FetchImpl = fetch) { * audit row can show what Mercury returned without leaking secrets (body is * Mercury's payment object — no token in it). */ - async function post(path: string, body: unknown): Promise> { + async function post( + path: string, + body: unknown, + opts?: { idempotencyKey?: string }, + ): Promise> { let res: Response; try { + const headers: Record = { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/json', + Accept: 'application/json', + }; + // Mercury dedupes ACH/wire/check creations by the `Idempotency-Key` HTTP + // header. Putting the key only in the JSON body lets a transport retry + // create duplicate transfers — money out twice. Forward as header. 
+ if (opts?.idempotencyKey) headers['Idempotency-Key'] = opts.idempotencyKey; res = await fetchImpl(`${baseUrl}${path}`, { method: 'POST', - headers: { Authorization: `Bearer ${token}`, 'Content-Type': 'application/json', Accept: 'application/json' }, + headers, body: JSON.stringify(body), }); } catch (err) { @@ -655,7 +668,16 @@ export function mercuryClient(token: string, fetchImpl: FetchImpl = fetch) { paymentMethod: 'ach' | 'wire' | 'check'; idempotencyKey: string; note?: string; - }) => post<{ id: string; status: string; amount: number }>(`/account/${accountId}/transactions`, payment), + }) => { + // Mercury expects `Idempotency-Key` as an HTTP header, not in the body. + // Strip from body and forward via opts so transport retries dedupe. + const { idempotencyKey, ...bodyWithoutKey } = payment; + return post<{ id: string; status: string; amount: number }>( + `/account/${accountId}/transactions`, + bodyWithoutKey, + { idempotencyKey }, + ); + }, }; } diff --git a/tests/meta/executors/mercury-payment-failures.spec.ts b/tests/meta/executors/mercury-payment-failures.spec.ts index c341f96..d0495c4 100644 --- a/tests/meta/executors/mercury-payment-failures.spec.ts +++ b/tests/meta/executors/mercury-payment-failures.spec.ts @@ -242,6 +242,42 @@ describe('chat-surface tool — refuses Mercury payments unconditionally', () => }); }); +describe('mercury-payment — idempotency forwarded as HTTP header (PR #108 review C1)', () => { + it('createPayment sends Idempotency-Key as a request header, not just in body', async () => { + const captured: { url: string; headers: Headers; bodyText: string }[] = []; + const idemKey = 'i'.repeat(64); + + const capturingFetch: typeof fetch = (async ( + input: RequestInfo | URL, + init?: RequestInit, + ) => { + const req = new Request(input as RequestInfo, init); + const bodyText = await req.text(); + captured.push({ url: req.url, headers: req.headers, bodyText }); + return new Response( + JSON.stringify({ id: 'tx_hdr_001', status: 'sent', 
amount: 1.0 }), + { status: 200, headers: { 'Content-Type': 'application/json' } }, + ); + }) as unknown as typeof fetch; + + const run = await runMercuryPayment({ + env: envFor(KV_WITH_TOKEN), + payload: VALID_PAYLOAD, + sovereignty: FRESH_ASSESSMENT, + idempotencyKey: idemKey, + fetchImpl: capturingFetch, + }); + + expect(run.ok).toBe(true); + expect(captured.length).toBeGreaterThan(0); + const post = captured.find((c) => c.url.includes('/transactions')); + expect(post, 'expected a POST to /transactions to be captured').toBeDefined(); + // The Idempotency-Key HTTP header is what Mercury uses to dedupe transfers + // on transport retry. Without it, a retried POST creates a duplicate ACH. + expect(post!.headers.get('Idempotency-Key')).toBe(idemKey); + }); +}); + // MERCURY_SOVEREIGNTY_FRESHNESS_MS import kept to ensure the constant remains // public (other tests / runbook docs reference it). void MERCURY_SOVEREIGNTY_FRESHNESS_MS; From a2e159da0c97ad47591b5b20aa60d8d1a2139e89 Mon Sep 17 00:00:00 2001 From: chitcommit <208086304+chitcommit@users.noreply.github.com> Date: Wed, 10 Jun 2026 12:50:35 +0000 Subject: [PATCH 4/4] fix(mercury): route indeterminate outcomes to in_flight reconciliation, not terminal failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adversarial review (silent-failure-hunter) + ChittyConnect concierge ruling: recording 409-collision / network-lost-response / 5xx / unparseable-2xx as terminal `failed` buries payments that MAY have moved money — executeIntent then blocks retry on terminal state and dispatch replays `failed` instead of triggering the in_flight_unknown reconciliation runbook. Two-part fix (both mandatory): - meta/executors/mercury-payment.ts: classify the "money may have moved" set (409 idempotency_collision, network, parse-on-2xx, http>=500) as `indeterminate`; the run() mapping records them as `in_flight` with action_type `payment_indeterminate`. 
Definite 4xx (!=409) and explicit Mercury status:"failed" stay terminal `failed` (no money moved). - meta/intent.ts: executeIntent skips failIntent when result.indeterminate, leaving the intent claimable so the next dispatch pass reaches in_flight_unknown. Without this the audit-status change is inert. - meta/executors/types.ts + dispatch.ts: propagate `indeterminate` on ExecutorRunOutput -> ExecutorResult. - mercury-payment.ts docstring: correct the idempotency-key formula to sha256(intent.id:intent_type) (NO attempt) — a per-attempt key defeats Mercury dedup and is a double-spend vector. Do not reintroduce. Tests: - tests/meta/executors/mercury-payment-failures.spec.ts: assert indeterminate true for 5xx/409/network, falsy for explicit 2xx status:"failed". 10/10 pass locally (injected fetch, real Response, no mocks). - tests/meta/executor-pr106-criticals.spec.ts: re-assert to the deterministic- key contract — attempt is COUNT(*)+1 (2 after a seeded row), sovereignty refusal returns replayed:falsy (not true); outcome guarantees (one refusal row, terminal intent) preserved. Co-Authored-By: Claude Opus 4.8 (1M context) --- meta/executors/dispatch.ts | 4 ++ meta/executors/mercury-payment.ts | 64 ++++++++++++++----- meta/executors/types.ts | 15 +++++ meta/intent.ts | 10 ++- tests/meta/executor-pr106-criticals.spec.ts | 24 +++++-- .../mercury-payment-failures.spec.ts | 10 +++ 6 files changed, 101 insertions(+), 26 deletions(-) diff --git a/meta/executors/dispatch.ts b/meta/executors/dispatch.ts index b8589f0..8b2eb6b 100644 --- a/meta/executors/dispatch.ts +++ b/meta/executors/dispatch.ts @@ -343,6 +343,10 @@ export async function dispatch( actionLogId: auditId, data: runOutput.responsePayload, error: runOutput.errorMessage, + // Propagate indeterminate so executeIntent skips failIntent and leaves the + // intent claimable — the row stays `in_flight` for reconciliation rather + // than being buried as a terminal failure. 
+ indeterminate: runOutput.indeterminate, }; } diff --git a/meta/executors/mercury-payment.ts b/meta/executors/mercury-payment.ts index c7585b2..7c32e9f 100644 --- a/meta/executors/mercury-payment.ts +++ b/meta/executors/mercury-payment.ts @@ -20,13 +20,13 @@ * `cc_actions_log (intent_id, idempotency_key) WHERE intent_id IS NOT * NULL AND idempotency_key IS NOT NULL` prevents double-execute. The * dispatcher (meta/executors/dispatch.ts) computes the idempotency key - * as `sha256("{intent.id}:{attempt}:{intent_type}")` and passes it via - * `ctx.idempotencyKey`. Because `intent.id` is immutable and `attempt` - * is derived deterministically from prior `cc_actions_log` rows, a - * replay of the same intent reuses the same key — functionally - * equivalent to a payload-derived key for replay protection. The - * Mercury API call itself uses `ctx.idempotencyKey` as the Mercury - * `idempotencyKey`, so Mercury also de-dupes on the same value. + * as `sha256("{intent.id}:{intent_type}")` — DETERMINISTIC on intent.id, + * with NO `attempt` component. This is load-bearing: every retry of the + * same intent MUST reuse the same key so Mercury de-dupes end-to-end and + * a retry after a Neon blip cannot double-spend. Do NOT add `attempt` to + * this formula — a per-attempt key defeats Mercury's dedup and is a + * double-spend vector. The Mercury API call passes `ctx.idempotencyKey` + * as the `Idempotency-Key` header. * 4. NEVER log raw API keys, account numbers, routing numbers, or PII. * `responsePayload` carries `transaction_id`, `status`, `amount`, * `recipient_id`, last-4 only when available. `requestPayload` (set by @@ -114,6 +114,16 @@ export interface MercuryPaymentRunResult { bodySnippet?: string; /** Failure kind from the discriminated MercuryPostResult, when ok=false. 
*/ failureKind?: 'network' | 'http' | 'parse' | 'idempotency_collision'; + /** + * True iff money MAY have moved but the outcome is unknown — Mercury 409 + * collision (the original payment under this key likely already went out), + * network/lost-response, a 5xx, or an unparseable 2xx body. The dispatcher + * records the audit row as `in_flight` (NOT `failed`) so it triggers the + * `in_flight_unknown` operator reconciliation path. A definite 4xx rejection + * (≠409) and an explicit Mercury `status:"failed"` are NOT indeterminate — + * no money moved, so they stay terminal `failed`. + */ + indeterminate?: boolean; } /** Allowed account_slug pattern: lowercase alnum + hyphens only. */ @@ -220,25 +230,38 @@ export async function runMercuryPayment(args: { // operators can diagnose without re-running Mercury. if (!result.ok) { if (result.kind === 'idempotency_collision') { + // 409 → a payment under this key was already accepted by Mercury; money + // very likely moved. Indeterminate → reconcile, do NOT bury as failed. return { ok: false, refusalReason: 'idempotency_collision', failureKind: 'idempotency_collision', + indeterminate: true, httpStatus: result.httpStatus, bodySnippet: result.bodySnippet, errorMessage: - 'Mercury returned 409 idempotency collision — same idempotency key was previously used with a different payload (replay attempt or payload mutation)', + 'Mercury returned 409 idempotency collision — a payment under this idempotency key was already accepted; money may have moved. Reconcile against Mercury before any terminal transition.', }; } + // "money may have moved" set (chico ruling): network/lost-response, an + // unparseable 2xx body, and 5xx are all indeterminate — the request may + // have committed before the response was lost/unreadable. A definite 4xx + // rejection (≠409: 422 bad recipient, 400 bad amount, 401/403 auth) did + // NOT move money → terminal `failed`, remediate via a new intent. 
+ const indeterminate = + result.kind === 'network' || + result.kind === 'parse' || + (result.kind === 'http' && (result.httpStatus ?? 0) >= 500); return { ok: false, refusalReason: 'mercury_api_failure', failureKind: result.kind, + indeterminate, httpStatus: result.httpStatus, bodySnippet: result.bodySnippet, errorMessage: `Mercury API ${result.kind} failure${ result.httpStatus ? ` (HTTP ${result.httpStatus})` : '' - }: ${result.bodySnippet ?? 'no body'}`, + }${indeterminate ? ' — outcome unknown, money may have moved; reconcile' : ''}: ${result.bodySnippet ?? 'no body'}`, }; } @@ -377,23 +400,32 @@ const executor: IntentExecutor = { if (!run.ok) { const reason = run.refusalReason ?? 'unknown'; + // Indeterminate (money may have moved): leave the row `in_flight` so the + // next dispatch pass hits `in_flight_unknown` and surfaces a + // reconciliation signal. Definite refusal / no-money-moved: terminal + // `failed`. See chico ruling + meta/executors/dispatch.ts in_flight path. + const indeterminate = run.indeterminate === true; return { ok: false, - description: `mercury_payment refused: ${reason}${ - run.httpStatus ? ` (HTTP ${run.httpStatus})` : '' - }`, - actionType: 'payment_refusal', + indeterminate, + description: indeterminate + ? `mercury_payment INDETERMINATE: ${reason}${ + run.httpStatus ? ` (HTTP ${run.httpStatus})` : '' + } — money may have moved; left in_flight for operator reconciliation` + : `mercury_payment refused: ${reason}${ + run.httpStatus ? ` (HTTP ${run.httpStatus})` : '' + }`, + actionType: indeterminate ? 'payment_indeterminate' : 'payment_refusal', targetType: 'recipient', targetId, - // Mercury said `failed` outright → record as failed. Anything else - // refused at our gate is also `failed` (refusal = not executed). - status: 'failed', + status: indeterminate ? 'in_flight' : 'failed', errorMessage: run.errorMessage ?? reason, responsePayload, metadata: { refusal_reason: reason, failure_kind: run.failureKind ?? 
null, http_status: run.httpStatus ?? null, + indeterminate, }, }; } diff --git a/meta/executors/types.ts b/meta/executors/types.ts index 8f7b12b..3e83a23 100644 --- a/meta/executors/types.ts +++ b/meta/executors/types.ts @@ -43,6 +43,14 @@ export interface ExecutorResult { error?: string; /** True iff the result was replayed from a prior cc_actions_log row. */ replayed?: boolean; + /** + * True iff the outcome is indeterminate — money MAY have moved but the + * result is unknown (Mercury 409 collision, network/lost-response, 5xx, or an + * unparseable 2xx). The audit row is left `in_flight` and `executeIntent` + * MUST NOT mark the intent `failed`, so the next dispatch pass reaches the + * `in_flight_unknown` reconciliation branch instead of burying it as failed. + */ + indeterminate?: boolean; } export interface IntentExecutor { @@ -84,4 +92,11 @@ export interface ExecutorRunOutput { responsePayload?: Record; errorMessage?: string; metadata?: Record; + /** + * Set by money-path executors when `ok: false` AND money may have moved + * (status === 'in_flight'). The dispatcher propagates this onto + * ExecutorResult so executeIntent skips failIntent and the row stays + * `in_flight` for operator reconciliation. See mercury-payment.ts. + */ + indeterminate?: boolean; } diff --git a/meta/intent.ts b/meta/intent.ts index 7dbb7b3..a9e9323 100644 --- a/meta/intent.ts +++ b/meta/intent.ts @@ -552,9 +552,13 @@ export async function executeIntent( SET status = 'running', updated_at = NOW() WHERE id = ${intentId} AND status = 'claimed'`; await completeIntent(env, intentId); - } else if (!result.replayed) { - // Only mark failed on a fresh failure; replays should not overwrite - // terminal state. + } else if (!result.replayed && !result.indeterminate) { + // Only mark failed on a fresh, DETERMINATE failure. 
Replays must not + // overwrite terminal state, and indeterminate outcomes (money may have + // moved — Mercury 409/network/5xx/unparseable-2xx) must NOT go terminal: + // the audit row is left `in_flight` so the next dispatch pass reaches the + // `in_flight_unknown` reconciliation branch instead of being buried as + // failed. See meta/executors/mercury-payment.ts + dispatch.ts. await failIntent(env, intentId, result.error ?? 'unknown error').catch( () => null, ); diff --git a/tests/meta/executor-pr106-criticals.spec.ts b/tests/meta/executor-pr106-criticals.spec.ts index bb0351d..290819d 100644 --- a/tests/meta/executor-pr106-criticals.spec.ts +++ b/tests/meta/executor-pr106-criticals.spec.ts @@ -98,8 +98,9 @@ describe.skipIf(SKIP)('PR #106 criticals — replay-by-key + single failIntent', }); // Hand-insert a terminal cc_actions_log row for this intent with a key - // that we know will NOT match the key dispatch() will compute for - // attempt=1 (sha256("{id}:1:{type}")). Use a sentinel hex string. + // that we know will NOT match the key dispatch() will compute — the key is + // deterministic on intent.id (sha256("{id}:{type}"), NO attempt component). + // Use a sentinel hex string guaranteed to differ. const bogusKey = 'a'.repeat(64); await sql` INSERT INTO cc_actions_log @@ -124,7 +125,11 @@ describe.skipIf(SKIP)('PR #106 criticals — replay-by-key + single failIntent', `) as unknown as Array<{ status: string }>; expect(oblStatus[0].status).toBe('paid'); - // Two rows now: the bogus seed (attempt=0) + the real run (attempt=1). + // Two rows now: the bogus seed (attempt=0) + the real run. `attempt` is + // COUNT(*)+1 over prior rows for this intent — the seed makes COUNT=1 at + // pre-write time, so the real run is attempt=2. (`attempt` is audit-only + // metadata under the deterministic-key contract; the unique partial index + // dedupes, not the attempt number.) 
const auditRows = (await sql` SELECT attempt, idempotency_key FROM cc_actions_log WHERE intent_id = ${intent.id}::uuid ORDER BY attempt ASC @@ -132,7 +137,7 @@ describe.skipIf(SKIP)('PR #106 criticals — replay-by-key + single failIntent', expect(auditRows.length).toBe(2); expect(auditRows[0].attempt).toBe(0); expect(auditRows[0].idempotency_key).toBe(bogusKey); - expect(auditRows[1].attempt).toBe(1); + expect(auditRows[1].attempt).toBe(2); expect(auditRows[1].idempotency_key).toBe(result.idempotencyKey); }); @@ -190,9 +195,14 @@ describe.skipIf(SKIP)('PR #106 criticals — replay-by-key + single failIntent', freshnessMs: 1, // force stale-snapshot branch even if clock skews }); expect(result.ok).toBe(false); - // FIX 2: dispatch returned replayed:true so executeIntent did not - // re-call failIntent. - expect(result.replayed).toBe(true); + // FIX 2 (deterministic-key contract): a sovereignty refusal is a fresh, + // DEFINITE failure — not a replay and not indeterminate. dispatch fails the + // intent via safeFailIntent and returns WITHOUT `replayed`. executeIntent's + // `!replayed && !indeterminate` guard then calls failIntent again, but + // failIntent's status guard (claimed/running) makes that second call a + // no-op — so still exactly ONE failed-transition. The canonical signal is + // the single audit row + terminal intent state asserted below. + expect(result.replayed).toBeFalsy(); // Exactly one sovereignty_refusal audit row was written by dispatch. 
const auditRows = (await sql` diff --git a/tests/meta/executors/mercury-payment-failures.spec.ts b/tests/meta/executors/mercury-payment-failures.spec.ts index d0495c4..ef165df 100644 --- a/tests/meta/executors/mercury-payment-failures.spec.ts +++ b/tests/meta/executors/mercury-payment-failures.spec.ts @@ -92,6 +92,8 @@ describe('mercury-payment — discriminated-error & body-status handling (real R expect(run.failureKind).toBe('http'); expect(run.httpStatus).toBe(503); expect(run.bodySnippet).toContain('upstream timeout'); + // 5xx → money may have moved → indeterminate (row left in_flight, not failed). + expect(run.indeterminate).toBe(true); }); it('Mercury returns 409 → refusal with idempotency_collision (replay-attack tell)', async () => { @@ -107,6 +109,8 @@ describe('mercury-payment — discriminated-error & body-status handling (real R expect(run.failureKind).toBe('idempotency_collision'); expect(run.httpStatus).toBe(409); expect(run.bodySnippet).toContain('idempotency conflict'); + // 409 → a payment under this key likely already went out → indeterminate. + expect(run.indeterminate).toBe(true); }); it('Mercury returns 200 with {"status":"failed"} → refusal mercury_internal_failure, audit status=failed', async () => { @@ -126,6 +130,9 @@ describe('mercury-payment — discriminated-error & body-status handling (real R expect(run.transactionId).toBe('tx_failed_001'); expect(run.mercuryStatus).toBe('failed'); expect(run.httpStatus).toBe(200); + // Explicit Mercury status:"failed" on a 2xx → definite, no money moved → + // NOT indeterminate (stays terminal failed, remediate via a new intent). 
+ expect(run.indeterminate).toBeFalsy(); }); it('Mercury returns 200 with {"status":"pending"} → ok=true, audit status=in_progress (NOT completed)', async () => { @@ -174,6 +181,9 @@ describe('mercury-payment — discriminated-error & body-status handling (real R expect(run.refusalReason).toBe('mercury_api_failure'); expect(run.failureKind).toBe('network'); expect(run.bodySnippet).toContain('ECONNREFUSED'); + // network/lost-response → request may have committed before the response + // was lost → indeterminate (reconcile, do not bury as failed). + expect(run.indeterminate).toBe(true); }); it('account_slug with mixed case or special chars → refusal invalid_account_slug (no KV lookup)', async () => {