wyckit · wyckit · Jun 21, 2026 · Jun 21, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -47,11 +47,18 @@ jobs:
       - name: Verify TLM bundle (round-trip + checksum)
         run: dotnet run --project src/IntentMesh.Tlm.Cli -c Release --no-build -- verify --root dataset
 
-      - name: Test
+      # Two separate test steps so the secret and the untrusted-npm execution NEVER share an environment:
+      # the live-LLM test gets ANTHROPIC_API_KEY but does NOT run npx; the real-filesystem E2E runs the
+      # pinned npx package but has NO secret in its env (a compromised package can't read the API key).
+      - name: Test (unit + live-LLM; no npx)
         env:
-          INTENTMESH_FS_E2E: '1'   # run the REAL @modelcontextprotocol/server-filesystem path (node + network present)
-          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}   # live-LLM test runs when the secret is configured, else skips
-        run: dotnet test IntentMesh.slnx -c Release --no-build --nologo
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}   # live-LLM test runs when configured, else skips
+        run: dotnet test IntentMesh.slnx -c Release --no-build --nologo --filter "FullyQualifiedName!~McpProxy_wires_a_real_filesystem_mcp_server_end_to_end"
+
+      - name: Real filesystem MCP E2E (pinned npx; NO secrets in env)
+        env:
+          INTENTMESH_FS_E2E: '1'   # runs `npx -y @modelcontextprotocol/server-filesystem@<pinned>` — note: no ANTHROPIC_API_KEY here
+        run: dotnet test IntentMesh.slnx -c Release --no-build --nologo --filter "FullyQualifiedName~McpProxy_wires_a_real_filesystem_mcp_server_end_to_end"
 
       - name: Policy fixtures
         run: dotnet run --project src/IntentMesh.Cli -c Release --no-build -- policy fixtures

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,39 @@
 All notable changes to IntentMesh. Claims are test-backed; see [docs/MATURITY.md](docs/MATURITY.md)
 for the production-ready / experimental / future breakdown.
 
+## v1.12.0 — MCP audit/approval + policy hardening (sixth review pass)
+
+Closes a sixth external review (8 High + 3 Medium). **245 passing + 3 env-gated skipped.**
+
+High:
+- **MCP side effects require a durable signed audit.** `McpProxy.GateAndForward` can be wired with an
+  `IRunArtifactStore` + key provider; it persists a signed `TraceBundle` of the approved decision **before**
+  forwarding to the real server, and **fails closed** (does not forward) if the audit can't be written.
+- **MCP approvals are challenge-bound.** With an `ApprovalChallengeService` configured, an MCP approval is
+  a server-issued challenge bound to `{call fingerprint (tool+canonical args), tenant, expiry}` — a raw,
+  replayable `n1` no longer approves, and a challenge for one call can't approve another.
+- **Pinned, non-option npx.** `ConnectNpx` rejects option-shaped names (leading dash) and floating specs;
+  only a pinned, digit-led `name@1.2.3` is accepted. Call sites pin `@modelcontextprotocol/server-filesystem@2026.1.14`.
+- **CI isolates the API key from npx.** The live-LLM test (with `ANTHROPIC_API_KEY`) and the real-filesystem
+  E2E (which runs `npx`) are now separate steps with disjoint environments — a compromised npm package can't
+  read the secret.
+- **Production auth boundaries.** Trusted-proxy mode requires `INTENTMESH_PROXY_SECRET` in Production; the
+  legacy `INTENTMESH_WEB_TOKEN` no longer satisfies the Production auth guard and is gone from the quickstart.
+- **`/readyz` probes persistence.** It now writes + atomically moves + deletes a temp file in the runs dir,
+  so it fails when the volume is read-only/full/unmounted — not merely when the directory is absent.
+- **Direct run-query row cap.** A direct `RunQueryAction` (no `RowLimit` field) is now bounded by `db.RowCap`
+  at execution, the same cap a compiled plan must satisfy.
+- **Per-file delete verification.** A new `pc-deletion-matches-approval` postcondition proves every deleted
+  file is among the individually approved file refs — not merely that a delete node ran.
+
+Medium:
+- **Rate-limit key is trust-scoped.** `X-Forwarded-For` is honored only behind the trusted proxy (matching
+  `X-Proxy-Secret`); otherwise the socket IP is used, so a direct client can't rotate the header to evade limits.
+- **Custom-mapper path forwarding.** `NormalizeForForward` rewrites a custom mapper's path arg (e.g. `target`,
+  `filepath`) to the canonical in-root path actually validated, not just the standard keys.
+- **NuGet package signing** remains a documented residual (needs a code-signing certificate); provenance
+  attestation + SHA256SUMS ship today.
+
 ## v1.11.0 — Service & integration hardening (fifth review pass)
 
 Closes a fifth external review (7 High + 4 Medium). **240 passing + 3 env-gated skipped.**

diff --git a/Directory.Build.props b/Directory.Build.props
@@ -5,7 +5,7 @@
        Demos, tools, the web host, the E2E/bench runners, the template, and tests stay non-packable. -->
 
   <PropertyGroup>
-    <Version>1.11.0</Version>
+    <Version>1.12.0</Version>
     <Authors>Chad Sandor</Authors>
     <Company>wyckit</Company>
     <Product>IntentMesh</Product>
@@ -18,7 +18,7 @@
     <!-- MIT-licensed (SPDX expression in package metadata). -->
     <PackageLicenseExpression>MIT</PackageLicenseExpression>
     <PackageRequireLicenseAcceptance>false</PackageRequireLicenseAcceptance>
-    <PackageReleaseNotes>v1.11.0 — service hardening: server-mediated approvals on export, verify-before-rerun, per-file web approvals, fail-closed persistence, rate limiting, CSP/security headers, side-effecting-GET gating, MCP typed-path enforcement. See CHANGELOG.md.</PackageReleaseNotes>
+    <PackageReleaseNotes>v1.12.0 — sixth review pass: MCP pre-forward signed audit + challenge-bound approvals, pinned npx, direct run-query row cap, per-file delete verification, production auth guards, readyz write-probe, trust-scoped rate-limit key. See CHANGELOG.md.</PackageReleaseNotes>
 
     <!-- Reproducible restore: lock files are honored in CI via locked-mode restore. -->
     <RestorePackagesWithLockFile>true</RestorePackagesWithLockFile>

diff --git a/README.md b/README.md
@@ -104,7 +104,7 @@ dotnet run --project src/IntentMesh.Cli -- --trace "plan my Friday and draft Sar
 dotnet run --project src/IntentMesh.Web                       # then open the printed localhost URL
 
 # tests
-dotnet test IntentMesh.slnx                                   # 240 passing (+3 env-gated skipped)
+dotnet test IntentMesh.slnx                                   # 245 passing (+3 env-gated skipped)
 ```
 
 ### Wrap your own agent (the SDK on-ramp)
@@ -190,7 +190,7 @@ v1.7 platform:** Phase 1 (clarity) ✓ · Phase 2 (signed artifacts, replay, con
 Phase 3 (Control Room v1) ✓ · Phase 4 (IntentBench 25/25) ✓ · Phase 5 (SDK + MCP proxy / OpenAPI
 import / real-adapter example) ✓ · Phase 6 (manifesto, whitepaper, landing) ✓. **v1.7** adds the
 adoptable platform surface (full-lifecycle SDK + host template, real-LLM-proposer hardening,
-operator workflow, audit operations). **240 passing (+3 env-gated skipped) tests · IntentBench 25/25 · TLM 7/7.**
+operator workflow, audit operations). **245 passing (+3 env-gated skipped) tests · IntentBench 25/25 · TLM 7/7.**
 
 **Proven vs. experimental vs. future (claims discipline).** [docs/MATURITY.md](docs/MATURITY.md) is
 the canonical statement: every *proven* claim has a passing test that would fail if it stopped being
@@ -207,7 +207,7 @@ and the [CHANGELOG](CHANGELOG.md).
 ## Status
 
 Research prototype with a production-shaped core, **v1.8.0**. Symbolic layer: 7 TLMs, ~125 concepts,
-7/7 round-trip verify; typed action contracts across four domains. **xUnit 240 passing (+3 env-gated skipped).** Five demo
+7/7 round-trip verify; typed action contracts across four domains. **xUnit 245 passing (+3 env-gated skipped).** Five demo
 scenarios. See [docs/MATURITY.md](docs/MATURITY.md) for the proven / experimental / future breakdown.
 Delivered beyond v0.1:
 

diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md
@@ -12,13 +12,18 @@ docker build -f src/IntentMesh.Web/Dockerfile -t intentmesh-controlroom .
 docker run -p 8080:8080 \
   -e ASPNETCORE_ENVIRONMENT=Production \
   -e INTENTMESH_AUDIT_KEY="$(openssl rand -base64 32)" \
-  -e INTENTMESH_WEB_TOKEN="$(openssl rand -hex 24)" \
+  -e INTENTMESH_AUTH_KEY="$(openssl rand -base64 32)" \
+  -e INTENTMESH_PRINCIPALS=/run/secrets/principals.json \
   -e "AllowedHosts=mesh.example.com" \
   -e INTENTMESH_RUNS_DIR=/data/runs \
   -v intentmesh-runs:/data/runs \
   intentmesh-controlroom
 ```
 
+> Production uses **token mode** (above) or **trusted-proxy mode** (Mode B below). The legacy
+> `INTENTMESH_WEB_TOKEN` is **not** accepted as a production auth boundary — the host refuses to start in
+> Production unless token or proxy mode is configured.
+
 **From source:** `dotnet run --project src/IntentMesh.Web` (Development; loopback, demo key).
 
 ## Required configuration (production)
@@ -53,11 +58,11 @@ console snippet) or `printf '%s' "$KEY" | sha256sum`.
 | Variable | Purpose |
 |---|---|
 | `INTENTMESH_TRUSTED_PROXY=1` | Trust identity asserted by an upstream proxy/IdP instead of minting tokens. |
-| `INTENTMESH_PROXY_SECRET` | Shared secret the proxy must present as `X-Proxy-Secret`; without it, asserted headers are honored only from loopback. Set this whenever the app is reachable from anything but the proxy. |
+| `INTENTMESH_PROXY_SECRET` | Shared secret the proxy must present as `X-Proxy-Secret`. **Required in Production** (the host refuses to start in proxy mode without it) — without it, asserted headers would be honored from any loopback-presenting source. It also gates whether `X-Forwarded-For` is trusted for rate-limiting. |
 
 The proxy authenticates the user (OIDC, etc.) and forwards `X-Auth-Principal`, `X-Auth-Tenant`, and a
 comma-separated `X-Auth-Roles`. **The proxy MUST strip any client-supplied `X-Auth-*` / `X-Proxy-Secret`
-headers** so a caller can't spoof identity.
+/ `X-Forwarded-For` headers** so a caller can't spoof identity or its rate-limit bucket.
 
 ### Roles & isolation
 

diff --git a/docs/MATURITY.md b/docs/MATURITY.md
@@ -2,7 +2,7 @@
 
 The single source of truth for **what is production-ready, what is experimental, and what is future
 work.** Every "proven" claim below is backed by a test that would fail if the claim stopped being
-true (`dotnet test IntentMesh.slnx` — **240 passing, 3 env-gated skipped**). Nothing here is aspirational unless it says so.
+true (`dotnet test IntentMesh.slnx` — **245 passing, 3 env-gated skipped**). Nothing here is aspirational unless it says so.
 
 > IntentMesh is a **research prototype with a production-shaped core**: the security kernel and its
 > guarantees are proven and stable; the *operational backends* around it (KMS, DB persistence,

diff --git a/src/IntentMesh.Core/Adapters.cs b/src/IntentMesh.Core/Adapters.cs
@@ -284,16 +284,23 @@ private static ExecutionResult RunQuery(string id, RunQueryAction r, Workspace w
         if (table is null) return ToolHost.Ok(id, $"No such table '{r.Table}'.");
         ws.Db.RanQueries.Add(r.Summary);
 
+        // Row-cap enforcement: a DIRECT run-query carries no RowLimit field, so it is bounded here by the
+        // SAME db.RowCap a compiled plan must satisfy — a direct query cannot scan/aggregate more rows than
+        // the policy cap. (A capped read can't be used to exfiltrate an unbounded table.)
+        var cap = ws.Db.RowCap;
+        var rows = table.Rows.Take(cap).ToList();
+        var capNote = table.Rows.Count > cap ? $" (row cap {cap} applied to {table.Rows.Count} rows)" : "";
+
         // Aggregate signups by plan (non-sensitive columns only).
         int planCol = table.Columns.ToList().IndexOf("plan");
         var agg = planCol >= 0
-            ? string.Join(", ", table.Rows.GroupBy(row => row[planCol]).Select(g => $"{g.Key}: {g.Count()}"))
-            : $"{table.Rows.Count} rows";
+            ? string.Join(", ", rows.GroupBy(row => row[planCol]).Select(g => $"{g.Key}: {g.Count()}"))
+            : $"{rows.Count} rows";
 
         // Query results are retrieved content — scan for an embedded imperative.
         var proposed = new List<ProposedNode>();
-        var effects = new List<string> { $"result ({r.Summary}): {agg}", "no sensitive columns in the result" };
-        foreach (var row in table.Rows)
+        var effects = new List<string> { $"result ({r.Summary}){capNote}: {agg}", "no sensitive columns in the result" };
+        foreach (var row in rows)
             foreach (var cell in row)
                 if (cell.ToUpperInvariant().Contains("IGNORE PREVIOUS INSTRUCTIONS") ||
                     (cell.ToUpperInvariant().Contains("DROP") && cell.ToUpperInvariant().Contains("TABLE")))

diff --git a/src/IntentMesh.Core/PostconditionVerifier.cs b/src/IntentMesh.Core/PostconditionVerifier.cs
@@ -98,6 +98,18 @@ void Add(string id, string expected, string actual, bool pass, string evidence)
             Add("pc-no-unapproved-deletion", "no file deleted without approval",
                 ws.DeletedFiles.Count == 0 ? "none deleted" : (deleteApproved ? "deleted after approval" : "deleted WITHOUT approval!"),
                 ok, $"deleted={ws.DeletedFiles.Count}, approved-node={deleteApproved}");
+
+            // Granular check: every file actually deleted must be among the per-file refs that were
+            // approved (node.ApprovedRefs; compared case-insensitively) — proving the adapter deleted only
+            // approved files, not merely that a delete node ran. Catches an over-deleting/buggy adapter.
+            var approvedRefs = graph.Nodes
+                .Where(n => n.Type == Kinds.DeleteFiles)
+                .SelectMany(n => n.ApprovedRefs)
+                .ToHashSet(StringComparer.OrdinalIgnoreCase);
+            var unapproved = ws.DeletedFiles.Where(f => !approvedRefs.Contains(f)).ToList();
+            Add("pc-deletion-matches-approval", "every deleted file was individually approved",
+                unapproved.Count == 0 ? "exact match" : "deleted UNAPPROVED file(s)!", unapproved.Count == 0,
+                $"deleted -> {string.Join(", ", ws.DeletedFiles)}; approved refs -> {string.Join(", ", approvedRefs)}");
         }
 
         // ── Dev-agent postconditions ────────────────────────────────────────