diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4cc8be..cc26ded 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -122,7 +122,15 @@ jobs: - name: Real filesystem MCP E2E (pinned npx; runs after packing; NO secrets in env) env: INTENTMESH_FS_E2E: '1' # runs `npx -y @modelcontextprotocol/server-filesystem@` — note: no ANTHROPIC_API_KEY here - run: dotnet test IntentMesh.slnx -c Release --no-build --nologo --filter "FullyQualifiedName~McpProxy_wires_a_real_filesystem_mcp_server_end_to_end" + run: | + # The runner's default `bash -e` would exit on a failing assignment BEFORE the log is echoed; capture the status so a real failure stays visible. + rc=0 + out=$(dotnet test IntentMesh.slnx -c Release --no-build --nologo \ + --filter "FullyQualifiedName~McpProxy_wires_a_real_filesystem_mcp_server_end_to_end" 2>&1) || rc=$? + echo "$out" + [ "$rc" -eq 0 ] || exit "$rc" + # Guard against a renamed test silently matching zero cases (dotnet test exits 0 on "no match"). + echo "$out" | grep -qE "Total:[[:space:]]+1\b" || { echo "::error::FS-E2E filter matched no test (expected exactly 1)"; exit 1; } # Build-provenance attestation runs in a SEPARATE job that holds the only id-token/attestations write # scopes — so the build/test/pack job above has no write tokens. Push events only (fork PRs can't sign). diff --git a/CHANGELOG.md b/CHANGELOG.md index c400a55..e40a637 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,40 @@ All notable changes to IntentMesh. Claims are test-backed; see [docs/MATURITY.md](docs/MATURITY.md) for the production-ready / experimental / future breakdown. +## v1.16.0 — Approval integrity, single-use challenges & concurrency safety (ninth review pass) + +Closes a ninth external review (5 High + Mediums). **260 passing + 3 env-gated skipped.** + +High: +- **Approval can't drift from the reviewed run.** `/api/runs/{id}/approve` now requires the stored run to + still **reproduce** under the current runtime (`RunReplay.Reproduce(...).Reproduced`) before applying + approvals — if code/bundle behavior drifted since review, it returns 409. 
`/challenges` mints from the + run's **own signed policy decisions** (not a fresh re-run), so the approval queue matches the reviewed graph. +- **Approval challenges are single-use.** A `NonceLedger` consumes each challenge's nonce on success + (web `/approve` and the MCP proxy), so the same challenge can't trigger repeated side effects within its TTL. +- **Private-note drafts are blocked at the gate.** A `DraftEmailAction` whose body sources include a + private note is refused (`pol-draft-private-ref`) **before** the adapter dereferences private content — + not merely flagged as a postcondition after the fact. +- **Concurrent prune can't lose audit history.** `Archive` is serialized and never deletes an existing + archive destination (a content-addressed run already there is the same run); racing prunes drop the + redundant live copy instead. Temp files use unique names so a save racing a prune can't clobber staging. +- **Production startup guards are regression-tested.** A test hosts the app in **Production** and asserts + it refuses to start without a real auth boundary — so the guards can't silently stop firing. + +Medium: +- **Rotation-aware sidecar verification** — owner/external-call signatures verify under the *recorded* + key id (resolved via the provider), so key rotation doesn't invalidate older signed sidecars. +- **Docker healthcheck uses `curl`** (installed in the image) — the base aspnet image ships no `wget`/`curl`, + so the prior probe never worked. +- **Live-Anthropic test fails (not passes) on a dead transport** — it now asserts a non-empty bounded + proposal, so a swallowed transport error can't go green. +- **CI guards against a zero-match FS-E2E filter** (a renamed test would otherwise pass vacuously). +- Stale README version snippet replaced with a non-drifting reference. 
+ +> Known constraints (documented, not regressions): an `McpStdioClient` wraps one server subprocess and is +> not safe to share across threads (use one client per connection); tenant-wide run visibility is the +> chosen model; NuGet cryptographic signing is wired in CI but needs a certificate. + ## v1.15.0 — Container startup, audit binding & production-auth hardening (eighth review pass) Closes an eighth external review (3 High + 6 Medium). **256 passing + 3 env-gated skipped.** diff --git a/Directory.Build.props b/Directory.Build.props index 46659fe..7014633 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -5,7 +5,7 @@ Demos, tools, the web host, the E2E/bench runners, the template, and tests stay non-packable. --> - 1.15.0 + 1.16.0 Chad Sandor wyckit IntentMesh @@ -18,7 +18,7 @@ MIT false - v1.15.0 — eighth review pass: embedded-bundle fallback for containers, signed exact MCP payload + signed owner + distinct principal, 128-bit run ids + collision-fail, retention enforcement, untrusted-GET hint can't suppress confirmation, auth-key!=audit-key + proxy-secret strength + XFF-last, opt-in NuGet signing, doc drift. See CHANGELOG.md. + v1.16.0 — ninth review pass: reproduce-before-approve, single-use approval challenges, private-note draft block at the gate, collision-safe archive, production-startup regression test, rotation-aware sidecar verify, unique temp names, curl healthcheck, fail-not-pass live-LLM test, CI zero-match guard. See CHANGELOG.md. 
true diff --git a/README.md b/README.md index 7b84c07..6b058c7 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ dotnet run --project src/IntentMesh.Cli -- --trace "plan my Friday and draft Sar dotnet run --project src/IntentMesh.Web # then open the printed localhost URL # tests -dotnet test IntentMesh.slnx # 256 passing (+3 env-gated skipped) +dotnet test IntentMesh.slnx # 260 passing (+3 env-gated skipped) ``` ### Wrap your own agent (the SDK on-ramp) @@ -190,7 +190,7 @@ v1.7 platform:** Phase 1 (clarity) ✓ · Phase 2 (signed artifacts, replay, con Phase 3 (Control Room v1) ✓ · Phase 4 (IntentBench 25/25) ✓ · Phase 5 (SDK + MCP proxy / OpenAPI import / real-adapter example) ✓ · Phase 6 (manifesto, whitepaper, landing) ✓. **v1.7** adds the adoptable platform surface (full-lifecycle SDK + host template, real-LLM-proposer hardening, -operator workflow, audit operations). **256 passing (+3 env-gated skipped) tests · IntentBench 25/25 · TLM 7/7.** +operator workflow, audit operations). **260 passing (+3 env-gated skipped) tests · IntentBench 25/25 · TLM 7/7.** **Proven vs. experimental vs. future (claims discipline).** [docs/MATURITY.md](docs/MATURITY.md) is the canonical statement: every *proven* claim has a passing test that would fail if it stopped being @@ -206,8 +206,8 @@ and the [CHANGELOG](CHANGELOG.md). ## Status -Research prototype with a production-shaped core, **v1.15.0**. Symbolic layer: 7 TLMs, ~125 concepts, -7/7 round-trip verify; typed action contracts across four domains. **xUnit 256 passing (+3 env-gated skipped).** Five demo +Research prototype with a production-shaped core, **v1.16.0**. Symbolic layer: 7 TLMs, ~125 concepts, +7/7 round-trip verify; typed action contracts across four domains. **xUnit 260 passing (+3 env-gated skipped).** Five demo scenarios. See [docs/MATURITY.md](docs/MATURITY.md) for the proven / experimental / future breakdown. 
Delivered beyond v0.1: @@ -266,7 +266,7 @@ Conventions follow PassGen: .NET 10, nullable + implicit usings, file-scoped nam **Build & SDK.** Requires the **.NET 10 SDK** (10.0.2xx), pinned via [`global.json`](global.json) (`rollForward: latestFeature`) for reproducible builds. The three libraries — `IntentMesh.Tlm`, `IntentMesh.Core`, `IntentMesh.Integrations` — are packable (`dotnet pack -c Release`, -versioned at 1.8.0 with NuGet READMEs); demos, the web host, tools, and tests are not. Publishing to +versioned from `Directory.Build.props` with NuGet READMEs); demos, the web host, tools, and tests are not. Publishing to nuget.org is a future decision — `dotnet pack` produces valid local packages today. ## License diff --git a/docs/MATURITY.md b/docs/MATURITY.md index 859aa07..6932443 100644 --- a/docs/MATURITY.md +++ b/docs/MATURITY.md @@ -2,7 +2,7 @@ The single source of truth for **what is production-ready, what is experimental, and what is future work.** Every "proven" claim below is backed by a test that would fail if the claim stopped being -true (`dotnet test IntentMesh.slnx` — **256 passing, 3 env-gated skipped**). Nothing here is aspirational unless it says so. +true (`dotnet test IntentMesh.slnx` — **260 passing, 3 env-gated skipped**). Nothing here is aspirational unless it says so. > IntentMesh is a **research prototype with a production-shaped core**: the security kernel and its > guarantees are proven and stable; the *operational backends* around it (KMS, DB persistence, diff --git a/src/IntentMesh.Core/Auth.cs b/src/IntentMesh.Core/Auth.cs index 06da50d..403072f 100644 --- a/src/IntentMesh.Core/Auth.cs +++ b/src/IntentMesh.Core/Auth.cs @@ -144,8 +144,11 @@ public string Mint(string runId, string nodeId, string tenantId, long issuedAtUn } /// Verify a presented challenge against the run+tenant the caller is acting on. Returns the - /// attested node id only when the signature, type, run, tenant, and expiry all hold. - public bool TryVerify(string? 
token, string expectedRunId, string expectedTenant, long nowUnix, out string nodeId) + /// attested node id only when the signature, type, run, tenant, and expiry all hold. When a + /// <see cref="NonceLedger"/> is supplied the challenge is SINGLE-USE: its nonce is consumed on success, + /// so the same token can't trigger a repeated side effect within its TTL. + public bool TryVerify(string? token, string expectedRunId, string expectedTenant, long nowUnix, + out string nodeId, NonceLedger? ledger = null) { nodeId = ""; if (!SignedToken.TryDecode(token, _key, out var json)) return false; @@ -155,11 +158,31 @@ public bool TryVerify(string? token, string expectedRunId, string expectedTenant if (p is null || p.typ != "appr" || p.exp <= nowUnix) return false; if (!string.Equals(p.run, expectedRunId, StringComparison.Ordinal)) return false; if (!string.Equals(p.ten, expectedTenant, StringComparison.Ordinal)) return false; + // Consume the nonce LAST (after every other check passes) so a single-use challenge is burned only + // when it would actually be honored. A reused token fails here. + if (ledger is not null && !ledger.TryConsume(p.jti, p.exp, nowUnix)) return false; nodeId = p.node; return true; } } +/// <summary>A thread-safe single-use ledger of challenge nonces (jti). A nonce is accepted at most once; +/// expired entries are evicted opportunistically so memory stays bounded to live (unexpired) challenges.</summary> +public sealed class NonceLedger +{ + private readonly System.Collections.Concurrent.ConcurrentDictionary<string, long> _seen = new(); + + /// <summary>Records <paramref name="jti"/> as used until <paramref name="expiresAtUnix"/>; returns false when it is null or was already consumed.</summary> + public bool TryConsume(string jti, long expiresAtUnix, long nowUnix) + { + // Fail closed on a missing nonce: a null key would make TryAdd throw instead of rejecting the challenge. + if (jti is null) return false; + foreach (var kv in _seen) // evict expired (bounds memory to active challenges) + if (kv.Value <= nowUnix) _seen.TryRemove(kv.Key, out _); + return _seen.TryAdd(jti, expiresAtUnix); // false if this nonce was already consumed + } +} + /// Maps verified reverse-proxy / OIDC headers to a principal. The TRUST decision (is this /// request actually from the configured proxy hop?) 
is made by the host before calling this; here we /// only validate and shape the asserted identity. diff --git a/src/IntentMesh.Core/PolicyGate.cs b/src/IntentMesh.Core/PolicyGate.cs index 1dae9e1..108f3c7 100644 --- a/src/IntentMesh.Core/PolicyGate.cs +++ b/src/IntentMesh.Core/PolicyGate.cs @@ -177,6 +177,15 @@ public PolicyDecision Evaluate(IntentNode node, PolicyContext ctx) if (node.Type == Kinds.DraftEmail && IsEmail(node.Action, out var to)) { + // Block BEFORE the adapter dereferences private note bodies into the draft: a draft whose body + // sources include a PRIVATE note is refused at the gate (not merely flagged as a postcondition + // failure after the private content was already pulled into the message). + if (node.Action is DraftEmailAction d + && d.BodySourceRefs.Any(r => ctx.Workspace.Notes.Any(n => n.Id == r && n.Private))) + return new PolicyDecision(node.Id, Decision.Block, risk, + "Draft references a private note — blocked before any private content is dereferenced into the message.", + new[] { "pol-draft-private-ref" }, false, trust, sensitive, false, false); + // Check the recipient AT THE GATE, before the draft is created — not only as a postcondition. // A draft to a recipient the user never named is gated for confirmation, even from a // full-authority proposer (which could invent one). diff --git a/src/IntentMesh.Core/RunArtifactStore.cs b/src/IntentMesh.Core/RunArtifactStore.cs index faf117e..ba029d6 100644 --- a/src/IntentMesh.Core/RunArtifactStore.cs +++ b/src/IntentMesh.Core/RunArtifactStore.cs @@ -109,7 +109,9 @@ public string Save(TraceBundle bundle) /// signature-failing artifact. private static void WriteAtomic(string path, string content) { - var tmp = path + ".tmp"; + // Unique temp name per write so concurrent writers (e.g. a save racing a prune) can't clobber each + // other's staging file — each stages to its own temp, then atomically renames into place. + var tmp = path + "." 
+ Guid.NewGuid().ToString("N") + ".tmp"; File.WriteAllText(tmp, content); File.Move(tmp, path, overwrite: true); } @@ -188,7 +190,11 @@ public void RecordOwner(string runId, RunOwner owner, IAuditKeyProvider? signer if (verifier is not null) { if (string.IsNullOrEmpty(owner.Signature)) return null; - var expected = AuditSigner.SignString(OwnerCanonical(owner), verifier); + // Verify under the key id the record was SIGNED with (resolved via the rotation-aware provider), + // not the provider's current key — so rotation doesn't invalidate older signed sidecars. + var key = AuditSigner.ResolveKey(owner.KeyId ?? verifier.KeyId, verifier); + if (key is null) return null; + var expected = AuditSigner.SignString(OwnerCanonical(owner), key); if (!CryptographicOperations.FixedTimeEquals(Encoding.UTF8.GetBytes(expected), Encoding.UTF8.GetBytes(owner.Signature))) return null; } @@ -218,7 +224,10 @@ public void RecordExternalCall(string runId, string canonicalPayload, IAuditKeyP if (rec is null) return null; if (verifier is not null) { - var expected = AuditSigner.SignString(rec.Payload, verifier); + // Verify under the recorded key id (rotation-aware), not the provider's current key. + var key = AuditSigner.ResolveKey(rec.KeyId, verifier); + if (key is null) return null; + var expected = AuditSigner.SignString(rec.Payload, key); if (!CryptographicOperations.FixedTimeEquals(Encoding.UTF8.GetBytes(expected), Encoding.UTF8.GetBytes(rec.Signature))) return null; } @@ -255,15 +264,23 @@ public IReadOnlyList ListSummaries() /// Move a run's artifacts to a .archive/ subdirectory (preserving verifiability), /// so retention doesn't destroy the audit trail. 
public void Archive(string runId) { - var src = RunDir(runId); - if (!Directory.Exists(src)) return; - var archiveRoot = Path.Combine(_root, ".archive"); - Directory.CreateDirectory(archiveRoot); - var dest = Path.Combine(archiveRoot, runId); - if (Directory.Exists(dest)) Directory.Delete(dest, true); - Directory.Move(src, dest); + // Serialize archiving (across store instances in the process) and NEVER delete an existing archive + // destination: a run is content-addressed, so a dest with the same id IS the same run. Racing + // prunes therefore drop the redundant live copy rather than risk deleting the only archived copy. + lock (_archiveLock) + { + var src = RunDir(runId); + if (!Directory.Exists(src)) return; + var archiveRoot = Path.Combine(_root, ".archive"); + Directory.CreateDirectory(archiveRoot); + var dest = Path.Combine(archiveRoot, runId); + if (Directory.Exists(dest)) { Directory.Delete(src, true); return; } // already archived — drop the live dup + try { Directory.Move(src, dest); } + catch (IOException) when (Directory.Exists(dest)) { if (Directory.Exists(src)) Directory.Delete(src, true); } + } } + private static readonly object _archiveLock = new(); // static: one gate for Archive across every store instance; declared after the method so Archive keeps its XML doc /// Retention: keep the most-recent runs live and archive diff --git a/src/IntentMesh.Integrations/McpProxy.cs b/src/IntentMesh.Integrations/McpProxy.cs index 01486e8..5692dc2 100644 --- a/src/IntentMesh.Integrations/McpProxy.cs +++ b/src/IntentMesh.Integrations/McpProxy.cs @@ -112,6 +112,7 @@ public sealed class McpProxy private readonly ApprovalChallengeService? _approvalService; private readonly string _tenantId; private readonly string _principalId; + private readonly NonceLedger _approvalLedger = new(); // approval challenges are single-use within TTL /// /// A loaded IntentMeshRuntime. The caller controls which capabilities are @@ -249,7 +250,7 @@ public McpGateResult Gate(McpToolCall call, IReadOnlySet? 
approvals = nu var fingerprint = CallFingerprint(call); var verified = new HashSet(StringComparer.OrdinalIgnoreCase); foreach (var token in effectiveApprovals) - if (_approvalService.TryVerify(token, fingerprint, _tenantId, now, out var approvedNode)) + if (_approvalService.TryVerify(token, fingerprint, _tenantId, now, out var approvedNode, _approvalLedger)) verified.Add(approvedNode); effectiveApprovals = verified; } diff --git a/src/IntentMesh.Web/Dockerfile b/src/IntentMesh.Web/Dockerfile index a962c10..8f5d3c5 100644 --- a/src/IntentMesh.Web/Dockerfile +++ b/src/IntentMesh.Web/Dockerfile @@ -21,7 +21,12 @@ FROM mcr.microsoft.com/dotnet/aspnet@sha256:ddcf70ad1ab963a4fcd41fbd722a6b660e40 WORKDIR /app # Run as a non-root user, and pre-create the runs volume mount point owned by that user so persistence # works with a named volume (the app writes signed bundles + a /readyz write-probe there). -RUN useradd -u 10001 -m app \ +# Non-root user + the runs volume mount point (owned by that user), and curl for the HEALTHCHECK — the +# base aspnet image ships neither wget nor curl, so the probe tool must be installed explicitly. +RUN apt-get update \ + && apt-get install -y --no-install-recommends curl \ + && rm -rf /var/lib/apt/lists/* \ + && useradd -u 10001 -m app \ && mkdir -p /data/runs \ && chown -R 10001:10001 /data/runs COPY --from=build /app . 
@@ -39,6 +44,6 @@ VOLUME ["/data/runs"] # docker run -e INTENTMESH_AUDIT_KEY==16 bytes> -e INTENTMESH_AUTH_KEY==16 bytes> \ # -e INTENTMESH_PRINCIPALS=/run/secrets/principals.json -e "AllowedHosts=mesh.example.com" \ # -v intentmesh-runs:/data/runs -p 8080:8080 intentmesh-controlroom -HEALTHCHECK --interval=30s --timeout=3s --retries=3 CMD wget -qO- http://localhost:8080/readyz || exit 1 +HEALTHCHECK --interval=30s --timeout=3s --retries=3 CMD curl -fsS http://localhost:8080/readyz || exit 1 ENTRYPOINT ["dotnet", "IntentMesh.Web.dll"] diff --git a/src/IntentMesh.Web/Program.cs b/src/IntentMesh.Web/Program.cs index b6ae675..72ef18b 100644 --- a/src/IntentMesh.Web/Program.cs +++ b/src/IntentMesh.Web/Program.cs @@ -98,6 +98,7 @@ var effectiveAuthKey = authKeyRaw is not null ? AuthKeys.Parse(authKeyRaw) : keyProvider.GetKey(); var tokenSvc = new AuthTokenService(effectiveAuthKey); var challengeSvc = new ApprovalChallengeService(effectiveAuthKey); +var approvalLedger = new NonceLedger(); // makes each approval challenge SINGLE-USE within its TTL var principals = PrincipalStore.FromEnvironment(); var webToken = Environment.GetEnvironmentVariable("INTENTMESH_WEB_TOKEN"); // legacy single-token (default tenant) var tokenMode = principals.Count > 0; @@ -446,15 +447,15 @@ try { saved = StoreFor(p.TenantId).Load(id); } catch (Exception ex) when (ex is FileNotFoundException or ArgumentException) { return Results.NotFound(new { error = $"no run '{id}'" }); } - // Trust the stored run BEFORE re-running it: a tampered bundle must not become input to a freshly - // signed approved run. (Mirrors RunReplay, which verifies before re-execution.) 
if (!TraceBundleBuilder.VerifySignature(saved, keyProvider)) return Results.Json(new { error = "stored run failed integrity verification" }, statusCode: 409); - var res = runtime.Run(saved.Prompt, Workspace.CreateDemo(), new HashSet()); + // Mint challenges from the REVIEWED run's OWN signed policy decisions — not a fresh re-run — so the + // approval queue can't drift from the graph that was actually reviewed (the signed bundle is the + // source of truth). var now = NowUnix(); var exp = now + ChallengeTtlSeconds; - var challenges = res.Policy.Where(x => x.RequiresConfirmation).SelectMany(x => + var challenges = saved.PolicyDecisions.Decisions.Where(x => x.RequiresConfirmation).SelectMany(x => // No refs → one challenge for the bare node; per-file delete → one challenge per "node#ref". (x.ApprovalRefs.Count == 0 ? new[] { (unit: x.NodeId, fileRef: (string?)null) } : x.ApprovalRefs.Select(r => (unit: $"{x.NodeId}#{r}", fileRef: (string?)r)).ToArray()) @@ -482,15 +483,19 @@ try { saved = store.Load(id); } catch (Exception ex) when (ex is FileNotFoundException or ArgumentException) { return Results.NotFound(new { error = $"no run '{id}'" }); } - // Trust the stored run BEFORE re-running it under approval — a tampered bundle must never become - // input to a newly signed approved run. - if (!TraceBundleBuilder.VerifySignature(saved, keyProvider)) + // Bind approval to the EXACT reviewed graph: require the stored run to still REPRODUCE under the + // current runtime (signature verifies AND a deterministic re-run is byte-identical). If code/bundle + // behavior drifted since review, the approved graph could differ from the reviewed one — refuse. 
+ var replay = RunReplay.Reproduce(runtime, Workspace.CreateDemo(), saved, keyProvider); + if (!replay.SignatureVerified) return Results.Json(new { error = "stored run failed integrity verification" }, statusCode: 409); + if (!replay.Reproduced) + return Results.Json(new { error = "stored run no longer reproduces under the current runtime (behavior drift) — re-create the run before approving" }, statusCode: 409); var now = NowUnix(); var approved = new HashSet(StringComparer.OrdinalIgnoreCase); foreach (var token in req.Challenges ?? Array.Empty()) - if (challengeSvc.TryVerify(token, id, p.TenantId, now, out var unit)) + if (challengeSvc.TryVerify(token, id, p.TenantId, now, out var unit, approvalLedger)) // single-use approved.Add(unit); // unit is a bare node id, or "node#fileRef" for a per-file delete if (approved.Count == 0) return Results.Json(new { error = "no valid approval challenge presented" }, statusCode: 400); diff --git a/tests/IntentMesh.Tests/AuthTests.cs b/tests/IntentMesh.Tests/AuthTests.cs index 3ae4aa8..98ec726 100644 --- a/tests/IntentMesh.Tests/AuthTests.cs +++ b/tests/IntentMesh.Tests/AuthTests.cs @@ -127,6 +127,21 @@ public void Approval_challenge_verifies_only_for_its_own_run_and_tenant() Assert.False(svc.TryVerify(challenge, "runA", "acme", Now + 301, out _)); // expired } + [Fact] + public void An_approval_challenge_is_single_use_with_a_ledger() + { + var svc = new ApprovalChallengeService(Key); + var ledger = new NonceLedger(); + var token = svc.Mint("runA", "n1", "acme", Now, Now + 300, "nonce-x"); + + Assert.True(svc.TryVerify(token, "runA", "acme", Now + 10, out _, ledger)); // first use consumes it + Assert.False(svc.TryVerify(token, "runA", "acme", Now + 10, out _, ledger)); // reuse within TTL is rejected + + // Without a ledger the call is stateless (back-compat) — verification alone doesn't consume. 
+ Assert.True(svc.TryVerify(token, "runA", "acme", Now + 10, out _)); + Assert.True(svc.TryVerify(token, "runA", "acme", Now + 10, out _)); + } + [Fact] public void Trusted_proxy_headers_map_to_a_principal_and_reject_invalid_ids() { diff --git a/tests/IntentMesh.Tests/LlmProposerTests.cs b/tests/IntentMesh.Tests/LlmProposerTests.cs index 597de8c..567b085 100644 --- a/tests/IntentMesh.Tests/LlmProposerTests.cs +++ b/tests/IntentMesh.Tests/LlmProposerTests.cs @@ -218,6 +218,38 @@ public void A_proposer_invented_recipient_not_in_the_prompt_is_gated_before_crea /// Real Anthropic call — env-gated (ANTHROPIC_API_KEY). Proves the AnthropicLlmClient transport /// is wired; deterministic logic is covered by the scripted tests above. /// + private sealed class SingleNodeProposer : IIntentProposer + { + private readonly IntentNode _n; + public SingleNodeProposer(IntentNode n) => _n = n; + public ProposedPlan Propose(string prompt, Workspace ws) => + new(new[] { _n }, Array.Empty(), Array.Empty()); + } + + [Fact] + public void A_draft_referencing_a_private_note_is_blocked_before_dereferencing() + { + // A proposer (LLM or full-authority) that sources a draft body from a PRIVATE note must be blocked + // AT THE GATE — before the adapter pulls the private content into the message. 
+ var node = new IntentNode + { + Id = "n1", + Type = Kinds.DraftEmail, + Label = "draft to Sarah", + Action = new DraftEmailAction("Sarah", "Update", new[] { "note-strategy" }), // note-strategy is Private + SourceText = "draft", + TrustSource = TrustSource.User, + Status = NodeStatus.Resolved, + }; + var ws = Workspace.CreateDemo(); + var r = IntentMeshRuntime.Load().RunWith(new SingleNodeProposer(node), "Draft Sarah from my notes.", ws, new HashSet()); + + var view = r.Nodes.First(n => n.Id == "n1"); + Assert.Equal("Blocked", view.Status); + Assert.Contains("pol-draft-private-ref", r.Policy.First(p => p.NodeId == "n1").TriggeredRules); + Assert.Empty(ws.Drafts); // the private note was never dereferenced into a draft + } + [SkippableFact] public void LlmProposer_against_the_real_api_when_configured() { @@ -225,6 +257,9 @@ public void LlmProposer_against_the_real_api_when_configured() if (client is null) { Skip.If(true, "ANTHROPIC_API_KEY not set — real-API test skipped"); return; } var bundle = Bundle(); var plan = new LlmIntentProposer(bundle, client).Propose("Read my calendar for Friday.", Workspace.CreateDemo()); - Assert.NotNull(plan); // a real model should propose something registry-bounded (or nothing) without throwing + // When the key IS configured this must prove the REAL transport worked: a clear "read my calendar" + // prompt yields at least one registry-bounded node. A swallowed transport failure returns an empty + // plan, which now FAILS here rather than passing — the test can't go green on a dead transport. 
+ Assert.NotEmpty(plan.Nodes); } } diff --git a/tests/IntentMesh.Tests/WebAuthzTests.cs b/tests/IntentMesh.Tests/WebAuthzTests.cs index 8a2885f..f758bb3 100644 --- a/tests/IntentMesh.Tests/WebAuthzTests.cs +++ b/tests/IntentMesh.Tests/WebAuthzTests.cs @@ -194,6 +194,29 @@ public async Task Caller_asserted_approvals_are_ignored_and_only_a_server_issued finally { Cleanup(f, runs); } } + [Fact] + public async Task A_used_approval_challenge_cannot_be_replayed() + { + var (f, runs) = MakeTokenMode(); + try + { + var alice = await ClientFor(f, AliceKey); + var id = (await alice.PostAsJsonAsync("/api/run", new { prompt = Demo1 })) + .Headers.GetValues("X-Run-Id").First(); + var challenges = (await (await alice.PostAsync($"/api/runs/{id}/challenges", null)) + .Content.ReadFromJsonAsync())!; + var tokens = challenges.challenges.Select(x => x.challenge).ToArray(); + + // First approval consumes the single-use challenges. + (await alice.PostAsJsonAsync($"/api/runs/{id}/approve", new { challenges = tokens })).EnsureSuccessStatusCode(); + + // Replaying the SAME challenges must not trigger a second approved run. + var replay = await alice.PostAsJsonAsync($"/api/runs/{id}/approve", new { challenges = tokens }); + Assert.Equal(HttpStatusCode.BadRequest, replay.StatusCode); + } + finally { Cleanup(f, runs); } + } + [Fact] public async Task The_approver_role_is_required_to_approve() { diff --git a/tests/IntentMesh.Tests/WebTests.cs b/tests/IntentMesh.Tests/WebTests.cs index 46e04a6..3c658b1 100644 --- a/tests/IntentMesh.Tests/WebTests.cs +++ b/tests/IntentMesh.Tests/WebTests.cs @@ -74,6 +74,34 @@ public async Task Health_and_readiness_endpoints_respond() finally { Cleanup(f, runs); } } + [Fact] + public void Production_refuses_to_start_without_a_real_auth_boundary() + { + // Real audit key (so the demo-key guard passes) but NO auth configured → the Production auth guard + // must refuse startup. 
This regression-tests the production guards, which Development-hosted tests + // never exercise: the early return surfaces as a failure to start the test host. + Environment.SetEnvironmentVariable("INTENTMESH_AUDIT_KEY", Convert.ToBase64String(new byte[32])); + Environment.SetEnvironmentVariable("INTENTMESH_PRINCIPALS", null); + Environment.SetEnvironmentVariable("INTENTMESH_WEB_TOKEN", null); + Environment.SetEnvironmentVariable("INTENTMESH_TRUSTED_PROXY", null); + Environment.SetEnvironmentVariable("INTENTMESH_ALLOW_INSECURE_AUTH", null); + var runsDir = Path.Combine(Path.GetTempPath(), "im-prod-" + Guid.NewGuid().ToString("N")); + Environment.SetEnvironmentVariable("INTENTMESH_RUNS_DIR", runsDir); + var factory = new WebApplicationFactory().WithWebHostBuilder(b => b.UseEnvironment("Production")); + try + { + Assert.ThrowsAny(() => factory.CreateClient()); + } + finally + { + factory.Dispose(); + foreach (var v in new[] { "INTENTMESH_AUDIT_KEY", "INTENTMESH_PRINCIPALS", "INTENTMESH_WEB_TOKEN", + "INTENTMESH_TRUSTED_PROXY", "INTENTMESH_ALLOW_INSECURE_AUTH", "INTENTMESH_RUNS_DIR" }) + Environment.SetEnvironmentVariable(v, null); + try { Directory.Delete(runsDir, recursive: true); } catch { /* best effort */ } + } + } + [Fact] public async Task Security_headers_are_present_on_responses() {