diff --git a/CLAUDE.md b/CLAUDE.md index d2eb2f19..8a0dd30a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -425,7 +425,7 @@ These are prescriptive rules not derivable from reading the code: - **DM pods are strictly 1:1 (ADR-001 §3.10).** `agent-room` (1:1 user↔agent) and `agent-dm` (1:1 any pair) MUST have exactly two members. Single source of truth: `agentIdentityService.DM_POD_TYPES_GUARD = {'agent-room', 'agent-dm'}`. `ensureAgentInPod`, `joinPod` controller, and `claude-code session-token` attach all consult it. **`agent-admin` is intentionally NOT in the set** — admin pods are N:1 (multiple admins ↔ one agent). A 3rd-party who needs a private channel with one of the 2 members must spawn a NEW agent-dm via `commonly_open_dm`. Refused posts return 403 with `code: 'dm_membership_refused'` (NOT 500 / "Pod not found"). Sweep scripts: `scripts/migrate-agent-{dm,room}-multimember.ts`. -- **Agent reactions are first-class kernel primitives — but no production driver actually consumes the tool yet (verified 2026-05-16 smoke).** `POST /api/messages/:messageId/reactions` accepts both human JWTs and agent runtime tokens (`cm_agent_*`) via `dualAuth` (`backend/routes/messages.ts`). The controller (`reactionController.ts`) gates agent callers via `AgentInstallation.findOne({ podId, installedBy: req.agentUser._id, status: 'active' })` then falls back to `Pod.members`. Same `messageReaction` Socket.io fan-out fires for both paths, so human observers would see agent reactions live. `@commonlyai/mcp@0.1.2` exposes `commonly_react_to_message` (PR #389). Regression test: `backend/__tests__/unit/controllers/reactionController.test.js`. 
**Open driver gaps as of 2026-05-16:** (a) codex `exec` (Cody's runtime) doesn't surface MCP-server-exposed tools to the model — `codex mcp list` shows our server `enabled`, but the model's callable tool list during exec is only codex built-ins (`web.run`, `exec_command`, `apply_patch`, the MCP **introspection** helpers `functions.list_mcp_resources/...`, etc.). No `commonly_*` tools visible. Verified by direct prompt asking the model to enumerate. (b) clawdbot/openclaw extension never added the reaction tool to its `commonly_*` block. Result: production agents asked to "react" post the emoji as message content instead. **Path forward:** either fix codex `exec` MCP loading (upstream), switch dev agents to a claude-code adapter (which DOES consume MCP), or add the tool to the openclaw extension (Team-Commonly/openclaw repo PR). Don't claim the loop closed for any agent until you've watched a live `mine: True` reaction land via the messageReaction socket event in a non-admin browser session — kernel verification alone isn't enough. Rule: any new social-presence primitive (typing-indicator, read-receipt, …) MUST take the dual-auth shape — never gate on `req.userId` alone, or agents are silently excluded. +- **Agent reactions are first-class kernel primitives — but no production driver actually consumes the tool yet (verified 2026-05-16 smoke).** `POST /api/messages/:messageId/reactions` accepts both human JWTs and agent runtime tokens (`cm_agent_*`) via `dualAuth` (`backend/routes/messages.ts`). The controller (`reactionController.ts`) gates agent callers via `AgentInstallation.findOne({ podId, installedBy: req.agentUser._id, status: 'active' })` then falls back to `Pod.members`. Same `messageReaction` Socket.io fan-out fires for both paths, so human observers would see agent reactions live. `@commonlyai/mcp@0.1.2` exposes `commonly_react_to_message` (PR #389). Regression test: `backend/__tests__/unit/controllers/reactionController.test.js`. 
**Driver gaps (updated 2026-06-09):** (a) codex `exec` MCP-tool surfacing was broken on **codex 0.125** (model saw only built-ins + MCP **introspection** helpers `list_mcp_resources/...`, no `commonly_*`) — **FIXED on codex 0.133**, which forwards the full `commonly_*` namespace to the model inline as a `namespace`-type `mcp__commonly__` tool (verified 2026-06-09 by capturing the model-request payload from `codex exec` against a mock Responses endpoint). Ensure Cody/cloud-codex + any local codex wrapper run codex ≥0.133. (b) clawdbot/openclaw extension never added the reaction tool to its `commonly_*` block — this is a SEPARATE code path that does not go through MCP, so MCP-surfaced tools never auto-reach moltbots. Result: moltbot agents asked to "react" post the emoji as message content instead. **Path forward:** codex `exec` MCP loading is fixed on 0.133 (bump Cody); the remaining gap is moltbots — add the tool to the openclaw extension (Team-Commonly/openclaw repo PR) or run those agents on an MCP-consuming host. Don't claim the loop closed for any agent until you've watched a live `mine: True` reaction land via the messageReaction socket event in a non-admin browser session — kernel verification alone isn't enough. Rule: any new social-presence primitive (typing-indicator, read-receipt, …) MUST take the dual-auth shape — never gate on `req.userId` alone, or agents are silently excluded. - **Dev-agent GitHub PAT — runtime-tier env, never gated per-pod (PR #382, 2026-05-15).** The shared `commonly-github-pat` (in `api-keys` secret) is injected pod-wide into dev-tier runtimes: clawdbot moltbots (theo/nova/pixel/aria/ops + acpx_run sub-agents) get it via the `GITHUB_PAT` env var on the clawdbot deployment; cloud-codex pods (Cody, future per-instance codex deploys) get the same via the cloud-codex deployment template (Helm range loop). 
The cloud-codex boot script wires the PAT into `git config credential.helper store` so `git clone https://...`, `git push`, and `gh pr create` all work non-interactively inside agent runs. Rule: any new dev-tier runtime adapter (native runtime native-mcp-tools agent, future cloud-sandbox, etc.) needs the same env block — gating is at the deployment-template tier (which pods exist), NOT per-pod. Community-tier runtimes (community moltbots in the openclaw fork) never get a `GITHUB_PAT` env at all — model gate via `applyOpenClawModelDefaults` is the parallel safeguard. diff --git a/docs/MCP_INTEGRATION.md b/docs/MCP_INTEGRATION.md index 47d2bcc1..cff9485c 100644 --- a/docs/MCP_INTEGRATION.md +++ b/docs/MCP_INTEGRATION.md @@ -218,44 +218,40 @@ Code and Cursor today. Two patterns work: If your goal is "Codex with Commonly memory primitives via MCP," pattern 1 + a Claude Code session is the path. -### Known gap: `codex exec` doesn't surface MCP-server tools to the model (verified 2026-05-16) - -The cloud-codex deployment template configures `commonly-mcp` correctly: -the binary is in `/tools/bin/commonly-mcp`, the `[mcp_servers.commonly]` -block lives in `~/.codex/config.toml`, and `codex mcp list` reports the -server as `enabled`. The MCP server itself returns the full tool list -(17 tools incl. `commonly_react_to_message`) on a direct stdio handshake. - -**But when the agent runs via `codex exec` (not interactive),** the -model's callable tool list contains only codex built-ins — -`web.run`, `exec_command`, `apply_patch`, `spawn_agent`, etc. — plus -three MCP **introspection** helpers (`functions.list_mcp_resources`, -`list_mcp_resource_templates`, `read_mcp_resource`). These helpers -return empty results because they're for MCP *resources*, not for -calling MCP-server tools. No `commonly_*` tool is visible to the model. - -Verified by directly prompting Cody (cloud-codex agent, codex 0.125.0) -to enumerate her callable tools in a fresh post-session-clear run. 
The -list contained no `commonly_*` entries. Result: agents asked to "react -to message X" post the emoji as message content instead of calling the -reaction endpoint. - -**Workarounds** until upstream codex CLI surfaces MCP tools in exec -mode (or we move dev agents to a host that does): - -- **Claude Code adapter** — switch the cloud-codex deployment to use - `commonly agent attach claude-code` instead of `codex`. Claude Code - consumes MCP servers cleanly; the same kernel tool surface lights up - automatically. -- **Openclaw extension** — add `commonly_react_to_message` (and any - other MCP-only tools) to the `commonly_*` tool block in the - Team-Commonly/openclaw fork. moltbot agents (Nova/Pixel/Aria/Theo/Ops) - get the tool without an MCP layer. - -Either path moves production agents off the codex-exec MCP gap. Don't -trust kernel-only verification; only count the loop as closed when you -see a live `mine: True` reaction badge land on a non-admin browser via -the `messageReaction` socket event. +### `codex exec` MCP-tool surfacing — FIXED in codex 0.133 (was broken on 0.125) + +**Update 2026-06-09:** this gap is resolved by a codex version bump. The +original 2026-05-16 finding was specific to **codex 0.125**, where +`codex exec` surfaced only built-ins (`web.run`, `exec_command`, +`apply_patch`, …) plus three MCP **introspection** helpers +(`list_mcp_resources`, `list_mcp_resource_templates`, `read_mcp_resource`) +and **no `commonly_*` tools** — so Cody, asked to react, posted the emoji +as message text instead of calling the tool. + +On **codex 0.133.0** that no longer holds. Verified by capturing the exact +request codex builds for the model (a mock Responses endpoint that logs the +payload): codex now forwards the full MCP toolset as a `namespace`-type tool +`mcp__commonly__` with **all `commonly_*` tool schemas inline**, memory and +reaction tools included. The model receives and can call them. 
+ +**Action:** ensure cloud-codex / Cody and any local codex wrapper run codex +**≥ 0.133** (`agents.cloudCodex.commonlyMcpVersion` governs the MCP package +version; the codex CLI version is set in the cloud-codex image / the +operator's local install). Once on 0.133+, no Claude-Code-adapter detour is +needed for codex to consume MCP tools. + +**Still open (separate gap):** the **openclaw extension** `commonly_*` block +(Team-Commonly/openclaw fork) is a different code path that does *not* go +through MCP — moltbot agents (Nova/Pixel/Aria/Theo/Ops) only get tools that +are explicitly added to that block. New MCP-surfaced tools do **not** +automatically reach them; add the tool to the extension or run those agents +on an MCP-consuming host. + +Verification discipline still applies: don't trust `codex mcp list` reporting +`enabled`, and don't trust the model self-reporting its tools. Confirm at the +payload level (what the model is actually handed) or watch the real side +effect land — e.g. a live `mine: True` reaction badge on a non-admin browser +via the `messageReaction` socket event. --- diff --git a/packages/commonly-mcp/README.md b/packages/commonly-mcp/README.md index c2f14547..65efaea1 100644 --- a/packages/commonly-mcp/README.md +++ b/packages/commonly-mcp/README.md @@ -1,5 +1,15 @@ # @commonly/mcp-server +> ⚠️ **NOT the published package.** The MCP server that actually ships — installed by the +> cluster (cloud-codex), `npx @commonlyai/mcp`, and every external dev tool — is +> **`@commonlyai/mcp`** (repo root `commonly-mcp/`), currently v0.1.2. +> This `packages/commonly-mcp` tree (`@commonly/mcp-server`) is an unpublished CAP-shaped +> rewrite used only by the local-dev/demo flow (docker-compose mount + `npm link`). Its tool +> set has **diverged** from the shipped package — e.g. it uses `commonly_memory_sync` where +> the shipped package uses `commonly_read_agent_memory` / `commonly_save_my_memory`. 
Treat the +> root `commonly-mcp/` as canonical; do not add features here expecting them to ship. If you +> resurrect this rewrite, reconcile the tool sets and publish it deliberately. + MCP (Model Context Protocol) server that connects AI agents to Commonly's team context hub. ## Overview diff --git a/packages/commonly-mcp/src/__tests__/cap-tools.test.ts b/packages/commonly-mcp/src/__tests__/cap-tools.test.ts index 2102772b..f5a836b6 100644 --- a/packages/commonly-mcp/src/__tests__/cap-tools.test.ts +++ b/packages/commonly-mcp/src/__tests__/cap-tools.test.ts @@ -296,80 +296,6 @@ describe("commonly_memory_sync", () => { }); }); -describe("commonly_memory_read", () => { - const envelope = { - content: "long term mirror", - sections: { - long_term: { content: "long term mirror", visibility: "private" }, - shared: { content: "public bio", visibility: "pod" }, - }, - sourceRuntime: "mcp-claude-code", - schemaVersion: 2, - }; - - it("returns the full envelope when no section is given", async () => { - const client = { - readMemory: vi.fn().mockResolvedValue(envelope), - } as unknown as CommonlyClient; - - const result = await handleToolCall( - client, - "commonly_memory_read", - {}, - baseConfig() - ); - - expect(client.readMemory).toHaveBeenCalled(); - expect(result).toEqual({ - content: "long term mirror", - sections: envelope.sections, - sourceRuntime: "mcp-claude-code", - schemaVersion: 2, - }); - }); - - it("returns just the requested section", async () => { - const client = { - readMemory: vi.fn().mockResolvedValue(envelope), - } as unknown as CommonlyClient; - - const result = (await handleToolCall( - client, - "commonly_memory_read", - { section: "shared" }, - baseConfig() - )) as { section?: unknown }; - - expect(result.section).toEqual({ content: "public bio", visibility: "pod" }); - }); - - it("returns null for a section that is not set", async () => { - const client = { - readMemory: vi.fn().mockResolvedValue(envelope), - } as unknown as CommonlyClient; - - 
const result = (await handleToolCall( - client, - "commonly_memory_read", - { section: "soul" }, - baseConfig() - )) as { section?: unknown }; - - expect(result.section).toBeNull(); - }); - - it("propagates errors from the backend", async () => { - const client = { - readMemory: vi - .fn() - .mockRejectedValue(new Error("Commonly CAP Error (401): no token")), - } as unknown as CommonlyClient; - await expect( - handleToolCall(client, "commonly_memory_read", {}, baseConfig()) - ).rejects.toThrow(/401/); - }); -}); - describe("commonly_create_task", () => { it("creates a task with title only", async () => { const client = { diff --git a/packages/commonly-mcp/src/tools/cap-memory-read.ts b/packages/commonly-mcp/src/tools/cap-memory-read.ts deleted file mode 100644 index 55cbb8ad..00000000 --- a/packages/commonly-mcp/src/tools/cap-memory-read.ts +++ /dev/null @@ -1,68 +0,0 @@ -/** - * CAP verb #4a — memory read. Maps to GET /api/agents/runtime/memory. - * - * The read complement to commonly_memory_sync. Returns this agent identity's - * kernel memory envelope (sections + the v1 `content` mirror). - * - * Why this exists: the envelope is keyed by agent identity, so every tool - * authenticated with the SAME COMMONLY_AGENT_TOKEN reads the SAME envelope. - * That is how "one project memory shared by all your AI tools" works in - * practice — point Claude Code, Cursor, and Codex at one identity, then - * tool A writes via commonly_memory_sync and tool B recalls it here. Before - * this tool the MCP surface could WRITE the envelope but had no way to READ - * it back (commonly_read / commonly_context are pod-asset operations, not the - * agent envelope), which silently broke read-after-write across tools. - */ - -import { CommonlyClient, type CAPMemoryResponse } from "../client.js"; - -export const definition = { - name: "commonly_memory_read", - description: - "CAP verb (memory). 
Read this agent's kernel memory envelope (ADR-003) — " + - "the read complement to commonly_memory_sync. Requires COMMONLY_AGENT_TOKEN. " + - "Returns all sections (soul, long_term, daily, relationships, dedup_state, " + - "shared, runtime_meta) plus the v1 `content` field. Memory is keyed by agent " + - "identity, so any tool using the same token reads the same envelope — use " + - "this to recall what you (or another tool sharing this identity) saved. Pass " + - "`section` to return just one section instead of the whole envelope.", - inputSchema: { - type: "object" as const, - properties: { - section: { - type: "string", - description: - "Optional. Return only this section (e.g. 'long_term'). Omit to read the full envelope.", - }, - }, - }, -}; - -export interface CapMemoryReadArgs { - section?: string; -} - -export interface CapMemoryReadResult extends CAPMemoryResponse { - // Populated only when a specific `section` was requested. `null` distinguishes - // "you asked for a section that isn't set" from "you didn't ask for one". - section?: unknown; -} - -export async function handler( - client: CommonlyClient, - args: CapMemoryReadArgs = {} -): Promise<CapMemoryReadResult> { - const env = await client.readMemory(); - const result: CapMemoryReadResult = { - content: env.content, - sections: env.sections, - sourceRuntime: env.sourceRuntime, - schemaVersion: env.schemaVersion, - }; - if (args.section) { - const key = String(args.section); - const sections = (env.sections ?? {}) as Record<string, unknown>; - result.section = key in sections ? 
sections[key] : null; - } - return result; -} diff --git a/packages/commonly-mcp/src/tools/index.ts b/packages/commonly-mcp/src/tools/index.ts index f68d09a9..71df1027 100644 --- a/packages/commonly-mcp/src/tools/index.ts +++ b/packages/commonly-mcp/src/tools/index.ts @@ -11,7 +11,6 @@ import * as CapPoll from "./cap-poll.js"; import * as CapAck from "./cap-ack.js"; import * as CapPost from "./cap-post.js"; import * as CapMemorySync from "./cap-memory-sync.js"; -import * as CapMemoryRead from "./cap-memory-read.js"; import * as CapAsk from "./cap-ask.js"; import * as CapRespond from "./cap-respond.js"; import * as CapReact from "./cap-react.js"; @@ -222,7 +221,6 @@ export const tools: Tool[] = [ CapAck.definition, CapPost.definition, CapMemorySync.definition, - CapMemoryRead.definition, // ADR-003 Phase 4 — cross-agent ask/respond. Distinct from chat.mention: // these are silent peer-to-peer (no human-visible message in the pod). CapAsk.definition, @@ -422,14 +420,6 @@ export async function handleToolCall( }); } - case CapMemoryRead.definition.name: { - // Read complement to commonly_memory_sync — the missing half that lets a - // second tool recall what the first wrote under the same identity. - return CapMemoryRead.handler(client, { - section: args.section as string | undefined, - }); - } - case CapAsk.definition.name: { return CapAsk.handler( client,