From b381eafd9f217a4412fb23ad44f46c4eb88ba94c Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Fri, 12 Jun 2026 17:01:58 +0200 Subject: [PATCH 1/2] =?UTF-8?q?chore(release):=20version=20packages=20?= =?UTF-8?q?=E2=80=94=20colonyq=200.8.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consumes all pending changesets (the token-reduction, open-coordination, awareness, and Windows-green work): colonyq 0.7.0 -> 0.8.0 plus internal package bumps and generated changelogs. --- .../account-claims-task-claim-account.md | 10 - .../auto-release-stale-plan-subtask-claims.md | 26 -- .changeset/batched-ingest-pipeline.md | 7 - .changeset/begin-immediate-claim-lifecycle.md | 17 - .../bridge-lifecycle-replay-and-dry-run.md | 5 - .changeset/bridge-replay-subcommand.md | 14 - .changeset/claude-code-verify.md | 11 - .changeset/cluster-observations-mcp-tool.md | 19 - .changeset/codex-gpu-embedding-provider.md | 18 - .../colony-context-aware-non-forcing.md | 14 - ...colony-demo-command-and-policy-examples.md | 5 - .changeset/colony-gain-rtk-summary-view.md | 19 - .changeset/colony-grab-bridge.md | 24 -- .changeset/colony-skills-cue-wiring.md | 13 - .../explain-task-claim-file-rejection.md | 39 --- .changeset/fix-health-readiness-scoring.md | 9 - .changeset/fix-silent-catch-blocks.md | 8 - .../fix-worktree-contention-realpath.md | 13 - .changeset/gain-drift-detector.md | 21 -- .changeset/grumpy-bottles-sneeze.md | 16 - .changeset/health-coach-mode.md | 20 -- .../icm-feedback-record-search-stats.md | 30 -- .changeset/icm-importance-decay.md | 34 -- .changeset/icm-memoirs.md | 31 -- .changeset/lean-tool-profile.md | 6 - .changeset/listplans-default-limit-10.md | 7 - ...-error-fallback-and-sqlite-busy-timeout.md | 8 - .changeset/mcp-token-bloat-and-claim-ux.md | 11 - .../metrics-wrapper-session-fallback.md | 26 -- .../move-task-foraging-report-to-foraging.md | 15 - .changeset/native-windows-support.md | 19 - .changeset/open-coordination-mode.md | 6 
- .changeset/post-tool-use-awareness-push.md | 6 - .changeset/preface-budget-expand-default.md | 8 - .changeset/protected-branch-claim-rejected.md | 12 - .changeset/reindex-batched-embed-loop.md | 9 - .changeset/run-attempts-schema.md | 5 - ...rage-at-rest-detach-from-savings-report.md | 9 - .changeset/scope-listtasks-pretooluse.md | 10 - .changeset/semantic-search-mcp-tool.md | 24 -- .changeset/spotty-spoons-pick.md | 10 - .changeset/startup-panel-one-call.md | 5 - ...storage-busy-timeout-and-retry-headroom.md | 13 - .changeset/surface-unpublished-disk-plans.md | 13 - .changeset/task-claim-file-repo-root-hint.md | 22 -- .changeset/task-list-compact-default.md | 9 - .changeset/tool-description-diet.md | 5 - .changeset/trim-sessionstart-preface-bloat.md | 13 - .changeset/unified-liveness-working-notes.md | 7 - .changeset/vast-impalas-draw.md | 13 - .changeset/windows-plan-paths-archive.md | 5 - .changeset/worktree-contention-realpath.md | 10 - apps/cli/CHANGELOG.md | 194 +++++++++++ apps/cli/package.json | 10 +- apps/mcp-server/CHANGELOG.md | 325 ++++++++++++++++++ apps/mcp-server/package.json | 2 +- apps/worker/CHANGELOG.md | 46 +++ apps/worker/package.json | 2 +- .../.openspec.yaml | 2 + .../notes.md | 16 + packages/config/CHANGELOG.md | 43 +++ packages/config/package.json | 6 +- packages/core/CHANGELOG.md | 293 ++++++++++++++++ packages/core/package.json | 6 +- packages/embedding/CHANGELOG.md | 31 ++ packages/embedding/package.json | 6 +- packages/foraging/CHANGELOG.md | 44 +++ packages/foraging/package.json | 6 +- packages/hooks/CHANGELOG.md | 65 ++++ packages/hooks/package.json | 6 +- packages/installers/CHANGELOG.md | 23 ++ packages/installers/package.json | 6 +- packages/queen/CHANGELOG.md | 36 ++ packages/queen/package.json | 6 +- packages/spec/CHANGELOG.md | 35 ++ packages/spec/package.json | 2 +- packages/storage/CHANGELOG.md | 235 +++++++++++++ packages/storage/package.json | 6 +- 78 files changed, 1431 insertions(+), 750 deletions(-) delete mode 100644 
.changeset/account-claims-task-claim-account.md delete mode 100644 .changeset/auto-release-stale-plan-subtask-claims.md delete mode 100644 .changeset/batched-ingest-pipeline.md delete mode 100644 .changeset/begin-immediate-claim-lifecycle.md delete mode 100644 .changeset/bridge-lifecycle-replay-and-dry-run.md delete mode 100644 .changeset/bridge-replay-subcommand.md delete mode 100644 .changeset/claude-code-verify.md delete mode 100644 .changeset/cluster-observations-mcp-tool.md delete mode 100644 .changeset/codex-gpu-embedding-provider.md delete mode 100644 .changeset/colony-context-aware-non-forcing.md delete mode 100644 .changeset/colony-demo-command-and-policy-examples.md delete mode 100644 .changeset/colony-gain-rtk-summary-view.md delete mode 100644 .changeset/colony-grab-bridge.md delete mode 100644 .changeset/colony-skills-cue-wiring.md delete mode 100644 .changeset/explain-task-claim-file-rejection.md delete mode 100644 .changeset/fix-health-readiness-scoring.md delete mode 100644 .changeset/fix-silent-catch-blocks.md delete mode 100644 .changeset/fix-worktree-contention-realpath.md delete mode 100644 .changeset/gain-drift-detector.md delete mode 100644 .changeset/grumpy-bottles-sneeze.md delete mode 100644 .changeset/health-coach-mode.md delete mode 100644 .changeset/icm-feedback-record-search-stats.md delete mode 100644 .changeset/icm-importance-decay.md delete mode 100644 .changeset/icm-memoirs.md delete mode 100644 .changeset/lean-tool-profile.md delete mode 100644 .changeset/listplans-default-limit-10.md delete mode 100644 .changeset/mcp-error-fallback-and-sqlite-busy-timeout.md delete mode 100644 .changeset/mcp-token-bloat-and-claim-ux.md delete mode 100644 .changeset/metrics-wrapper-session-fallback.md delete mode 100644 .changeset/move-task-foraging-report-to-foraging.md delete mode 100644 .changeset/native-windows-support.md delete mode 100644 .changeset/open-coordination-mode.md delete mode 100644 .changeset/post-tool-use-awareness-push.md delete 
mode 100644 .changeset/preface-budget-expand-default.md delete mode 100644 .changeset/protected-branch-claim-rejected.md delete mode 100644 .changeset/reindex-batched-embed-loop.md delete mode 100644 .changeset/run-attempts-schema.md delete mode 100644 .changeset/savings-storage-at-rest-detach-from-savings-report.md delete mode 100644 .changeset/scope-listtasks-pretooluse.md delete mode 100644 .changeset/semantic-search-mcp-tool.md delete mode 100644 .changeset/spotty-spoons-pick.md delete mode 100644 .changeset/startup-panel-one-call.md delete mode 100644 .changeset/storage-busy-timeout-and-retry-headroom.md delete mode 100644 .changeset/surface-unpublished-disk-plans.md delete mode 100644 .changeset/task-claim-file-repo-root-hint.md delete mode 100644 .changeset/task-list-compact-default.md delete mode 100644 .changeset/tool-description-diet.md delete mode 100644 .changeset/trim-sessionstart-preface-bloat.md delete mode 100644 .changeset/unified-liveness-working-notes.md delete mode 100644 .changeset/vast-impalas-draw.md delete mode 100644 .changeset/windows-plan-paths-archive.md delete mode 100644 .changeset/worktree-contention-realpath.md create mode 100644 openspec/changes/agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59/.openspec.yaml create mode 100644 openspec/changes/agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59/notes.md diff --git a/.changeset/account-claims-task-claim-account.md b/.changeset/account-claims-task-claim-account.md deleted file mode 100644 index 5c0fe458..00000000 --- a/.changeset/account-claims-task-claim-account.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -'@colony/storage': minor -'@colony/mcp-server': minor ---- - -Add `account_claims` table and three new MCP tools for binding Codex accounts to planner waves. 
- -`task_claim_account`, `task_release_account_claim`, and `task_list_account_claims` let the recodee planner Account Capacity rail bind a Codex account to a planner wave so multiple operators on the same plan see the same dispatch state. Bindings are keyed by `(plan_slug, wave_id)` — a planner-logical coordinate that exists before any Colony task is spawned — and persist across operators via a new `account_claims` SQLite table. A partial unique index enforces at-most-one-active claim per wave; released claims stay as audit history. - -Schema migrates forward-only from version 10 to 11. No data backfill is required: the table starts empty and is populated by user action. The contract is regression-tested via an MCP-inspector test (`apps/mcp-server/test/account-claims.test.ts`) exercising the full claim → rebind → release → list lifecycle. diff --git a/.changeset/auto-release-stale-plan-subtask-claims.md b/.changeset/auto-release-stale-plan-subtask-claims.md deleted file mode 100644 index eabfa064..00000000 --- a/.changeset/auto-release-stale-plan-subtask-claims.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -'@colony/core': minor -'@colony/mcp-server': minor ---- - -Auto-release stale plan-subtask claims in `task_ready_for_agent` - -A `claimed` subtask older than `STALE_PLAN_SUBTASK_CLAIM_MS` (default 1h) -is now released server-side at the top of `task_ready_for_agent` so the -ready-queue can route it to the next eligible worker on the same tick. -Replaces the prior `rescue_candidate` + `next_tool: rescue_stranded_scan` -suggestion path, which deadlocked policy-locked worker fleets (codex, -others) that refuse to rescue another agent's claim. - -New behavior: -- The response carries `auto_released_stale_claims[]` listing the - releases performed by this call (omitted when nothing was stale). -- Pass `auto_release_stale_claims: false` to suppress the sweep and - observe stale state without mutating it (admin / telemetry callers). 
-- A new `autoReleaseStalePlanSubtaskClaims(store, plans, options)` - export from `@colony/core` exposes the same sweep to other consumers - (e.g. an autopilot tick). - -Audit trail: each release writes one `plan-subtask-claim` observation -with `status: 'available'` and `rescue_reason: 'auto-released-stale-claim'`, -matching the shape `bulkRescueStrandedSessions` already uses. diff --git a/.changeset/batched-ingest-pipeline.md b/.changeset/batched-ingest-pipeline.md deleted file mode 100644 index d18b355f..00000000 --- a/.changeset/batched-ingest-pipeline.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -"@colony/worker": patch -"@colony/embedding": patch -"@colony/core": patch ---- - -Batch worker observation embedding calls and add batch-capable embedder providers. diff --git a/.changeset/begin-immediate-claim-lifecycle.md b/.changeset/begin-immediate-claim-lifecycle.md deleted file mode 100644 index 28dcc1f3..00000000 --- a/.changeset/begin-immediate-claim-lifecycle.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -'@colony/storage': patch -'@colony/core': patch ---- - -Fix read-then-write race in claim cleanup paths - -`releaseExpiredQuotaClaims` and `bulkRescueStrandedSessions` previously read -eligible claims outside their DEFERRED transaction, allowing two concurrent -callers to both snapshot the same rows and each emit a duplicate -`claim-weakened` or `rescue-stranded` audit observation. - -The fix moves the claim read inside a `BEGIN IMMEDIATE` transaction on both -paths so the write lock is acquired before any row is inspected. The storage -`transaction()` helper gains an `{ immediate: true }` option that maps to -better-sqlite3's `.immediate()` mode. A new idempotency test confirms that -calling each cleanup path twice produces exactly one audit observation. 
diff --git a/.changeset/bridge-lifecycle-replay-and-dry-run.md b/.changeset/bridge-lifecycle-replay-and-dry-run.md deleted file mode 100644 index 7651137a..00000000 --- a/.changeset/bridge-lifecycle-replay-and-dry-run.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"colonyq": minor ---- - -`colony bridge lifecycle` gains `--replay <file>` and `--dry-run` so a saved `colony-omx-lifecycle-v1` envelope (e.g. captured `.pre.json`) can be routed offline through the real lifecycle logic without touching the live data dir. Combined with `--json`, this gives runtime integrators a CI-shaped harness for asserting on `route`, `event_type`, `extracted_paths`, and `ok`. diff --git a/.changeset/bridge-replay-subcommand.md b/.changeset/bridge-replay-subcommand.md deleted file mode 100644 index d749c647..00000000 --- a/.changeset/bridge-replay-subcommand.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -'colonyq': minor ---- - -`colony bridge replay <file>` is now a first-class subcommand for -offline debugging of captured pre-tool-use envelopes. Default is `--dry-run` -(ephemeral in-memory SQLite, no side effects); pass `--apply` to write to -the live store. A new `--rewrite-root <from>=<to>` flag rewrites absolute -paths in the envelope before dispatch so captures from another machine can -be replayed locally. Reuses the existing -`packages/contracts/fixtures/colony-omx-lifecycle-v1/` fixtures and does not -require the worker daemon. The shell shim at `apps/cli/bin/colony.sh` -short-circuits only `bridge lifecycle` to the daemon, so `bridge replay` -runs in-process automatically. diff --git a/.changeset/claude-code-verify.md b/.changeset/claude-code-verify.md deleted file mode 100644 index 66366332..00000000 --- a/.changeset/claude-code-verify.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -"@colony/installers": minor ---- - -`colony install --ide claude-code --verify` now works.
The claude-code installer -gained a `verify()` implementation (it previously threw "does not support ---verify" despite the flag being advertised): it validates ~/.claude/settings.json -for the colony MCP server, all six hooks, and any detected OMX-layer MCP servers, -reporting missing/stale hooks per the same contract codex uses. The generic -validation helpers (sameArgs, missingDetectedOmxServers, validationIssue) moved -to a shared `validation.ts` module so both installers reuse them. diff --git a/.changeset/cluster-observations-mcp-tool.md b/.changeset/cluster-observations-mcp-tool.md deleted file mode 100644 index c353a5c9..00000000 --- a/.changeset/cluster-observations-mcp-tool.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -"@colony/core": minor -"@colony/mcp-server": minor ---- - -Add `cluster_observations` MCP tool for semantic dedupe. - -Greedy single-linkage clustering by cosine threshold over a caller- -supplied set of observation IDs. The intended consumer is handoff / -attention_inbox dedupe: collect pending handoff IDs, pass them through -`cluster_observations(ids, threshold)`, and show the user one canonical -row per cluster instead of three different agents saying the same thing. - -The MCP tool wraps a new `MemoryStore.clusterObservations` primitive, -so callers that already use the core package can dedupe without going -through MCP. Threshold defaults to `0.85`; observations missing an -embedding come back in a separate `unembedded` array so the caller -chooses whether to keep them as singletons. Capped at 500 input IDs to -bound the O(N²) cosine cost. diff --git a/.changeset/codex-gpu-embedding-provider.md b/.changeset/codex-gpu-embedding-provider.md deleted file mode 100644 index c3f349d1..00000000 --- a/.changeset/codex-gpu-embedding-provider.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -"@colony/config": minor -"@colony/embedding": minor ---- - -Add `codex-gpu` embedding provider that targets the recodee -`codex-gpu-embedder` HTTP service (`POST /embed`). 
- -When `settings.embedding.provider = 'codex-gpu'`, the worker hits the -local GPU embedder over HTTP instead of running Transformers.js in -process. Endpoint defaults to `http://127.0.0.1:8100`; override via -`settings.embedding.endpoint`. Captures `dim` from a one-shot warm-up -probe at init time, matching every other provider's contract. - -The recodee dev box measures ~16 ms per single embed on the GPU vs ~200 -ms on local CPU, so the worker's embedding-backfill loop completes -roughly 14× faster when configured this way. Behavior unchanged for any -deployment that does not opt in via the new provider value. diff --git a/.changeset/colony-context-aware-non-forcing.md b/.changeset/colony-context-aware-non-forcing.md deleted file mode 100644 index cc0a85c0..00000000 --- a/.changeset/colony-context-aware-non-forcing.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -"@colony/hooks": minor -"@colony/config": minor ---- - -Colony is context-aware by default, never forcing. The PreToolUse hook no -longer hard-denies a tool call on a protected-branch claim conflict under the -default `warn` policy — it warns and records telemetry but lets the edit -proceed. Repos that want the old hard block opt in with -`bridge.policyMode = "block-on-conflict"`. The per-turn Claude Code -read-before-edit reminder is dropped (the harness already enforces it), and the -compact session-start contract is reframed from imperative "claim/hand off -before X" mandates to availability framing ("coordination is available, pull it -when it helps"). No write-path persistence changes. diff --git a/.changeset/colony-demo-command-and-policy-examples.md b/.changeset/colony-demo-command-and-policy-examples.md deleted file mode 100644 index 98947515..00000000 --- a/.changeset/colony-demo-command-and-policy-examples.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -'colonyq': patch ---- - -Add `colony demo`: a 60-second guided walkthrough of file-claim contention prevention. 
Two simulated agents (`claude-code` and `codex`) join the same task and try to claim `src/api.ts`; the second agent gets `blocked_active_owner`, then `claude-code` releases and `codex` retries successfully. The demo runs against an isolated temp data dir and cleans up on exit, with `--json` for a structured transcript and `--keep-data` for inspection. Also ship pre-baked `~/.colony/settings.json` fragments under `examples/policies/` for Next.js monorepos, Python packages, and Rust workspaces — each fragment lists stack-appropriate `privacy.excludePatterns` (build output, caches, `.env`) and `protected_files` (lockfiles, root config). README points to both surfaces from the install block. diff --git a/.changeset/colony-gain-rtk-summary-view.md b/.changeset/colony-gain-rtk-summary-view.md deleted file mode 100644 index 863962f6..00000000 --- a/.changeset/colony-gain-rtk-summary-view.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -'colonyq': minor -'@colony/storage': minor ---- - -`colony gain --summary` now renders an rtk-style compact view over the same -`mcp_metrics` receipts: headline KPI stack (total calls, input/output/total -tokens, tokens saved, total exec time), efficiency meter, top-N **By -Operation** table with proportional impact bars, a 30-day **Daily Activity** -bar graph, and a 12-day **Daily Breakdown** table. `--graph` and `--daily` -narrow the output to a single section; `--days <n>` and `--top-ops <n>` tune -the window and table size. Per-operation saved-token credit is distributed -across each comparison row's `matched_operations` proportionally to call share -so the `Saved` column lines up with the headline total. - -Storage gains `Storage.aggregateMcpMetricsDaily({ since, until, operation })` -returning per-UTC-day rollups (`{ day, calls, input_tokens, output_tokens, -total_tokens, total_duration_ms }`) ordered newest-first. Type exports -`AggregateMcpMetricsDailyOptions` and `McpMetricsDailyRow` come along.
diff --git a/.changeset/colony-grab-bridge.md b/.changeset/colony-grab-bridge.md deleted file mode 100644 index 8dd0905f..00000000 --- a/.changeset/colony-grab-bridge.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -'colonyq': minor ---- - -Add `colony grab` command group: a per-project localhost intake daemon that -turns a react-grab "Add context" submit into a colony task on a fresh -`agent/*` worktree and starts a detached `tmux` session running `codex` -inside it. - -- `colony grab serve` — long-lived HTTP daemon on 127.0.0.1 with strict - request gating (bearer token, `Origin` allowlist, JSON content-type, - CORS preflight). On accepted `POST /grab`, creates a colony task, - posts the react-grab payload as a `kind: "note"` observation, writes - `.colony/INTAKE.md` into the worktree, and spawns `tmux new-session -d` - running `codex` in the worktree. -- `colony grab attach <task-id>` — convenience attach to the spawned - tmux session `rg-<task-id>`. -- `colony grab status` — list grab daemons known to `$COLONY_HOME`. - -In-memory dedup (default 5 min window) keyed by -`sha256(repo_root|file_path|content|extra_prompt)` collapses repeat -submits into `task_post` notes on the existing task. - -The daemon is off by default; it must be started explicitly. diff --git a/.changeset/colony-skills-cue-wiring.md b/.changeset/colony-skills-cue-wiring.md deleted file mode 100644 index f84919cf..00000000 --- a/.changeset/colony-skills-cue-wiring.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -"colonyq": minor ---- - -`colony install` now auto-wires the Colony skill into the active cue profile so -the agent discovers Colony as a pullable capability (loads on a real trigger) -instead of relying on forced session prefaces. New `colony skills wire` / -`colony skills unwire` subcommands shell out to cue's own `cue skills -add-to-profile` / `remove-from-profile`, targeting the cue-resident -`colony/colony` skill.
Wiring is best-effort: a missing cue is a soft no-op that -prints the manual `npx skills add` fallback (now with the `recodee` typo fixed), -never a failed install. Opt out with `colony install --no-skills` or -`COLONY_SKILL_WIRE=0`; uninstall removes the skill symmetrically. diff --git a/.changeset/explain-task-claim-file-rejection.md b/.changeset/explain-task-claim-file-rejection.md deleted file mode 100644 index 5540cacd..00000000 --- a/.changeset/explain-task-claim-file-rejection.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -'@colony/storage': minor -'@colony/core': patch -'@colony/mcp-server': patch ---- - -Explain `task_claim_file` rejections instead of returning a generic "not claimable" - -`task_claim_file` (and the `TaskThread.claimFile` / -`normalizeOptionalClaimPath` paths inside `@colony/core`) used to throw -`INVALID_CLAIM_PATH: claim path is not claimable` with no hint at the -reason. Telemetry showed agents bouncing off the same surface for the same -input — e.g. `colony/packages/core/test` (a directory) — because the -message gave them nothing to act on. - -The rejection branch now classifies the failure and renders a specific -message per reason: - -- `directory` — *"claim path "X" is a directory; claim individual files inside it instead."* -- `pseudo` — *"claim path "X" is a pseudo path (e.g. /dev/null) and cannot be claimed."* -- `outside_repo` — *"claim path "X" resolves outside this task's repo_root and cannot be claimed."* -- `empty` — *"claim path is empty."* -- fallback — the legacy generic message, still keyed on the input path. - -New exports from `@colony/storage`: - -- `classifyClaimPathRejection(context)` — pure classifier paralleling - `normalizeRepoFilePath`. Returns the reason or `null`. -- `claimPathRejectionMessage(reason, file_path)` — single source of - truth for the user-facing message so the MCP `task_claim_file` - handler and `TaskThread.claimFile` stay in sync. 
-- New storage method `classifyTaskFilePathRejection(task_id, file_path, - cwd?)` plumbs the task → repo_root lookup that the existing - `normalizeTaskFilePath` already does, so callers only pay for the - classifier on the error branch. - -No behavior change: the same inputs that used to be rejected are still -rejected; only the error message and code surface improve. Existing -INVALID_CLAIM_PATH error code is preserved. diff --git a/.changeset/fix-health-readiness-scoring.md b/.changeset/fix-health-readiness-scoring.md deleted file mode 100644 index f8581e02..00000000 --- a/.changeset/fix-health-readiness-scoring.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -"colonyq": patch -"@colony/storage": patch ---- - -Fix two `colony health` scoring bugs that surfaced as "bad" readiness areas with no real defect: - -- **`colony_mcp_share.mcp_tool_calls = 0` despite live MCP traffic.** The counter only read `tool_calls` rows, missing MCP traffic when the calling agent's PostToolUse hook didn't fire for `mcp__*` tools. The counter now takes the max of that observed count and `mcp_metrics` row count (colony MCP server's own per-call receipt), with the source surfaced in `source_breakdown.colony_mcp_metrics`. New storage helper `countMcpMetricsSince(since, until?)`. -- **`claim_before_edit_ratio = null` when any edits lacked file_path metadata.** Forcing the ratio to null whenever `edit_tool_calls !== edits_with_file_path` turned a real 200/363 = 55% signal into a bare `n/a` headline. The ratio is now computed over measurable edits whenever `edits_with_file_path > 0`; the `status` field still communicates partial measurability for downstream consumers. diff --git a/.changeset/fix-silent-catch-blocks.md b/.changeset/fix-silent-catch-blocks.md deleted file mode 100644 index b6855efe..00000000 --- a/.changeset/fix-silent-catch-blocks.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -"@colony/hooks": patch -"@colony/foraging": patch ---- - -Surface silent `catch {}` failures to stderr (rule #9). 
- -Every empty catch in `session-start`, `scanner`, and the foraging MCP tool now either logs a `[colony] <context>: <message>` line to stderr or carries a one-line comment explaining why silence is intentional (fs-stat races, missing-directory guards, best-effort cleanup). Previously a whole session's 43/43 MCP call failures could vanish with no trace. diff --git a/.changeset/fix-worktree-contention-realpath.md b/.changeset/fix-worktree-contention-realpath.md deleted file mode 100644 index 0c620a60..00000000 --- a/.changeset/fix-worktree-contention-realpath.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -"@colony/core": patch ---- - -Fix worktree-contention detection on macOS and Windows. `isGitWorktree` compared -`git rev-parse --show-toplevel` (a fully realpath'd path) against a merely -`resolve()`'d scan path. `resolve()` does not collapse symlinks (macOS -`/var` -> `/private/var`), 8.3 short names, or case (Windows), so every managed -worktree was dropped and `worktree_count` came back 0 — failing -`readWorktreeContentionReport` on the macOS/Windows CI matrix. Both sides are now -canonicalised with `realpathSync.native` (falling back to `resolve` when the path -can't be stat'd). Added a regression test that reaches the repo through an -absolute symlink, reproducing the condition on any platform. diff --git a/.changeset/gain-drift-detector.md b/.changeset/gain-drift-detector.md deleted file mode 100644 index 9bcdda8c..00000000 --- a/.changeset/gain-drift-detector.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -'colonyq': minor -'@colony/storage': minor -'@colony/core': minor -'@colony/mcp-server': minor ---- - -`colony gain drift` and a matching `savings_drift_report` MCP tool flag -tools whose median tokens-per-call has drifted up or down. Default windows -are non-overlapping: recent = last 3 days, baseline = 14 days ending 3 days -before recent. Default thresholds: `--threshold 1.25` (up), `--down-threshold -0.75`, `--min-calls 20` per window.
Classifications: `up_drift`, -`down_drift`, `new_tool` (no baseline), `gone` (no recent), `insufficient_data`, -`stable`. - -Storage gains `Storage.mcpTokenDriftPerOperation()` which computes per-operation -medians with a `ROW_NUMBER() OVER (PARTITION BY operation ORDER BY tpc)` -window function — chosen over the correlated `LIMIT 1 OFFSET (COUNT-1)/2` -form because SQLite forbids outer aggregate references in scalar-subquery -`OFFSET`. A `mcpMetricsMinTs()` helper surfaces a one-line warning when the -baseline window starts before the first recorded metric. diff --git a/.changeset/grumpy-bottles-sneeze.md b/.changeset/grumpy-bottles-sneeze.md deleted file mode 100644 index bf4570f1..00000000 --- a/.changeset/grumpy-bottles-sneeze.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -'@colony/storage': patch -'colonyq': patch ---- - -Fix `aggregateMcpMetrics` error_reasons grouping so per-row counts sum to -`error_count`. The grouping previously partitioned by `(operation, error_code, -error_message)`, but several handlers embed unique session IDs in their error -messages (e.g. `sub-task is claimed by codex-session-XYZ`), so each race loss -produced a distinct group. Combined with a 3-row truncation per operation, the -result was that nearly all errors were hidden — `task_plan_claim_subtask` would -report 7 errors in the Top error reasons table while the Operations table showed -93 for the same row. Grouping now drops `error_message` from the key (SQLite -picks the row with the latest `ts` for the sample message via its bare-column- -with-MAX optimization) and the per-operation cap is bumped from 3 to 8 since -codes are low-cardinality. Sum-of-reasons now matches error_count exactly. 
diff --git a/.changeset/health-coach-mode.md b/.changeset/health-coach-mode.md deleted file mode 100644 index d52487f3..00000000 --- a/.changeset/health-coach-mode.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -'colonyq': minor -'@colony/storage': minor ---- - -`colony health --coach` walks a repo through first-week setup. It detects -adoption stage (`fresh` / `installed_no_signal` / `early` / `mid_adoption`) -from cheap signals (`countObservations`, installed-IDE flags, -`firstObservationTs`, `Math.max(toolCallsSince, countMcpMetricsSince)`), -then surfaces the NEXT incomplete step from a fixed 7-step ladder: -`install_runtime` → `first_task_post` → `first_task_claim_file` → -`first_task_hand_off` → `first_plan_claim` → `first_quota_release` → -`first_gain_review`. Each step carries an exact `cmd:` and `tool:` string. - -Progress is persisted in a new `coach_progress` SQLite table (migration -`014-coach-progress.ts`, schema_version 13 → 14). Step completion is -event-observed via `mcp_metrics` / `observations`, never user-clicked. -`colony gain` records a `coach_gain_review` observation so step 7 can -self-detect. `--coach` is mutually exclusive with `--fix-plan` and respects -`--json`. diff --git a/.changeset/icm-feedback-record-search-stats.md b/.changeset/icm-feedback-record-search-stats.md deleted file mode 100644 index fd91347f..00000000 --- a/.changeset/icm-feedback-record-search-stats.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -"@colony/storage": minor -"@colony/core": minor -"@colony/mcp-server": minor ---- - -ICM slice 2 — feedback `record`, `search`, and `stats` MCP tools. - -Adds a new `feedback` lane that records "AI predicted X, real answer -was Y" corrections so a future agent can search prior mistakes by -topic before repeating them. Migration 015 introduces the `feedback` -table plus a porter-unicode61 `feedback_fts` virtual table mirrored -by the standard `ai/ad/au` triggers; importance is a four-level enum -defaulting to `medium`. 
`prediction`, `correction`, and the optional -`context` flow through `MemoryStore.recordFeedback`, which routes each -body through `prepareMemoryText` — the same redact-then-compress path -observations use — so the compression invariant holds at the write -boundary. - -MCP surface (progressive disclosure): - -- `feedback_record({ topic, prediction, correction, context?, importance?, created_by? })` → `{ id }` -- `feedback_search({ query, topic?, limit? })` → compact hits (`id`, `topic`, `importance`, `score`, `snippet`, `created_at`) -- `feedback_stats({ topic? })` → per-topic counts and `last_created_at` - -Follow-up (separate PR): a pre-tool-use hook that surfaces prior -corrections on inbound prompts. This PR keeps the slice scoped to the -storage + search surface so it can ship behind a manual query first. - -Reference: `docs/icm-integration-plan.md` slice 2. diff --git a/.changeset/icm-importance-decay.md b/.changeset/icm-importance-decay.md deleted file mode 100644 index 6320d158..00000000 --- a/.changeset/icm-importance-decay.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -"@colony/storage": minor -"@colony/core": minor -"@colony/mcp-server": minor -"colonyq": minor ---- - -ICM slice 3 — observation importance + temporal decay. - -Every observation now carries an `importance` tier -(`critical | high | medium | low`, default `medium`), a rolling -`access_count`, a `last_accessed_at` timestamp, and a `weight` value. -Critical/high pin their weight to the base value and never decay; -medium/low decay as `baseWeight / (1 + access_count * 0.1)` whenever -they are read. Read paths (`MemoryStore.search`, `getObservations`, -`semanticSearch`) coalesce ids into a debounced 50ms batch and flush -the access bookkeeping in one transactional UPDATE, so heavy read -loops trade at most one extra write per ~50ms window. - -Search and `get_observations` MCP responses now include `importance` -and `weight` on each row (additive — older callers ignore them). 
-`task_post` accepts an optional `importance` parameter forwarded to -the underlying observation insert. - -New CLI subcommand `colony memory prune` deletes near-zero-weight -medium/low rows; `--min-weight ` overrides the default 0.1 -threshold and `--dry-run` reports the candidate count without -deleting. Critical/high are never affected. - -Storage: schema bumped to version 17 with four additive columns on -`observations` and two new indexes. `Storage.recordAccess`, -`Storage.pruneLowDecay`, and `Storage.countLowDecayCandidates` are -the public primitives. (Originally targeted version 15 in isolation; -landed at 17 alongside slice 1 memoirs and slice 2 feedback.) diff --git a/.changeset/icm-memoirs.md b/.changeset/icm-memoirs.md deleted file mode 100644 index 9cad09d1..00000000 --- a/.changeset/icm-memoirs.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -"@colony/storage": minor -"@colony/core": minor -"@colony/mcp-server": minor ---- - -ICM slice 1 — memoirs (typed knowledge graphs). - -A memoir is a named container of typed concepts (graph nodes) connected -by typed relations (graph edges). Concept content routes through the -same `prepareMemoryText` redact → compress pipeline used for -observations, so the compression invariant holds at the write boundary. -The nine relation types -(`part_of`, `depends_on`, `related_to`, `contradicts`, `refines`, -`alternative_to`, `caused_by`, `instance_of`, `superseded_by`) mirror -ICM's taxonomy and let agents express "what supersedes X", "what -contradicts decision Y", and similar structural reasoning without -abandoning the flat-observations primary store. - -Schema bump 14 → 15 adds three tables (`memoirs`, -`memoir_concepts`, `memoir_relations`) and one virtual table -(`memoir_concepts_fts`) with `(ai, ad, au)` triggers mirroring -`observations_fts`. Migrations are forward-only. - -`MemoryStore` exposes `createMemoir`, `listMemoirs`, `addConcept`, -`refineConcept`, `linkConcepts`, `searchConcepts`, and `inspectConcept`. 
-Seven MCP tools (`memoir_create`, `memoir_list`, `memoir_add_concept`, -`memoir_refine`, `memoir_link`, `memoir_search`, `memoir_inspect`) wrap -them with progressive disclosure — `memoir_search` returns compact hits -and `memoir_inspect` is the only path that returns full bodies plus a -BFS neighbourhood. diff --git a/.changeset/lean-tool-profile.md b/.changeset/lean-tool-profile.md deleted file mode 100644 index 860b1c94..00000000 --- a/.changeset/lean-tool-profile.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -'@colony/mcp-server': minor -'@colony/config': minor ---- - -MCP tool profiles: the stdio server now defaults to a lean ~20-tool surface (memory + coordination primitives), cutting per-session schema-injection context cost by more than half. Set `COLONY_TOOL_PROFILE=full` or `settings.mcp.toolProfile: 'full'` to restore the entire tool surface (plan, spec, foraging, memoir, proposal, savings, queen lanes). New `mcp.toolProfile` setting in `@colony/config`. diff --git a/.changeset/listplans-default-limit-10.md b/.changeset/listplans-default-limit-10.md deleted file mode 100644 index a8585b36..00000000 --- a/.changeset/listplans-default-limit-10.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -'@colony/core': patch ---- - -Drop the default `listPlans` limit from 50 → 10. - -Companion to the compact-default `task_plan_list` from #531. Even with compact responses, callers that don't pass `limit` were getting 50 plans by default. Lowering the implicit default trims the long tail for `task_plan_list` callers that omit `limit` — the explicit `.max(50)` ceiling on the MCP schema is preserved for callers that want more. Verified post-#531 that `colony gain` shows the predicted ~87% reduction on per-call token cost when the new binary is live; this change squeezes the remaining ~10-15% for non-compact-aware callers. 
diff --git a/.changeset/mcp-error-fallback-and-sqlite-busy-timeout.md b/.changeset/mcp-error-fallback-and-sqlite-busy-timeout.md deleted file mode 100644 index 32dbb879..00000000 --- a/.changeset/mcp-error-fallback-and-sqlite-busy-timeout.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -"colonyq": patch ---- - -Stop mislabelling generic MCP errors and reduce SQLite contention failures. - -- `mcpError` now codes non-`TaskThreadError` throws as `INTERNAL_ERROR` instead of `OBSERVATION_NOT_ON_TASK`, so validation failures and SQLite "database is locked" errors surface honestly in `mcp_metrics` and `colony gain`. -- Storage now sets `PRAGMA busy_timeout = 5000` on every connection (worker daemon, MCP server, CLI hooks all open separate handles to the same WAL DB), so concurrent writers wait the kernel out instead of throwing `SQLITE_BUSY` immediately. diff --git a/.changeset/mcp-token-bloat-and-claim-ux.md b/.changeset/mcp-token-bloat-and-claim-ux.md deleted file mode 100644 index 5f675df2..00000000 --- a/.changeset/mcp-token-bloat-and-claim-ux.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -'@colony/mcp-server': minor ---- - -Trim `task_plan_list` token bloat and add recovery hints to two error paths. - -`task_plan_list` now defaults to a compact rollup that omits `subtasks[].description` and `subtasks[].file_scope` — the two heavy fields driving ~60% of MCP wire token spend (12.9k avg per call in `colony gain`). The full legacy shape is preserved as opt-in via `detail: 'full'`. Internal `listPlans()` callers in `@colony/core` bypass MCP and are unaffected. - -`task_note_working` now returns `nearby_tasks` (ranked branch-and-repo > branch-only > repo-only) plus a recovery `hint` when `ACTIVE_TASK_NOT_FOUND` and the caller supplied a `repo_root` or `branch`. Fresh agent sessions that have not yet joined the active task on their branch can recover via `task_post(task_id=...)` or `task_accept_handoff` without re-listing the task table. 
- -`task_plan_claim_subtask` now attaches `next_available_subtask_index`, `next_available_count`, and a compact `next_available[]` list to `PLAN_SUBTASK_NOT_AVAILABLE` errors so a racing claimer can retry immediately instead of issuing a full `task_plan_list` round trip. diff --git a/.changeset/metrics-wrapper-session-fallback.md b/.changeset/metrics-wrapper-session-fallback.md deleted file mode 100644 index 97f2388e..00000000 --- a/.changeset/metrics-wrapper-session-fallback.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -'@colony/mcp-server': patch ---- - -Attribute `` session metrics via the MCP client-identity fallback - -`colony gain` previously bucketed ~9,000 calls/day into a single -`` session row. Cause: high-volume read-only tools -(`task_plan_list`, `get_observations`, `search`, `task_timeline`, -`list_sessions`, `examples_list`, …) carry no `session_id` in their -schema, so `metricContextOf` had nothing to attribute the call to. - -The metrics wrapper now reuses the same `detectMcpClientIdentity` heuristic -the heartbeat wrapper already runs on every call: env-derived identity -(`CODEX_SESSION_ID`, `CLAUDECODE_SESSION_ID`, `COLONY_CLIENT_SESSION_ID`), -or a stable `mcp-` fallback when no signal is available. The -explicit `args.session_id` / `args.current_session_id` path is unchanged; -the fallback only fires when both are absent. - -Effect on the savings report: per-client session rows replace the giant -`` bucket, making it possible to see which agent / connection is -driving the bulk of the load — the regression-investigation gap that -PR #531's compact-mode fix left open. - -No new tool dependencies; the wrapper reads `detectMcpClientIdentity` from -`./heartbeat.js` (already in the same package). 
diff --git a/.changeset/move-task-foraging-report-to-foraging.md b/.changeset/move-task-foraging-report-to-foraging.md deleted file mode 100644 index fda88d40..00000000 --- a/.changeset/move-task-foraging-report-to-foraging.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -'@colony/mcp-server': patch ---- - -Move `task_foraging_report` source location from `attention.ts` to `foraging.ts` - -The tool is part of the foraging surface (it wraps `ProposalSystem.foragingReport` -and is conceptually paired with `examples_list` / `examples_query`), not the -attention-inbox surface. It only lived in `attention.ts` as an accident of the -pre-split monolithic `server.ts`. - -Pure code-location refactor. The MCP tool name, description, input schema, and -handler body are byte-identical. `server.ts` registers it at the same call-site -slot via a new `registerTaskForagingReport` named export, so the `listTools` -ordering observed by inspectors stays unchanged. diff --git a/.changeset/native-windows-support.md b/.changeset/native-windows-support.md deleted file mode 100644 index 1d6f7bc2..00000000 --- a/.changeset/native-windows-support.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -'colonyq': patch ---- - -Native Windows support for the `colony` CLI. The bin entry was a POSIX shell -script (`bin/colony.sh`) that npm could not execute on Windows without WSL, -breaking every Windows install of the package. The shim is now a Node ES -module (`bin/colony.mjs`) using only `node:*` builtins, so npm's generated -`.cmd` / `.ps1` wrappers run it natively under cmd, PowerShell, and Git Bash. - -The daemon fast-path for `colony bridge lifecycle --json` is preserved — the -HTTP POST to `127.0.0.1:$COLONY_WORKER_PORT/api/bridge/lifecycle` now goes -through `node:http`, with a `node:net` connect probe (1s) before the request -(2s) so the fallback latency stays close to the curl-based version when the -daemon isn't running. 
Stdin is buffered and replayed on fallback, preserving -rule #10 (a dead daemon must never lose or block a write). - -CI now runs the build matrix on `ubuntu-latest`, `macos-latest`, and -`windows-latest` across Node 20 and 22 so this regression cannot recur. diff --git a/.changeset/open-coordination-mode.md b/.changeset/open-coordination-mode.md deleted file mode 100644 index 80ab8f5f..00000000 --- a/.changeset/open-coordination-mode.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -'@colony/mcp-server': minor -'@colony/config': minor ---- - -Open coordination mode (new default): role gates become advisory. `settings.coordinationMode: 'open' | 'guarded'` — under open, scouts can claim, any agent can propose (no cap), everyone sees all proposals, and contended `task_claim_file` calls succeed with loud contention info (`contention`, `contention_detail`, `warning`) instead of erroring; table ownership stays with the live owner. Queen-only approval, subtask completion ownership, evidence requirements, and protected-branch rejection stay hard in both modes. `task_plan_claim_subtask` gains `force: boolean` to override unmet deps with an audit note. Set `coordinationMode: 'guarded'` to restore strict behavior. diff --git a/.changeset/post-tool-use-awareness-push.md b/.changeset/post-tool-use-awareness-push.md deleted file mode 100644 index fb1dd4c4..00000000 --- a/.changeset/post-tool-use-awareness-push.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -'@colony/hooks': minor -'colonyq': patch ---- - -Push awareness: when an edit touches a file another live session holds, the PostToolUse hook now injects a one-line `[Colony] session X recently claimed …` note into the agent's context immediately (Claude Code `additionalContext`), instead of waiting for the next turn's preface. Debounced to once per 2 minutes per session via an `awareness-push` observation marker (hook processes are one-shot, so the marker doubles as audit trail). 
`autoClaimFromToolUse` now reports live-owner blocked takeovers in its `conflicts` result. diff --git a/.changeset/preface-budget-expand-default.md b/.changeset/preface-budget-expand-default.md deleted file mode 100644 index 840fe6c8..00000000 --- a/.changeset/preface-budget-expand-default.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -'@colony/hooks': minor -'@colony/mcp-server': minor -'@colony/config': minor -'@colony/core': patch ---- - -Session-start preface now enforces a global token budget (`sessionStart.prefaceTokenBudget`, default 800): low-priority sections (scope check, foraging, suggestions, prior sessions) drop first with a one-line trailer naming the trim. Prior-session summaries are capped at 300 chars each. MCP `get_observations` stops forcing expansion — it now honors `compression.expandForModel` (default false), so model-facing reads stay compressed unless `expand: true` is passed. `@colony/core` re-exports `countTokens`. diff --git a/.changeset/protected-branch-claim-rejected.md b/.changeset/protected-branch-claim-rejected.md deleted file mode 100644 index c215b402..00000000 --- a/.changeset/protected-branch-claim-rejected.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -'@colony/core': patch -'@colony/mcp-server': patch ---- - -Reject `task_claim_file` at the MCP layer when the task's branch is a protected base branch. - -`guardedClaimFile` already returned `protected_branch_rejected` (controlled by the `rejectProtectedBranchClaims` setting, default `true`) but the MCP handler silently fell through and recorded the claim anyway. The handler now checks for that status and returns a distinct `PROTECTED_BRANCH_CLAIM_REJECTED` error code with a message directing the agent to start a sandbox worktree first. - -`PROTECTED_BRANCH_CLAIM_REJECTED` is added to `TASK_THREAD_ERROR_CODES` in `@colony/core`. Two new integration tests cover the reject and allow cases. 
- -Note: the same `guardedClaimFile` call in `task_plan_claim_subtask` has the same gap; that is out of scope for this patch. diff --git a/.changeset/reindex-batched-embed-loop.md b/.changeset/reindex-batched-embed-loop.md deleted file mode 100644 index 79d74fda..00000000 --- a/.changeset/reindex-batched-embed-loop.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -"@colony/worker": patch -"@colony/embedding": patch -"@colony/core": patch -"@colony/storage": patch -"@colony/config": patch ---- - -Changed the embedding backfill loop to send one batch of texts to embedders that support `embedBatch`, default worker batches to 32 observations, and persist each batch in a single SQLite transaction. The codex-gpu provider now calls `/embed/batch`, while storage copies returned embedding buffers so vector reads do not alias SQLite row memory. diff --git a/.changeset/run-attempts-schema.md b/.changeset/run-attempts-schema.md deleted file mode 100644 index b47a38e9..00000000 --- a/.changeset/run-attempts-schema.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@colony/storage": minor ---- - -Add `task_run_attempts` table + repository helpers (Symphony §4.1.5 / §7.2 — run-attempt lifecycle). New exports: `createRunAttempt`, `getRunAttempt`, `listRunAttemptsByTask`, `updateRunAttemptStatus`, `recordRunAttemptEvent`, `finishRunAttempt`, `RunAttemptError`, plus types `TaskRunAttemptRow`, `NewTaskRunAttempt`, `TaskRunAttemptEventUpdate`, `TaskRunAttemptFinish`, `RunAttemptStatus`, `RunAttemptTerminalStatus` and constants `RUN_ATTEMPT_ACTIVE_STATUSES` / `RUN_ATTEMPT_TERMINAL_STATUSES`. Foundation for Symphony Wave 3 MCP tools (Agents 209/210/211). 
diff --git a/.changeset/savings-storage-at-rest-detach-from-savings-report.md b/.changeset/savings-storage-at-rest-detach-from-savings-report.md deleted file mode 100644 index a3ae8436..00000000 --- a/.changeset/savings-storage-at-rest-detach-from-savings-report.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -'@colony/core': patch ---- - -Stop attributing the storage-at-rest compression claim to live `savings_report` calls - -The `Storage at rest (per observation)` reference row used to map to `['savings_report']`. Live `savings_report` output is structured JSON (~3.5k tokens per call) where the caveman compressor preserves technical tokens byte-for-byte, so the live comparison projected the row's 1k-token baseline against ~3.5k actual tokens and reported negative savings (e.g. `-155%`). - -The row stays in the static reference — caveman compression really does shrink prose observations on disk — but it is now a structural claim about the storage layer rather than a per-call cost, so `mcp_operations` is empty. `savings_report` calls now show up under `unmatched_operations` instead of inflating the row. diff --git a/.changeset/scope-listtasks-pretooluse.md b/.changeset/scope-listtasks-pretooluse.md deleted file mode 100644 index ebaa7d8f..00000000 --- a/.changeset/scope-listtasks-pretooluse.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -'@colony/storage': patch -'@colony/hooks': patch ---- - -Stop scanning the full task table on every PreToolUse tool call - -`protectedLiveClaimConflict` in the PreToolUse hook used `listTasks(1_000_000)` to find conflicting protected-branch claims and then linearly filtered the result by `repo_root` and `isProtectedBranch(branch)`. With the task table growing into the thousands across all agents, that scan dominated p95 latency on every editor tool call and violated the <150ms hook-handler budget. 
- -`@colony/storage` now exposes `listProtectedBranchTasksByRepo(repoRoot)`, a single index-backed query against the existing `UNIQUE(repo_root, branch)` constraint. The PreToolUse hook calls this in place of the unbounded scan; defensive `resolve()` and `isProtectedBranch()` checks remain inside the loop so storage path inconsistencies still get filtered out. No new migration is needed — the unique index already covers the new query shape. diff --git a/.changeset/semantic-search-mcp-tool.md b/.changeset/semantic-search-mcp-tool.md deleted file mode 100644 index 06567532..00000000 --- a/.changeset/semantic-search-mcp-tool.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -"@colony/core": minor -"@colony/mcp-server": minor ---- - -Add `semantic_search` MCP tool for pure-vector recall. - -The existing `search` tool starts with a BM25 candidate pool and then -vector-reranks it. Queries whose terms never appear in any stored -observation return zero candidates and miss the vector path entirely — -that hurts cross-language queries, concept-level queries, and any case -where the agent and the writer used different words for the same idea. - -`semantic_search` is the escape hatch: skip FTS entirely, embed the -query, score every stored observation vector by cosine, return top-K. -Same compact return shape as `search` (id + session_id + kind + snippet -+ score + ts + task_id) and the same progressive-disclosure rule -(callers fetch full bodies via `get_observations`). Requires an -embedding provider; returns a structured error otherwise. - -The implementation is intentionally O(N) over stored vectors. At 50k -observations on the dev box it still fits inside the 50 ms p95 budget -for `search`; a future ANN index (sqlite-vss / HNSW) goes behind the -same method signature when scale demands it. 
diff --git a/.changeset/spotty-spoons-pick.md b/.changeset/spotty-spoons-pick.md deleted file mode 100644 index 298b93d0..00000000 --- a/.changeset/spotty-spoons-pick.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -'colonyq': patch ---- - -Surface hot-loop dominance and drop double-"saved" labels in `colony gain`. -Top spend now reports the operation's share of total tokens, and a `Hot loop:` -callout fires when one operation owns ≥70% of token spend across ≥100 calls. -The "Saved:" / "USD saved:" labels are renamed to "Net:" / "Net USD:" so the -phrase no longer reads "Saved: X saved", and the live sessions header drops the -trailing `, -` when cost isn't configured. diff --git a/.changeset/startup-panel-one-call.md b/.changeset/startup-panel-one-call.md deleted file mode 100644 index 204e68f6..00000000 --- a/.changeset/startup-panel-one-call.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -'@colony/mcp-server': minor ---- - -One-call startup + registration-cost telemetry. `startup_panel` now carries `compact_hivemind` (lane map), `attention_summary` ({unread, blocking, pending_handoffs}), and `tool_profile` (lean/full, so agents know whether to restart with COLONY_TOOL_PROFILE=full for plan/spec/memoir tools) — AGENTS.md blesses it as THE startup call, with the legacy 4-call sweep deprecated but working. `savings_report` gains `registration_cost` ({profile, tool_count, name_description_tokens}) so the per-session schema-injection cost is observable. diff --git a/.changeset/storage-busy-timeout-and-retry-headroom.md b/.changeset/storage-busy-timeout-and-retry-headroom.md deleted file mode 100644 index 10b8246f..00000000 --- a/.changeset/storage-busy-timeout-and-retry-headroom.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -'colonyq': patch ---- - -Raise SQLite contention headroom in `@colony/storage` so the worker daemon, -MCP server, CLI hooks, and codex-fleet panes can share `~/.colony/data.db` -without surfacing `SQLITE_BUSY: database is locked` to callers. 
The -`Storage` constructor now sets `busy_timeout=15000` (was 5000), and -`withBusyRetry` defaults bump to 8 attempts with up-to-1s backoff (was 5 -attempts / 250ms cap). Happy-path callers are unaffected because no busy -error still means no retry sleep; sustained contention from ~30+ concurrent -writers — the codex-fleet shape that triggered this — now has ~3.85s of -combined SQLite + Node retry headroom before raising. diff --git a/.changeset/surface-unpublished-disk-plans.md b/.changeset/surface-unpublished-disk-plans.md deleted file mode 100644 index 1e5d6ccd..00000000 --- a/.changeset/surface-unpublished-disk-plans.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -'@colony/mcp-server': minor -'colonyq': minor -'@colony/core': patch ---- - -Surface unpublished on-disk plan workspaces in `task_plan_list`, and chain `plan create` into `plan publish` - -Two related improvements so orchestrators (and fleets of codex workers) don't waste cap on a plan they have a workspace for but never registered in Colony: - -- `task_plan_list` now scans `openspec/plans/*` and merges any disk workspace whose slug is not already registered, marked `registry_status: 'unpublished'`. Workers cannot claim from these, but seeing them lets the orchestrator notice and run `colony plan publish `. Pass `include_unpublished: false` to mirror the legacy registered-only behavior. -- `colony plan create` now accepts `--publish` (plus optional `--publish-session`, `--publish-agent`, `--publish-auto-archive`) which chains into the same publish path immediately after the workspace is created, eliminating the "I created a plan but workers don't see it" failure mode. -- `PlanInfo.registry_status` gains an `'unpublished'` variant. 
diff --git a/.changeset/task-claim-file-repo-root-hint.md b/.changeset/task-claim-file-repo-root-hint.md deleted file mode 100644 index c85dbc59..00000000 --- a/.changeset/task-claim-file-repo-root-hint.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -'@colony/storage': patch -'@colony/core': patch -'@colony/mcp-server': patch ---- - -`task_claim_file` now surfaces the task's `repo_root` in the -`INVALID_CLAIM_PATH` rejection message so agents see the exact anchor their -path failed to resolve against. The `outside_repo` and `unknown` branches of -`claimPathRejectionMessage(reason, file_path, { repo_root })` switch from a -terse "claim path is not claimable: …" to an actionable -"… resolves outside this task's repo_root \"\" …" / "… could not be -resolved relative to this task's repo_root \"\". Either retarget a task -whose repo_root matches the path being claimed, or pass a path that resolves -inside that anchor." So the agent can immediately tell whether to rewrite the -path or claim a different task. - -The MCP handler in `apps/mcp-server/src/tools/task.ts` and both -`TaskThread.claimFile` / `TaskThread.normalizeOptionalClaimPath` paths in -`packages/core/src/task-thread.ts` thread the task's repo_root through. -Backward compatible — the `context` arg is optional and existing callers see -the original messages. diff --git a/.changeset/task-list-compact-default.md b/.changeset/task-list-compact-default.md deleted file mode 100644 index 3e0a6172..00000000 --- a/.changeset/task-list-compact-default.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -'@colony/mcp-server': minor ---- - -`task_list` MCP tool now returns a compact rollup by default. - -`colony gain` showed `task_list` averaging 14.1k tokens per call — the second-largest line item after the recently-trimmed `task_plan_list`. Each row was emitting the full `TaskRow` shape (8 fields including long `repo_root` absolute paths and long `agent/*` branch names) for up to 50 tasks per call. 
- -Default response is now `tasks: [{ id, title, branch, status, updated_at }]`. The legacy shape including `repo_root`, `created_by`, and `created_at` is preserved as opt-in via `detail: "full"`. Internal callers using `store.storage.listTasks()` directly are unaffected. diff --git a/.changeset/tool-description-diet.md b/.changeset/tool-description-diet.md deleted file mode 100644 index 986b0416..00000000 --- a/.changeset/tool-description-diet.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -'@colony/mcp-server': patch ---- - -Tool description diet + registration token budget guard. The six bulkiest MCP tool descriptions (attention_inbox, task_ready_for_agent, task_relay, task_hand_off, cluster_observations, task_plan_list) are rewritten to keep their trigger phrases and drop tail prose. New `tool-budget.test.ts` fails CI if the lean surface exceeds 4,200 estimated tokens, the full surface exceeds 15,000, or any description exceeds 540 chars. Also de-flakes coordination-loop's claim-preview ordering assertion. diff --git a/.changeset/trim-sessionstart-preface-bloat.md b/.changeset/trim-sessionstart-preface-bloat.md deleted file mode 100644 index 91ee2357..00000000 --- a/.changeset/trim-sessionstart-preface-bloat.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -'@colony/config': minor -'@colony/hooks': minor ---- - -SessionStart hook now emits a compact one-line pointer to the quota-safe -operating contract by default instead of the full ~14-line verbose block, -saving ~350 tokens per SessionStart fire in every repo. New -`sessionStart.contractMode` setting (`'compact' | 'full' | 'none'`, -default `'compact'`) restores the legacy verbose preface (`'full'`) or -suppresses the contract entirely (`'none'`). AGENTS.md / CLAUDE.md keep -carrying the full protocol, so the compact pointer is enough for active -agents; one-time onboarding flows can opt into `'full'`. 
diff --git a/.changeset/unified-liveness-working-notes.md b/.changeset/unified-liveness-working-notes.md deleted file mode 100644 index b960f864..00000000 --- a/.changeset/unified-liveness-working-notes.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -'@colony/core': minor -'@colony/mcp-server': minor -'@colony/hooks': minor ---- - -Cross-agent awareness: unified liveness + working-note "now lines". `readHivemind` accepts an optional `sqliteLiveness` source — sessions with stale heartbeat files but fresh SQLite observations are reclassified as working (`liveness_source: 'sqlite'`); every hivemind session now carries `liveness_source`. `attention_inbox` gains `active_working_notes` (latest task_note_working note per other live session, 30-min window) plus `summary.active_working_note_count`. The SessionStart task preface shows up to 3 co-participant "now:" lines so agents start each session knowing what everyone else is mid-flight on. diff --git a/.changeset/vast-impalas-draw.md b/.changeset/vast-impalas-draw.md deleted file mode 100644 index b55f9895..00000000 --- a/.changeset/vast-impalas-draw.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -'colonyq': minor ---- - -Add a `Movers` section to `colony gain` that splits the queried window into a -trailing "recent" segment and a "prior" segment, then surfaces operations whose -per-hour call rate, token rate, or error count has shifted materially between -the two. Top 3 risers (▲), top 3 fallers (▼), and top 3 error risers (!) are -listed inline above the existing Operations table. New ops (no prior activity) -are tagged `(new)` and disappeared ops `(gone)`. Two new flags: `--recent-hours -` to override the split (default: `window / 7`) and `--no-movers` to -suppress the section. JSON output gains a `live.movers` payload with the same -shape as the rendered rows. 
diff --git a/.changeset/windows-plan-paths-archive.md b/.changeset/windows-plan-paths-archive.md deleted file mode 100644 index af55c405..00000000 --- a/.changeset/windows-plan-paths-archive.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -'@colony/spec': patch ---- - -Windows CI green: `publishPlan` returns forward-slash `spec_change_path` / `plan_workspace_path` (separator-stable for MCP callers on every OS), and `archiveChange` fails deterministically when the archive target already exists (POSIX rename threw; Windows MoveFileEx silently clobbered). diff --git a/.changeset/worktree-contention-realpath.md b/.changeset/worktree-contention-realpath.md deleted file mode 100644 index 2a09121f..00000000 --- a/.changeset/worktree-contention-realpath.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -"@colony/core": patch ---- - -Fix the worktree-contention report silently dropping worktrees whose path -crosses a symlink (e.g. macOS's `/var` -> `/private/var` tmpdir). `git -rev-parse --show-toplevel` returns a symlink-resolved path, but the detector -compared it with `resolve()`, which only normalizes — so the paths never -matched and the worktree was skipped. It now compares `realpathSync` --canonicalized paths on both sides. diff --git a/apps/cli/CHANGELOG.md b/apps/cli/CHANGELOG.md index 7809cb43..d58125dd 100644 --- a/apps/cli/CHANGELOG.md +++ b/apps/cli/CHANGELOG.md @@ -1,5 +1,199 @@ # @imdeadpool/colony-cli +## 0.8.0 + +### Minor Changes + +- 86a62d9: `colony bridge lifecycle` gains `--replay ` and `--dry-run` so a saved `colony-omx-lifecycle-v1` envelope (e.g. captured `.pre.json`) can be routed offline through the real lifecycle logic without touching the live data dir. Combined with `--json`, this gives runtime integrators a CI-shaped harness for asserting on `route`, `event_type`, `extracted_paths`, and `ok`. +- 99936fa: `colony bridge replay ` is now a first-class subcommand for + offline debugging of captured pre-tool-use envelopes. 
Default is `--dry-run` + (ephemeral in-memory SQLite, no side effects); pass `--apply` to write to + the live store. A new `--rewrite-root =` flag rewrites absolute + paths in the envelope before dispatch so captures from another machine can + be replayed locally. Reuses the existing + `packages/contracts/fixtures/colony-omx-lifecycle-v1/` fixtures and does not + require the worker daemon. The shell shim at `apps/cli/bin/colony.sh` + short-circuits only `bridge lifecycle` to the daemon, so `bridge replay` + runs in-process automatically. +- edc318f: `colony gain --summary` now renders an rtk-style compact view over the same + `mcp_metrics` receipts: headline KPI stack (total calls, input/output/total + tokens, tokens saved, total exec time), efficiency meter, top-N **By + Operation** table with proportional impact bars, a 30-day **Daily Activity** + bar graph, and a 12-day **Daily Breakdown** table. `--graph` and `--daily` + narrow the output to a single section; `--days ` and `--top-ops ` tune + the window and table size. Per-operation saved-token credit is distributed + across each comparison row's `matched_operations` proportionally to call share + so the `Saved` column lines up with the headline total. + + Storage gains `Storage.aggregateMcpMetricsDaily({ since, until, operation })` + returning per-UTC-day rollups (`{ day, calls, input_tokens, output_tokens, +total_tokens, total_duration_ms }`) ordered newest-first. Type exports + `AggregateMcpMetricsDailyOptions` and `McpMetricsDailyRow` come along. + +- 9f6b502: Add `colony grab` command group: a per-project localhost intake daemon that + turns a react-grab "Add context" submit into a colony task on a fresh + `agent/*` worktree and starts a detached `tmux` session running `codex` + inside it. + + - `colony grab serve` — long-lived HTTP daemon on 127.0.0.1 with strict + request gating (bearer token, `Origin` allowlist, JSON content-type, + CORS preflight). 
On accepted `POST /grab`, creates a colony task, + posts the react-grab payload as a `kind: "note"` observation, writes + `.colony/INTAKE.md` into the worktree, and spawns `tmux new-session -d` + running `codex` in the worktree. + - `colony grab attach ` — convenience attach to the spawned + tmux session `rg-`. + - `colony grab status` — list grab daemons known to `$COLONY_HOME`. + + In-memory dedup (default 5 min window) keyed by + `sha256(repo_root|file_path|content|extra_prompt)` collapses repeat + submits into `task_post` notes on the existing task. + + The daemon is off by default; it must be started explicitly. + +- 86a3d1a: `colony install` now auto-wires the Colony skill into the active cue profile so + the agent discovers Colony as a pullable capability (loads on a real trigger) + instead of relying on forced session prefaces. New `colony skills wire` / + `colony skills unwire` subcommands shell out to cue's own `cue skills +add-to-profile` / `remove-from-profile`, targeting the cue-resident + `colony/colony` skill. Wiring is best-effort: a missing cue is a soft no-op that + prints the manual `npx skills add` fallback (now with the `recodee` typo fixed), + never a failed install. Opt out with `colony install --no-skills` or + `COLONY_SKILL_WIRE=0`; uninstall removes the skill symmetrically. +- a83eeea: `colony gain drift` and a matching `savings_drift_report` MCP tool flag + tools whose median tokens-per-call has drifted up or down. Default windows + are non-overlapping: recent = last 3 days, baseline = 14 days ending 3 days + before recent. Default thresholds: `--threshold 1.25` (up), `--down-threshold +0.75`, `--min-calls 20` per window. Classifications: `up_drift`, + `down_drift`, `new_tool` (no baseline), `gone` (no recent), `insufficient_data`, + `stable`. 
+ + Storage gains `Storage.mcpTokenDriftPerOperation()` which computes per-operation + medians with a `ROW_NUMBER() OVER (PARTITION BY operation ORDER BY tpc)` + window function — chosen over the correlated `LIMIT 1 OFFSET (COUNT-1)/2` + form because SQLite forbids outer aggregate references in scalar-subquery + `OFFSET`. A `mcpMetricsMinTs()` helper surfaces a one-line warning when the + baseline window starts before the first recorded metric. + +- 53836ff: `colony health --coach` walks a repo through first-week setup. It detects + adoption stage (`fresh` / `installed_no_signal` / `early` / `mid_adoption`) + from cheap signals (`countObservations`, installed-IDE flags, + `firstObservationTs`, `Math.max(toolCallsSince, countMcpMetricsSince)`), + then surfaces the NEXT incomplete step from a fixed 7-step ladder: + `install_runtime` → `first_task_post` → `first_task_claim_file` → + `first_task_hand_off` → `first_plan_claim` → `first_quota_release` → + `first_gain_review`. Each step carries an exact `cmd:` and `tool:` string. + + Progress is persisted in a new `coach_progress` SQLite table (migration + `014-coach-progress.ts`, schema_version 13 → 14). Step completion is + event-observed via `mcp_metrics` / `observations`, never user-clicked. + `colony gain` records a `coach_gain_review` observation so step 7 can + self-detect. `--coach` is mutually exclusive with `--fix-plan` and respects + `--json`. + +- 0950b42: ICM slice 3 — observation importance + temporal decay. + + Every observation now carries an `importance` tier + (`critical | high | medium | low`, default `medium`), a rolling + `access_count`, a `last_accessed_at` timestamp, and a `weight` value. + Critical/high pin their weight to the base value and never decay; + medium/low decay as `baseWeight / (1 + access_count * 0.1)` whenever + they are read. 
Read paths (`MemoryStore.search`, `getObservations`, + `semanticSearch`) coalesce ids into a debounced 50ms batch and flush + the access bookkeeping in one transactional UPDATE, so heavy read + loops trade at most one extra write per ~50ms window. + + Search and `get_observations` MCP responses now include `importance` + and `weight` on each row (additive — older callers ignore them). + `task_post` accepts an optional `importance` parameter forwarded to + the underlying observation insert. + + New CLI subcommand `colony memory prune` deletes near-zero-weight + medium/low rows; `--min-weight ` overrides the default 0.1 + threshold and `--dry-run` reports the candidate count without + deleting. Critical/high are never affected. + + Storage: schema bumped to version 17 with four additive columns on + `observations` and two new indexes. `Storage.recordAccess`, + `Storage.pruneLowDecay`, and `Storage.countLowDecayCandidates` are + the public primitives. (Originally targeted version 15 in isolation; + landed at 17 alongside slice 1 memoirs and slice 2 feedback.) + +- 66fa52c: Surface unpublished on-disk plan workspaces in `task_plan_list`, and chain `plan create` into `plan publish` + + Two related improvements so orchestrators (and fleets of codex workers) don't waste cap on a plan they have a workspace for but never registered in Colony: + + - `task_plan_list` now scans `openspec/plans/*` and merges any disk workspace whose slug is not already registered, marked `registry_status: 'unpublished'`. Workers cannot claim from these, but seeing them lets the orchestrator notice and run `colony plan publish `. Pass `include_unpublished: false` to mirror the legacy registered-only behavior. + - `colony plan create` now accepts `--publish` (plus optional `--publish-session`, `--publish-agent`, `--publish-auto-archive`) which chains into the same publish path immediately after the workspace is created, eliminating the "I created a plan but workers don't see it" failure mode. 
 + - `PlanInfo.registry_status` gains an `'unpublished'` variant. + +- 61150c7: Add a `Movers` section to `colony gain` that splits the queried window into a + trailing "recent" segment and a "prior" segment, then surfaces operations whose + per-hour call rate, token rate, or error count has shifted materially between + the two. Top 3 risers (▲), top 3 fallers (▼), and top 3 error risers (!) are + listed inline above the existing Operations table. New ops (no prior activity) + are tagged `(new)` and disappeared ops `(gone)`. Two new flags: `--recent-hours +<n>` to override the split (default: `window / 7`) and `--no-movers` to + suppress the section. JSON output gains a `live.movers` payload with the same + shape as the rendered rows. + +### Patch Changes + +- 55581ed: Add `colony demo`: a 60-second guided walkthrough of file-claim contention prevention. Two simulated agents (`claude-code` and `codex`) join the same task and try to claim `src/api.ts`; the second agent gets `blocked_active_owner`, then `claude-code` releases and `codex` retries successfully. The demo runs against an isolated temp data dir and cleans up on exit, with `--json` for a structured transcript and `--keep-data` for inspection. Also ship pre-baked `~/.colony/settings.json` fragments under `examples/policies/` for Next.js monorepos, Python packages, and Rust workspaces — each fragment lists stack-appropriate `privacy.excludePatterns` (build output, caches, `.env`) and `protected_files` (lockfiles, root config). README points to both surfaces from the install block. +- 8917c73: Fix two `colony health` scoring bugs that surfaced as "bad" readiness areas with no real defect: + + - **`colony_mcp_share.mcp_tool_calls = 0` despite live MCP traffic.** The counter only read `tool_calls` rows, missing MCP traffic when the calling agent's PostToolUse hook didn't fire for `mcp__*` tools. 
The counter now takes the max of that observed count and `mcp_metrics` row count (colony MCP server's own per-call receipt), with the source surfaced in `source_breakdown.colony_mcp_metrics`. New storage helper `countMcpMetricsSince(since, until?)`. + - **`claim_before_edit_ratio = null` when any edits lacked file_path metadata.** Forcing the ratio to null whenever `edit_tool_calls !== edits_with_file_path` turned a real 200/363 = 55% signal into a bare `n/a` headline. The ratio is now computed over measurable edits whenever `edits_with_file_path > 0`; the `status` field still communicates partial measurability for downstream consumers. + +- e52cd83: Fix `aggregateMcpMetrics` error_reasons grouping so per-row counts sum to + `error_count`. The grouping previously partitioned by `(operation, error_code, +error_message)`, but several handlers embed unique session IDs in their error + messages (e.g. `sub-task is claimed by codex-session-XYZ`), so each race loss + produced a distinct group. Combined with a 3-row truncation per operation, the + result was that nearly all errors were hidden — `task_plan_claim_subtask` would + report 7 errors in the Top error reasons table while the Operations table showed + 93 for the same row. Grouping now drops `error_message` from the key (SQLite + picks the row with the latest `ts` for the sample message via its bare-column- + with-MAX optimization) and the per-operation cap is bumped from 3 to 8 since + codes are low-cardinality. Sum-of-reasons now matches error_count exactly. +- 9376314: Stop mislabelling generic MCP errors and reduce SQLite contention failures. + + - `mcpError` now codes non-`TaskThreadError` throws as `INTERNAL_ERROR` instead of `OBSERVATION_NOT_ON_TASK`, so validation failures and SQLite "database is locked" errors surface honestly in `mcp_metrics` and `colony gain`. 
+ - Storage now sets `PRAGMA busy_timeout = 5000` on every connection (worker daemon, MCP server, CLI hooks all open separate handles to the same WAL DB), so concurrent writers wait the kernel out instead of throwing `SQLITE_BUSY` immediately. + +- 5efdb52: Native Windows support for the `colony` CLI. The bin entry was a POSIX shell + script (`bin/colony.sh`) that npm could not execute on Windows without WSL, + breaking every Windows install of the package. The shim is now a Node ES + module (`bin/colony.mjs`) using only `node:*` builtins, so npm's generated + `.cmd` / `.ps1` wrappers run it natively under cmd, PowerShell, and Git Bash. + + The daemon fast-path for `colony bridge lifecycle --json` is preserved — the + HTTP POST to `127.0.0.1:$COLONY_WORKER_PORT/api/bridge/lifecycle` now goes + through `node:http`, with a `node:net` connect probe (1s) before the request + (2s) so the fallback latency stays close to the curl-based version when the + daemon isn't running. Stdin is buffered and replayed on fallback, preserving + rule #10 (a dead daemon must never lose or block a write). + + CI now runs the build matrix on `ubuntu-latest`, `macos-latest`, and + `windows-latest` across Node 20 and 22 so this regression cannot recur. + +- 1d78c99: Push awareness: when an edit touches a file another live session holds, the PostToolUse hook now injects a one-line `[Colony] session X recently claimed …` note into the agent's context immediately (Claude Code `additionalContext`), instead of waiting for the next turn's preface. Debounced to once per 2 minutes per session via an `awareness-push` observation marker (hook processes are one-shot, so the marker doubles as audit trail). `autoClaimFromToolUse` now reports live-owner blocked takeovers in its `conflicts` result. +- 08482b1: Surface hot-loop dominance and drop double-"saved" labels in `colony gain`. 
+ Top spend now reports the operation's share of total tokens, and a `Hot loop:` + callout fires when one operation owns ≥70% of token spend across ≥100 calls. + The "Saved:" / "USD saved:" labels are renamed to "Net:" / "Net USD:" so the + phrase no longer reads "Saved: X saved", and the live sessions header drops the + trailing `, -` when cost isn't configured. +- 658b722: Raise SQLite contention headroom in `@colony/storage` so the worker daemon, + MCP server, CLI hooks, and codex-fleet panes can share `~/.colony/data.db` + without surfacing `SQLITE_BUSY: database is locked` to callers. The + `Storage` constructor now sets `busy_timeout=15000` (was 5000), and + `withBusyRetry` defaults bump to 8 attempts with up-to-1s backoff (was 5 + attempts / 250ms cap). Happy-path callers are unaffected because no busy + error still means no retry sleep; sustained contention from ~30+ concurrent + writers — the codex-fleet shape that triggered this — now has ~3.85s of + combined SQLite + Node retry headroom before raising. 
+ ## 0.7.0 ### Minor Changes diff --git a/apps/cli/package.json b/apps/cli/package.json index 9a688f03..fa95c910 100644 --- a/apps/cli/package.json +++ b/apps/cli/package.json @@ -1,6 +1,6 @@ { "name": "colonyq", - "version": "0.7.0", + "version": "0.8.0", "license": "MIT", "description": "Local-first memory and coordination for Claude Code, Gemini CLI, OpenCode, Codex, and Cursor.", "keywords": [ @@ -32,7 +32,13 @@ "colony": "bin/colony.mjs" }, "main": "./dist/index.js", - "files": ["bin/colony.mjs", "dist", "hooks-scripts", "README.md", "LICENSE"], + "files": [ + "bin/colony.mjs", + "dist", + "hooks-scripts", + "README.md", + "LICENSE" + ], "scripts": { "build": "tsup", "dev": "tsup --watch --onSuccess \"node dist/index.js\"", diff --git a/apps/mcp-server/CHANGELOG.md b/apps/mcp-server/CHANGELOG.md index cff5af11..c2be1bbe 100644 --- a/apps/mcp-server/CHANGELOG.md +++ b/apps/mcp-server/CHANGELOG.md @@ -1,5 +1,330 @@ # @colony/mcp-server +## 0.8.0 + +### Minor Changes + +- 8f33724: Add `account_claims` table and three new MCP tools for binding Codex accounts to planner waves. + + `task_claim_account`, `task_release_account_claim`, and `task_list_account_claims` let the recodee planner Account Capacity rail bind a Codex account to a planner wave so multiple operators on the same plan see the same dispatch state. Bindings are keyed by `(plan_slug, wave_id)` — a planner-logical coordinate that exists before any Colony task is spawned — and persist across operators via a new `account_claims` SQLite table. A partial unique index enforces at-most-one-active claim per wave; released claims stay as audit history. + + Schema migrates forward-only from version 10 to 11. No data backfill is required: the table starts empty and is populated by user action. The contract is regression-tested via an MCP-inspector test (`apps/mcp-server/test/account-claims.test.ts`) exercising the full claim → rebind → release → list lifecycle. 
+ +- 819660d: Auto-release stale plan-subtask claims in `task_ready_for_agent` + + A `claimed` subtask older than `STALE_PLAN_SUBTASK_CLAIM_MS` (default 1h) + is now released server-side at the top of `task_ready_for_agent` so the + ready-queue can route it to the next eligible worker on the same tick. + Replaces the prior `rescue_candidate` + `next_tool: rescue_stranded_scan` + suggestion path, which deadlocked policy-locked worker fleets (codex, + others) that refuse to rescue another agent's claim. + + New behavior: + + - The response carries `auto_released_stale_claims[]` listing the + releases performed by this call (omitted when nothing was stale). + - Pass `auto_release_stale_claims: false` to suppress the sweep and + observe stale state without mutating it (admin / telemetry callers). + - A new `autoReleaseStalePlanSubtaskClaims(store, plans, options)` + export from `@colony/core` exposes the same sweep to other consumers + (e.g. an autopilot tick). + + Audit trail: each release writes one `plan-subtask-claim` observation + with `status: 'available'` and `rescue_reason: 'auto-released-stale-claim'`, + matching the shape `bulkRescueStrandedSessions` already uses. + +- 782ddb6: Add `cluster_observations` MCP tool for semantic dedupe. + + Greedy single-linkage clustering by cosine threshold over a caller- + supplied set of observation IDs. The intended consumer is handoff / + attention_inbox dedupe: collect pending handoff IDs, pass them through + `cluster_observations(ids, threshold)`, and show the user one canonical + row per cluster instead of three different agents saying the same thing. + + The MCP tool wraps a new `MemoryStore.clusterObservations` primitive, + so callers that already use the core package can dedupe without going + through MCP. Threshold defaults to `0.85`; observations missing an + embedding come back in a separate `unembedded` array so the caller + chooses whether to keep them as singletons. 
Capped at 500 input IDs to + bound the O(N²) cosine cost. + +- a83eeea: `colony gain drift` and a matching `savings_drift_report` MCP tool flag + tools whose median tokens-per-call has drifted up or down. Default windows + are non-overlapping: recent = last 3 days, baseline = 14 days ending 3 days + before recent. Default thresholds: `--threshold 1.25` (up), `--down-threshold +0.75`, `--min-calls 20` per window. Classifications: `up_drift`, + `down_drift`, `new_tool` (no baseline), `gone` (no recent), `insufficient_data`, + `stable`. + + Storage gains `Storage.mcpTokenDriftPerOperation()` which computes per-operation + medians with a `ROW_NUMBER() OVER (PARTITION BY operation ORDER BY tpc)` + window function — chosen over the correlated `LIMIT 1 OFFSET (COUNT-1)/2` + form because SQLite forbids outer aggregate references in scalar-subquery + `OFFSET`. A `mcpMetricsMinTs()` helper surfaces a one-line warning when the + baseline window starts before the first recorded metric. + +- 7dcece2: ICM slice 2 — feedback `record`, `search`, and `stats` MCP tools. + + Adds a new `feedback` lane that records "AI predicted X, real answer + was Y" corrections so a future agent can search prior mistakes by + topic before repeating them. Migration 015 introduces the `feedback` + table plus a porter-unicode61 `feedback_fts` virtual table mirrored + by the standard `ai/ad/au` triggers; importance is a four-level enum + defaulting to `medium`. `prediction`, `correction`, and the optional + `context` flow through `MemoryStore.recordFeedback`, which routes each + body through `prepareMemoryText` — the same redact-then-compress path + observations use — so the compression invariant holds at the write + boundary. + + MCP surface (progressive disclosure): + + - `feedback_record({ topic, prediction, correction, context?, importance?, created_by? })` → `{ id }` + - `feedback_search({ query, topic?, limit? 
})` → compact hits (`id`, `topic`, `importance`, `score`, `snippet`, `created_at`) + - `feedback_stats({ topic? })` → per-topic counts and `last_created_at` + + Follow-up (separate PR): a pre-tool-use hook that surfaces prior + corrections on inbound prompts. This PR keeps the slice scoped to the + storage + search surface so it can ship behind a manual query first. + + Reference: `docs/icm-integration-plan.md` slice 2. + +- 0950b42: ICM slice 3 — observation importance + temporal decay. + + Every observation now carries an `importance` tier + (`critical | high | medium | low`, default `medium`), a rolling + `access_count`, a `last_accessed_at` timestamp, and a `weight` value. + Critical/high pin their weight to the base value and never decay; + medium/low decay as `baseWeight / (1 + access_count * 0.1)` whenever + they are read. Read paths (`MemoryStore.search`, `getObservations`, + `semanticSearch`) coalesce ids into a debounced 50ms batch and flush + the access bookkeeping in one transactional UPDATE, so heavy read + loops trade at most one extra write per ~50ms window. + + Search and `get_observations` MCP responses now include `importance` + and `weight` on each row (additive — older callers ignore them). + `task_post` accepts an optional `importance` parameter forwarded to + the underlying observation insert. + + New CLI subcommand `colony memory prune` deletes near-zero-weight + medium/low rows; `--min-weight ` overrides the default 0.1 + threshold and `--dry-run` reports the candidate count without + deleting. Critical/high are never affected. + + Storage: schema bumped to version 17 with four additive columns on + `observations` and two new indexes. `Storage.recordAccess`, + `Storage.pruneLowDecay`, and `Storage.countLowDecayCandidates` are + the public primitives. (Originally targeted version 15 in isolation; + landed at 17 alongside slice 1 memoirs and slice 2 feedback.) + +- 0950b42: ICM slice 1 — memoirs (typed knowledge graphs). 
+ + A memoir is a named container of typed concepts (graph nodes) connected + by typed relations (graph edges). Concept content routes through the + same `prepareMemoryText` redact → compress pipeline used for + observations, so the compression invariant holds at the write boundary. + The nine relation types + (`part_of`, `depends_on`, `related_to`, `contradicts`, `refines`, + `alternative_to`, `caused_by`, `instance_of`, `superseded_by`) mirror + ICM's taxonomy and let agents express "what supersedes X", "what + contradicts decision Y", and similar structural reasoning without + abandoning the flat-observations primary store. + + Schema bump 14 → 15 adds three tables (`memoirs`, + `memoir_concepts`, `memoir_relations`) and one virtual table + (`memoir_concepts_fts`) with `(ai, ad, au)` triggers mirroring + `observations_fts`. Migrations are forward-only. + + `MemoryStore` exposes `createMemoir`, `listMemoirs`, `addConcept`, + `refineConcept`, `linkConcepts`, `searchConcepts`, and `inspectConcept`. + Seven MCP tools (`memoir_create`, `memoir_list`, `memoir_add_concept`, + `memoir_refine`, `memoir_link`, `memoir_search`, `memoir_inspect`) wrap + them with progressive disclosure — `memoir_search` returns compact hits + and `memoir_inspect` is the only path that returns full bodies plus a + BFS neighbourhood. + +- 7aba1eb: MCP tool profiles: the stdio server now defaults to a lean ~20-tool surface (memory + coordination primitives), cutting per-session schema-injection context cost by more than half. Set `COLONY_TOOL_PROFILE=full` or `settings.mcp.toolProfile: 'full'` to restore the entire tool surface (plan, spec, foraging, memoir, proposal, savings, queen lanes). New `mcp.toolProfile` setting in `@colony/config`. +- 45e1abe: Trim `task_plan_list` token bloat and add recovery hints to two error paths. 
+ + `task_plan_list` now defaults to a compact rollup that omits `subtasks[].description` and `subtasks[].file_scope` — the two heavy fields driving ~60% of MCP wire token spend (12.9k avg per call in `colony gain`). The full legacy shape is preserved as opt-in via `detail: 'full'`. Internal `listPlans()` callers in `@colony/core` bypass MCP and are unaffected. + + `task_note_working` now returns `nearby_tasks` (ranked branch-and-repo > branch-only > repo-only) plus a recovery `hint` when `ACTIVE_TASK_NOT_FOUND` and the caller supplied a `repo_root` or `branch`. Fresh agent sessions that have not yet joined the active task on their branch can recover via `task_post(task_id=...)` or `task_accept_handoff` without re-listing the task table. + + `task_plan_claim_subtask` now attaches `next_available_subtask_index`, `next_available_count`, and a compact `next_available[]` list to `PLAN_SUBTASK_NOT_AVAILABLE` errors so a racing claimer can retry immediately instead of issuing a full `task_plan_list` round trip. + +- 3b86d74: Open coordination mode (new default): role gates become advisory. `settings.coordinationMode: 'open' | 'guarded'` — under open, scouts can claim, any agent can propose (no cap), everyone sees all proposals, and contended `task_claim_file` calls succeed with loud contention info (`contention`, `contention_detail`, `warning`) instead of erroring; table ownership stays with the live owner. Queen-only approval, subtask completion ownership, evidence requirements, and protected-branch rejection stay hard in both modes. `task_plan_claim_subtask` gains `force: boolean` to override unmet deps with an audit note. Set `coordinationMode: 'guarded'` to restore strict behavior. +- 7770b58: Session-start preface now enforces a global token budget (`sessionStart.prefaceTokenBudget`, default 800): low-priority sections (scope check, foraging, suggestions, prior sessions) drop first with a one-line trailer naming the trim. 
Prior-session summaries are capped at 300 chars each. MCP `get_observations` stops forcing expansion — it now honors `compression.expandForModel` (default false), so model-facing reads stay compressed unless `expand: true` is passed. `@colony/core` re-exports `countTokens`. +- ccd51b6: Add `semantic_search` MCP tool for pure-vector recall. + + The existing `search` tool starts with a BM25 candidate pool and then + vector-reranks it. Queries whose terms never appear in any stored + observation return zero candidates and miss the vector path entirely — + that hurts cross-language queries, concept-level queries, and any case + where the agent and the writer used different words for the same idea. + + `semantic_search` is the escape hatch: skip FTS entirely, embed the + query, score every stored observation vector by cosine, return top-K. + Same compact return shape as `search` (id, session_id, kind, + snippet, score, ts, task_id) and the same progressive-disclosure + rule (callers fetch full bodies via `get_observations`). + Requires an embedding provider; returns a structured error + otherwise. + + The implementation is intentionally O(N) over stored vectors. At 50k + observations on the dev box it still fits inside the 50 ms p95 budget + for `search`; a future ANN index (sqlite-vss / HNSW) goes behind the + same method signature when scale demands it. + +- fc53979: One-call startup + registration-cost telemetry. `startup_panel` now carries `compact_hivemind` (lane map), `attention_summary` ({unread, blocking, pending_handoffs}), and `tool_profile` (lean/full, so agents know whether to restart with COLONY_TOOL_PROFILE=full for plan/spec/memoir tools) — AGENTS.md blesses it as THE startup call, with the legacy 4-call sweep deprecated but working. `savings_report` gains `registration_cost` ({profile, tool_count, name_description_tokens}) so the per-session schema-injection cost is observable. 
+- 66fa52c: Surface unpublished on-disk plan workspaces in `task_plan_list`, and chain `plan create` into `plan publish` + + Two related improvements so orchestrators (and fleets of codex workers) don't waste cap on a plan they have a workspace for but never registered in Colony: + + - `task_plan_list` now scans `openspec/plans/*` and merges any disk workspace whose slug is not already registered, marked `registry_status: 'unpublished'`. Workers cannot claim from these, but seeing them lets the orchestrator notice and run `colony plan publish `. Pass `include_unpublished: false` to mirror the legacy registered-only behavior. + - `colony plan create` now accepts `--publish` (plus optional `--publish-session`, `--publish-agent`, `--publish-auto-archive`) which chains into the same publish path immediately after the workspace is created, eliminating the "I created a plan but workers don't see it" failure mode. + - `PlanInfo.registry_status` gains an `'unpublished'` variant. + +- a918e6f: `task_list` MCP tool now returns a compact rollup by default. + + `colony gain` showed `task_list` averaging 14.1k tokens per call — the second-largest line item after the recently-trimmed `task_plan_list`. Each row was emitting the full `TaskRow` shape (8 fields including long `repo_root` absolute paths and long `agent/*` branch names) for up to 50 tasks per call. + + Default response is now `tasks: [{ id, title, branch, status, updated_at }]`. The legacy shape including `repo_root`, `created_by`, and `created_at` is preserved as opt-in via `detail: "full"`. Internal callers using `store.storage.listTasks()` directly are unaffected. + +- f7b490a: Cross-agent awareness: unified liveness + working-note "now lines". `readHivemind` accepts an optional `sqliteLiveness` source — sessions with stale heartbeat files but fresh SQLite observations are reclassified as working (`liveness_source: 'sqlite'`); every hivemind session now carries `liveness_source`. 
`attention_inbox` gains `active_working_notes` (latest task_note_working note per other live session, 30-min window) plus `summary.active_working_note_count`. The SessionStart task preface shows up to 3 co-participant "now:" lines so agents start each session knowing what everyone else is mid-flight on. + +### Patch Changes + +- 9e1a791: Explain `task_claim_file` rejections instead of returning a generic "not claimable" + + `task_claim_file` (and the `TaskThread.claimFile` / + `normalizeOptionalClaimPath` paths inside `@colony/core`) used to throw + `INVALID_CLAIM_PATH: claim path is not claimable` with no hint at the + reason. Telemetry showed agents bouncing off the same surface for the same + input — e.g. `colony/packages/core/test` (a directory) — because the + message gave them nothing to act on. + + The rejection branch now classifies the failure and renders a specific + message per reason: + + - `directory` — _"claim path "X" is a directory; claim individual files inside it instead."_ + - `pseudo` — _"claim path "X" is a pseudo path (e.g. /dev/null) and cannot be claimed."_ + - `outside_repo` — _"claim path "X" resolves outside this task's repo_root and cannot be claimed."_ + - `empty` — _"claim path is empty."_ + - fallback — the legacy generic message, still keyed on the input path. + + New exports from `@colony/storage`: + + - `classifyClaimPathRejection(context)` — pure classifier paralleling + `normalizeRepoFilePath`. Returns the reason or `null`. + - `claimPathRejectionMessage(reason, file_path)` — single source of + truth for the user-facing message so the MCP `task_claim_file` + handler and `TaskThread.claimFile` stay in sync. + - New storage method `classifyTaskFilePathRejection(task_id, file_path, +cwd?)` plumbs the task → repo_root lookup that the existing + `normalizeTaskFilePath` already does, so callers only pay for the + classifier on the error branch. 
+ + No behavior change: the same inputs that used to be rejected are still + rejected; only the error message and code surface improve. Existing + INVALID_CLAIM_PATH error code is preserved. + +- f1659a8: Attribute `` session metrics via the MCP client-identity fallback + + `colony gain` previously bucketed ~9,000 calls/day into a single + `` session row. Cause: high-volume read-only tools + (`task_plan_list`, `get_observations`, `search`, `task_timeline`, + `list_sessions`, `examples_list`, …) carry no `session_id` in their + schema, so `metricContextOf` had nothing to attribute the call to. + + The metrics wrapper now reuses the same `detectMcpClientIdentity` heuristic + the heartbeat wrapper already runs on every call: env-derived identity + (`CODEX_SESSION_ID`, `CLAUDECODE_SESSION_ID`, `COLONY_CLIENT_SESSION_ID`), + or a stable `mcp-` fallback when no signal is available. The + explicit `args.session_id` / `args.current_session_id` path is unchanged; + the fallback only fires when both are absent. + + Effect on the savings report: per-client session rows replace the giant + `` bucket, making it possible to see which agent / connection is + driving the bulk of the load — the regression-investigation gap that + PR #531's compact-mode fix left open. + + No new tool dependencies; the wrapper reads `detectMcpClientIdentity` from + `./heartbeat.js` (already in the same package). + +- 364e6a0: Move `task_foraging_report` source location from `attention.ts` to `foraging.ts` + + The tool is part of the foraging surface (it wraps `ProposalSystem.foragingReport` + and is conceptually paired with `examples_list` / `examples_query`), not the + attention-inbox surface. It only lived in `attention.ts` as an accident of the + pre-split monolithic `server.ts`. + + Pure code-location refactor. The MCP tool name, description, input schema, and + handler body are byte-identical. 
`server.ts` registers it at the same call-site + slot via a new `registerTaskForagingReport` named export, so the `listTools` + ordering observed by inspectors stays unchanged. + +- e6c5766: Reject `task_claim_file` at the MCP layer when the task's branch is a protected base branch. + + `guardedClaimFile` already returned `protected_branch_rejected` (controlled by the `rejectProtectedBranchClaims` setting, default `true`) but the MCP handler silently fell through and recorded the claim anyway. The handler now checks for that status and returns a distinct `PROTECTED_BRANCH_CLAIM_REJECTED` error code with a message directing the agent to start a sandbox worktree first. + + `PROTECTED_BRANCH_CLAIM_REJECTED` is added to `TASK_THREAD_ERROR_CODES` in `@colony/core`. Two new integration tests cover the reject and allow cases. + + Note: the same `guardedClaimFile` call in `task_plan_claim_subtask` has the same gap; that is out of scope for this patch. + +- a87921e: `task_claim_file` now surfaces the task's `repo_root` in the + `INVALID_CLAIM_PATH` rejection message so agents see the exact anchor their + path failed to resolve against. The `outside_repo` and `unknown` branches of + `claimPathRejectionMessage(reason, file_path, { repo_root })` switch from a + terse "claim path is not claimable: …" to an actionable + "… resolves outside this task's repo_root \"\" …" / "… could not be + resolved relative to this task's repo_root \"\". Either retarget a task + whose repo_root matches the path being claimed, or pass a path that resolves + inside that anchor." So the agent can immediately tell whether to rewrite the + path or claim a different task. + + The MCP handler in `apps/mcp-server/src/tools/task.ts` and both + `TaskThread.claimFile` / `TaskThread.normalizeOptionalClaimPath` paths in + `packages/core/src/task-thread.ts` thread the task's repo_root through. + Backward compatible — the `context` arg is optional and existing callers see + the original messages. 
+ +- 13ce3ad: Tool description diet + registration token budget guard. The six bulkiest MCP tool descriptions (attention_inbox, task_ready_for_agent, task_relay, task_hand_off, cluster_observations, task_plan_list) are rewritten to keep their trigger phrases and drop tail prose. New `tool-budget.test.ts` fails CI if the lean surface exceeds 4,200 estimated tokens, the full surface exceeds 15,000, or any description exceeds 540 chars. Also de-flakes coordination-loop's claim-preview ordering assertion. +- Updated dependencies [819660d] +- Updated dependencies [cdf22de] +- Updated dependencies [4a68470] +- Updated dependencies [782ddb6] +- Updated dependencies [b6e2ad4] +- Updated dependencies [86a3d1a] +- Updated dependencies [9e1a791] +- Updated dependencies [829556b] +- Updated dependencies [dafe17b] +- Updated dependencies [a83eeea] +- Updated dependencies [7dcece2] +- Updated dependencies [0950b42] +- Updated dependencies [0950b42] +- Updated dependencies [7aba1eb] +- Updated dependencies [950a95d] +- Updated dependencies [3b86d74] +- Updated dependencies [1d78c99] +- Updated dependencies [7770b58] +- Updated dependencies [e6c5766] +- Updated dependencies [60c3123] +- Updated dependencies [2e8fba1] +- Updated dependencies [3898ff3] +- Updated dependencies [ccd51b6] +- Updated dependencies [66fa52c] +- Updated dependencies [a87921e] +- Updated dependencies [8a15958] +- Updated dependencies [f7b490a] +- Updated dependencies [2cc5ff8] +- Updated dependencies [9a36e5e] + - @colony/core@0.8.0 + - @colony/embedding@0.8.0 + - @colony/config@0.8.0 + - @colony/hooks@0.8.0 + - @colony/foraging@0.8.0 + - @colony/spec@0.8.0 + - @colony/queen@0.8.0 + ## 0.7.0 ### Minor Changes diff --git a/apps/mcp-server/package.json b/apps/mcp-server/package.json index 69cf76bb..72464c3d 100644 --- a/apps/mcp-server/package.json +++ b/apps/mcp-server/package.json @@ -1,6 +1,6 @@ { "name": "@colony/mcp-server", - "version": "0.7.0", + "version": "0.8.0", "license": "MIT", "private": true, 
"type": "module", diff --git a/apps/worker/CHANGELOG.md b/apps/worker/CHANGELOG.md index ef49ac06..85818cae 100644 --- a/apps/worker/CHANGELOG.md +++ b/apps/worker/CHANGELOG.md @@ -1,5 +1,51 @@ # @colony/worker +## 0.8.0 + +### Patch Changes + +- cdf22de: Batch worker observation embedding calls and add batch-capable embedder providers. +- 60c3123: Changed the embedding backfill loop to send one batch of texts to embedders that support `embedBatch`, default worker batches to 32 observations, and persist each batch in a single SQLite transaction. The codex-gpu provider now calls `/embed/batch`, while storage copies returned embedding buffers so vector reads do not alias SQLite row memory. +- Updated dependencies [8f33724] +- Updated dependencies [819660d] +- Updated dependencies [cdf22de] +- Updated dependencies [4a68470] +- Updated dependencies [782ddb6] +- Updated dependencies [b6e2ad4] +- Updated dependencies [86a3d1a] +- Updated dependencies [edc318f] +- Updated dependencies [9e1a791] +- Updated dependencies [8917c73] +- Updated dependencies [829556b] +- Updated dependencies [dafe17b] +- Updated dependencies [a83eeea] +- Updated dependencies [e52cd83] +- Updated dependencies [53836ff] +- Updated dependencies [7dcece2] +- Updated dependencies [0950b42] +- Updated dependencies [0950b42] +- Updated dependencies [7aba1eb] +- Updated dependencies [950a95d] +- Updated dependencies [3b86d74] +- Updated dependencies [1d78c99] +- Updated dependencies [7770b58] +- Updated dependencies [e6c5766] +- Updated dependencies [60c3123] +- Updated dependencies [71ee50d] +- Updated dependencies [2e8fba1] +- Updated dependencies [3898ff3] +- Updated dependencies [ccd51b6] +- Updated dependencies [66fa52c] +- Updated dependencies [a87921e] +- Updated dependencies [8a15958] +- Updated dependencies [f7b490a] +- Updated dependencies [9a36e5e] + - @colony/storage@0.8.0 + - @colony/core@0.8.0 + - @colony/embedding@0.8.0 + - @colony/config@0.8.0 + - @colony/hooks@0.8.0 + ## 0.7.0 ### Minor 
Changes diff --git a/apps/worker/package.json b/apps/worker/package.json index 48d6355c..f86fc82c 100644 --- a/apps/worker/package.json +++ b/apps/worker/package.json @@ -1,6 +1,6 @@ { "name": "@colony/worker", - "version": "0.7.0", + "version": "0.8.0", "license": "MIT", "private": true, "type": "module", diff --git a/openspec/changes/agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59/.openspec.yaml b/openspec/changes/agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59/.openspec.yaml new file mode 100644 index 00000000..8fe20555 --- /dev/null +++ b/openspec/changes/agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-06-12 diff --git a/openspec/changes/agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59/notes.md b/openspec/changes/agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59/notes.md new file mode 100644 index 00000000..334c0c8d --- /dev/null +++ b/openspec/changes/agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59/notes.md @@ -0,0 +1,16 @@ +# agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59 (minimal / T1) + +Branch: `agent//` + +Describe the change in a sentence or two. Commit message is the spec of record. + +## Handoff + +- Handoff: change=`agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59`; branch=`agent//`; scope=`TODO`; action=`continue this sandbox or finish cleanup after a usage-limit/manual takeover`. +- Copy prompt: Continue `agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59` on branch `agent//`. Work inside the existing sandbox, review `openspec/changes/agent-claude-version-packages-release-cut-v0-8-0-2026-06-12-16-59/notes.md`, continue from the current state instead of creating a new sandbox, and when the work is done run `gx branch finish --branch agent// --base dev --via-pr --wait-for-merge --cleanup`. 
+ +## Cleanup + +- [ ] Run: `gx branch finish --branch agent// --base dev --via-pr --wait-for-merge --cleanup` +- [ ] Record PR URL + `MERGED` state in the completion handoff. +- [ ] Confirm sandbox worktree is gone (`git worktree list`, `git branch -a`). diff --git a/packages/config/CHANGELOG.md b/packages/config/CHANGELOG.md index 549d5130..d47bc654 100644 --- a/packages/config/CHANGELOG.md +++ b/packages/config/CHANGELOG.md @@ -1,5 +1,48 @@ # @colony/config +## 0.8.0 + +### Minor Changes + +- b6e2ad4: Add `codex-gpu` embedding provider that targets the recodee + `codex-gpu-embedder` HTTP service (`POST /embed`). + + When `settings.embedding.provider = 'codex-gpu'`, the worker hits the + local GPU embedder over HTTP instead of running Transformers.js in + process. Endpoint defaults to `http://127.0.0.1:8100`; override via + `settings.embedding.endpoint`. Captures `dim` from a one-shot warm-up + probe at init time, matching every other provider's contract. + + The recodee dev box measures ~16 ms per single embed on the GPU vs ~200 + ms on local CPU, so the worker's embedding-backfill loop completes + roughly 14× faster when configured this way. Behavior unchanged for any + deployment that does not opt in via the new provider value. + +- 86a3d1a: Colony is context-aware by default, never forcing. The PreToolUse hook no + longer hard-denies a tool call on a protected-branch claim conflict under the + default `warn` policy — it warns and records telemetry but lets the edit + proceed. Repos that want the old hard block opt in with + `bridge.policyMode = "block-on-conflict"`. The per-turn Claude Code + read-before-edit reminder is dropped (the harness already enforces it), and the + compact session-start contract is reframed from imperative "claim/hand off + before X" mandates to availability framing ("coordination is available, pull it + when it helps"). No write-path persistence changes. 
+- 7aba1eb: MCP tool profiles: the stdio server now defaults to a lean ~20-tool surface (memory + coordination primitives), cutting per-session schema-injection context cost by more than half. Set `COLONY_TOOL_PROFILE=full` or `settings.mcp.toolProfile: 'full'` to restore the entire tool surface (plan, spec, foraging, memoir, proposal, savings, queen lanes). New `mcp.toolProfile` setting in `@colony/config`. +- 3b86d74: Open coordination mode (new default): role gates become advisory. `settings.coordinationMode: 'open' | 'guarded'` — under open, scouts can claim, any agent can propose (no cap), everyone sees all proposals, and contended `task_claim_file` calls succeed with loud contention info (`contention`, `contention_detail`, `warning`) instead of erroring; table ownership stays with the live owner. Queen-only approval, subtask completion ownership, evidence requirements, and protected-branch rejection stay hard in both modes. `task_plan_claim_subtask` gains `force: boolean` to override unmet deps with an audit note. Set `coordinationMode: 'guarded'` to restore strict behavior. +- 7770b58: Session-start preface now enforces a global token budget (`sessionStart.prefaceTokenBudget`, default 800): low-priority sections (scope check, foraging, suggestions, prior sessions) drop first with a one-line trailer naming the trim. Prior-session summaries are capped at 300 chars each. MCP `get_observations` stops forcing expansion — it now honors `compression.expandForModel` (default false), so model-facing reads stay compressed unless `expand: true` is passed. `@colony/core` re-exports `countTokens`. +- 8a15958: SessionStart hook now emits a compact one-line pointer to the quota-safe + operating contract by default instead of the full ~14-line verbose block, + saving ~350 tokens per SessionStart fire in every repo. 
New + `sessionStart.contractMode` setting (`'compact' | 'full' | 'none'`, + default `'compact'`) restores the legacy verbose preface (`'full'`) or + suppresses the contract entirely (`'none'`). AGENTS.md / CLAUDE.md keep + carrying the full protocol, so the compact pointer is enough for active + agents; one-time onboarding flows can opt into `'full'`. + +### Patch Changes + +- 60c3123: Changed the embedding backfill loop to send one batch of texts to embedders that support `embedBatch`, default worker batches to 32 observations, and persist each batch in a single SQLite transaction. The codex-gpu provider now calls `/embed/batch`, while storage copies returned embedding buffers so vector reads do not alias SQLite row memory. + ## 0.7.0 ### Patch Changes diff --git a/packages/config/package.json b/packages/config/package.json index cddb8420..30566ea4 100644 --- a/packages/config/package.json +++ b/packages/config/package.json @@ -1,6 +1,6 @@ { "name": "@colony/config", - "version": "0.7.0", + "version": "0.8.0", "private": true, "license": "MIT", "type": "module", @@ -12,7 +12,9 @@ "import": "./dist/index.js" } }, - "files": ["dist"], + "files": [ + "dist" + ], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/core/CHANGELOG.md b/packages/core/CHANGELOG.md index 527e694a..95690873 100644 --- a/packages/core/CHANGELOG.md +++ b/packages/core/CHANGELOG.md @@ -1,5 +1,298 @@ # @colony/core +## 0.8.0 + +### Minor Changes + +- 819660d: Auto-release stale plan-subtask claims in `task_ready_for_agent` + + A `claimed` subtask older than `STALE_PLAN_SUBTASK_CLAIM_MS` (default 1h) + is now released server-side at the top of `task_ready_for_agent` so the + ready-queue can route it to the next eligible worker on the same tick. 
+ Replaces the prior `rescue_candidate` + `next_tool: rescue_stranded_scan` + suggestion path, which deadlocked policy-locked worker fleets (codex, + others) that refuse to rescue another agent's claim. + + New behavior: + + - The response carries `auto_released_stale_claims[]` listing the + releases performed by this call (omitted when nothing was stale). + - Pass `auto_release_stale_claims: false` to suppress the sweep and + observe stale state without mutating it (admin / telemetry callers). + - A new `autoReleaseStalePlanSubtaskClaims(store, plans, options)` + export from `@colony/core` exposes the same sweep to other consumers + (e.g. an autopilot tick). + + Audit trail: each release writes one `plan-subtask-claim` observation + with `status: 'available'` and `rescue_reason: 'auto-released-stale-claim'`, + matching the shape `bulkRescueStrandedSessions` already uses. + +- 782ddb6: Add `cluster_observations` MCP tool for semantic dedupe. + + Greedy single-linkage clustering by cosine threshold over a caller- + supplied set of observation IDs. The intended consumer is handoff / + attention_inbox dedupe: collect pending handoff IDs, pass them through + `cluster_observations(ids, threshold)`, and show the user one canonical + row per cluster instead of three different agents saying the same thing. + + The MCP tool wraps a new `MemoryStore.clusterObservations` primitive, + so callers that already use the core package can dedupe without going + through MCP. Threshold defaults to `0.85`; observations missing an + embedding come back in a separate `unembedded` array so the caller + chooses whether to keep them as singletons. Capped at 500 input IDs to + bound the O(N²) cosine cost. + +- a83eeea: `colony gain drift` and a matching `savings_drift_report` MCP tool flag + tools whose median tokens-per-call has drifted up or down. Default windows + are non-overlapping: recent = last 3 days, baseline = 14 days ending 3 days + before recent. 
Default thresholds: `--threshold 1.25` (up), `--down-threshold +0.75`, `--min-calls 20` per window. Classifications: `up_drift`, + `down_drift`, `new_tool` (no baseline), `gone` (no recent), `insufficient_data`, + `stable`. + + Storage gains `Storage.mcpTokenDriftPerOperation()` which computes per-operation + medians with a `ROW_NUMBER() OVER (PARTITION BY operation ORDER BY tpc)` + window function — chosen over the correlated `LIMIT 1 OFFSET (COUNT-1)/2` + form because SQLite forbids outer aggregate references in scalar-subquery + `OFFSET`. A `mcpMetricsMinTs()` helper surfaces a one-line warning when the + baseline window starts before the first recorded metric. + +- 7dcece2: ICM slice 2 — feedback `record`, `search`, and `stats` MCP tools. + + Adds a new `feedback` lane that records "AI predicted X, real answer + was Y" corrections so a future agent can search prior mistakes by + topic before repeating them. Migration 015 introduces the `feedback` + table plus a porter-unicode61 `feedback_fts` virtual table mirrored + by the standard `ai/ad/au` triggers; importance is a four-level enum + defaulting to `medium`. `prediction`, `correction`, and the optional + `context` flow through `MemoryStore.recordFeedback`, which routes each + body through `prepareMemoryText` — the same redact-then-compress path + observations use — so the compression invariant holds at the write + boundary. + + MCP surface (progressive disclosure): + + - `feedback_record({ topic, prediction, correction, context?, importance?, created_by? })` → `{ id }` + - `feedback_search({ query, topic?, limit? })` → compact hits (`id`, `topic`, `importance`, `score`, `snippet`, `created_at`) + - `feedback_stats({ topic? })` → per-topic counts and `last_created_at` + + Follow-up (separate PR): a pre-tool-use hook that surfaces prior + corrections on inbound prompts. This PR keeps the slice scoped to the + storage + search surface so it can ship behind a manual query first. 
+ + Reference: `docs/icm-integration-plan.md` slice 2. + +- 0950b42: ICM slice 3 — observation importance + temporal decay. + + Every observation now carries an `importance` tier + (`critical | high | medium | low`, default `medium`), a rolling + `access_count`, a `last_accessed_at` timestamp, and a `weight` value. + Critical/high pin their weight to the base value and never decay; + medium/low decay as `baseWeight / (1 + access_count * 0.1)` whenever + they are read. Read paths (`MemoryStore.search`, `getObservations`, + `semanticSearch`) coalesce ids into a debounced 50ms batch and flush + the access bookkeeping in one transactional UPDATE, so heavy read + loops trade at most one extra write per ~50ms window. + + Search and `get_observations` MCP responses now include `importance` + and `weight` on each row (additive — older callers ignore them). + `task_post` accepts an optional `importance` parameter forwarded to + the underlying observation insert. + + New CLI subcommand `colony memory prune` deletes near-zero-weight + medium/low rows; `--min-weight ` overrides the default 0.1 + threshold and `--dry-run` reports the candidate count without + deleting. Critical/high are never affected. + + Storage: schema bumped to version 17 with four additive columns on + `observations` and two new indexes. `Storage.recordAccess`, + `Storage.pruneLowDecay`, and `Storage.countLowDecayCandidates` are + the public primitives. (Originally targeted version 15 in isolation; + landed at 17 alongside slice 1 memoirs and slice 2 feedback.) + +- 0950b42: ICM slice 1 — memoirs (typed knowledge graphs). + + A memoir is a named container of typed concepts (graph nodes) connected + by typed relations (graph edges). Concept content routes through the + same `prepareMemoryText` redact → compress pipeline used for + observations, so the compression invariant holds at the write boundary. 
+ The nine relation types + (`part_of`, `depends_on`, `related_to`, `contradicts`, `refines`, + `alternative_to`, `caused_by`, `instance_of`, `superseded_by`) mirror + ICM's taxonomy and let agents express "what supersedes X", "what + contradicts decision Y", and similar structural reasoning without + abandoning the flat-observations primary store. + + Schema bump 14 → 15 adds three tables (`memoirs`, + `memoir_concepts`, `memoir_relations`) and one virtual table + (`memoir_concepts_fts`) with `(ai, ad, au)` triggers mirroring + `observations_fts`. Migrations are forward-only. + + `MemoryStore` exposes `createMemoir`, `listMemoirs`, `addConcept`, + `refineConcept`, `linkConcepts`, `searchConcepts`, and `inspectConcept`. + Seven MCP tools (`memoir_create`, `memoir_list`, `memoir_add_concept`, + `memoir_refine`, `memoir_link`, `memoir_search`, `memoir_inspect`) wrap + them with progressive disclosure — `memoir_search` returns compact hits + and `memoir_inspect` is the only path that returns full bodies plus a + BFS neighbourhood. + +- ccd51b6: Add `semantic_search` MCP tool for pure-vector recall. + + The existing `search` tool starts with a BM25 candidate pool and then + vector-reranks it. Queries whose terms never appear in any stored + observation return zero candidates and miss the vector path entirely — + that hurts cross-language queries, concept-level queries, and any case + where the agent and the writer used different words for the same idea. + + `semantic_search` is the escape hatch: skip FTS entirely, embed the + query, score every stored observation vector by cosine, return top-K. + Same compact return shape as `search` (id, session_id, kind, snippet, + score, ts, task_id) and the same progressive-disclosure rule + (callers fetch full bodies via `get_observations`). Requires an + embedding provider; returns a structured error otherwise. + + The implementation is intentionally O(N) over stored vectors.
At 50k + observations on the dev box it still fits inside the 50 ms p95 budget + for `search`; a future ANN index (sqlite-vss / HNSW) goes behind the + same method signature when scale demands it. + +- f7b490a: Cross-agent awareness: unified liveness + working-note "now lines". `readHivemind` accepts an optional `sqliteLiveness` source — sessions with stale heartbeat files but fresh SQLite observations are reclassified as working (`liveness_source: 'sqlite'`); every hivemind session now carries `liveness_source`. `attention_inbox` gains `active_working_notes` (latest task_note_working note per other live session, 30-min window) plus `summary.active_working_note_count`. The SessionStart task preface shows up to 3 co-participant "now:" lines so agents start each session knowing what everyone else is mid-flight on. + +### Patch Changes + +- cdf22de: Batch worker observation embedding calls and add batch-capable embedder providers. +- 4a68470: Fix read-then-write race in claim cleanup paths + + `releaseExpiredQuotaClaims` and `bulkRescueStrandedSessions` previously read + eligible claims outside their DEFERRED transaction, allowing two concurrent + callers to both snapshot the same rows and each emit a duplicate + `claim-weakened` or `rescue-stranded` audit observation. + + The fix moves the claim read inside a `BEGIN IMMEDIATE` transaction on both + paths so the write lock is acquired before any row is inspected. The storage + `transaction()` helper gains an `{ immediate: true }` option that maps to + better-sqlite3's `.immediate()` mode. A new idempotency test confirms that + calling each cleanup path twice produces exactly one audit observation. + +- 9e1a791: Explain `task_claim_file` rejections instead of returning a generic "not claimable" + + `task_claim_file` (and the `TaskThread.claimFile` / + `normalizeOptionalClaimPath` paths inside `@colony/core`) used to throw + `INVALID_CLAIM_PATH: claim path is not claimable` with no hint at the + reason. 
Telemetry showed agents bouncing off the same surface for the same + input — e.g. `colony/packages/core/test` (a directory) — because the + message gave them nothing to act on. + + The rejection branch now classifies the failure and renders a specific + message per reason: + + - `directory` — _"claim path "X" is a directory; claim individual files inside it instead."_ + - `pseudo` — _"claim path "X" is a pseudo path (e.g. /dev/null) and cannot be claimed."_ + - `outside_repo` — _"claim path "X" resolves outside this task's repo_root and cannot be claimed."_ + - `empty` — _"claim path is empty."_ + - fallback — the legacy generic message, still keyed on the input path. + + New exports from `@colony/storage`: + + - `classifyClaimPathRejection(context)` — pure classifier paralleling + `normalizeRepoFilePath`. Returns the reason or `null`. + - `claimPathRejectionMessage(reason, file_path)` — single source of + truth for the user-facing message so the MCP `task_claim_file` + handler and `TaskThread.claimFile` stay in sync. + - New storage method `classifyTaskFilePathRejection(task_id, file_path, +cwd?)` plumbs the task → repo_root lookup that the existing + `normalizeTaskFilePath` already does, so callers only pay for the + classifier on the error branch. + + No behavior change: the same inputs that used to be rejected are still + rejected; only the error message and code surface improve. Existing + INVALID_CLAIM_PATH error code is preserved. + +- dafe17b: Fix worktree-contention detection on macOS and Windows. `isGitWorktree` compared + `git rev-parse --show-toplevel` (a fully realpath'd path) against a merely + `resolve()`'d scan path. `resolve()` does not collapse symlinks (macOS + `/var` -> `/private/var`), 8.3 short names, or case (Windows), so every managed + worktree was dropped and `worktree_count` came back 0 — failing + `readWorktreeContentionReport` on the macOS/Windows CI matrix. 
Both sides are now + canonicalised with `realpathSync.native` (falling back to `resolve` when the path + can't be stat'd). Added a regression test that reaches the repo through an + absolute symlink, reproducing the condition on any platform. +- 950a95d: Drop the default `listPlans` limit from 50 → 10. + + Companion to the compact-default `task_plan_list` from #531. Even with compact responses, callers that don't pass `limit` were getting 50 plans by default. Lowering the implicit default trims the long tail for `task_plan_list` callers that omit `limit` — the explicit `.max(50)` ceiling on the MCP schema is preserved for callers that want more. Verified post-#531 that `colony gain` shows the predicted ~87% reduction on per-call token cost when the new binary is live; this change squeezes the remaining ~10-15% for non-compact-aware callers. + +- 7770b58: Session-start preface now enforces a global token budget (`sessionStart.prefaceTokenBudget`, default 800): low-priority sections (scope check, foraging, suggestions, prior sessions) drop first with a one-line trailer naming the trim. Prior-session summaries are capped at 300 chars each. MCP `get_observations` stops forcing expansion — it now honors `compression.expandForModel` (default false), so model-facing reads stay compressed unless `expand: true` is passed. `@colony/core` re-exports `countTokens`. +- e6c5766: Reject `task_claim_file` at the MCP layer when the task's branch is a protected base branch. + + `guardedClaimFile` already returned `protected_branch_rejected` (controlled by the `rejectProtectedBranchClaims` setting, default `true`) but the MCP handler silently fell through and recorded the claim anyway. The handler now checks for that status and returns a distinct `PROTECTED_BRANCH_CLAIM_REJECTED` error code with a message directing the agent to start a sandbox worktree first. + + `PROTECTED_BRANCH_CLAIM_REJECTED` is added to `TASK_THREAD_ERROR_CODES` in `@colony/core`. 
Two new integration tests cover the reject and allow cases. + + Note: the same `guardedClaimFile` call in `task_plan_claim_subtask` has the same gap; that is out of scope for this patch. + +- 60c3123: Changed the embedding backfill loop to send one batch of texts to embedders that support `embedBatch`, default worker batches to 32 observations, and persist each batch in a single SQLite transaction. The codex-gpu provider now calls `/embed/batch`, while storage copies returned embedding buffers so vector reads do not alias SQLite row memory. +- 2e8fba1: Stop attributing the storage-at-rest compression claim to live `savings_report` calls + + The `Storage at rest (per observation)` reference row used to map to `['savings_report']`. Live `savings_report` output is structured JSON (~3.5k tokens per call) where the caveman compressor preserves technical tokens byte-for-byte, so the live comparison projected the row's 1k-token baseline against ~3.5k actual tokens and reported negative savings (e.g. `-155%`). + + The row stays in the static reference — caveman compression really does shrink prose observations on disk — but it is now a structural claim about the storage layer rather than a per-call cost, so `mcp_operations` is empty. `savings_report` calls now show up under `unmatched_operations` instead of inflating the row. + +- 66fa52c: Surface unpublished on-disk plan workspaces in `task_plan_list`, and chain `plan create` into `plan publish` + + Two related improvements so orchestrators (and fleets of codex workers) don't waste cap on a plan they have a workspace for but never registered in Colony: + + - `task_plan_list` now scans `openspec/plans/*` and merges any disk workspace whose slug is not already registered, marked `registry_status: 'unpublished'`. Workers cannot claim from these, but seeing them lets the orchestrator notice and run `colony plan publish `. Pass `include_unpublished: false` to mirror the legacy registered-only behavior. 
+ - `colony plan create` now accepts `--publish` (plus optional `--publish-session`, `--publish-agent`, `--publish-auto-archive`) which chains into the same publish path immediately after the workspace is created, eliminating the "I created a plan but workers don't see it" failure mode. + - `PlanInfo.registry_status` gains an `'unpublished'` variant. + +- a87921e: `task_claim_file` now surfaces the task's `repo_root` in the + `INVALID_CLAIM_PATH` rejection message so agents see the exact anchor their + path failed to resolve against. The `outside_repo` and `unknown` branches of + `claimPathRejectionMessage(reason, file_path, { repo_root })` switch from a + terse "claim path is not claimable: …" to an actionable + "… resolves outside this task's repo_root \"\" …" / "… could not be + resolved relative to this task's repo_root \"\". Either retarget a task + whose repo_root matches the path being claimed, or pass a path that resolves + inside that anchor." So the agent can immediately tell whether to rewrite the + path or claim a different task. + + The MCP handler in `apps/mcp-server/src/tools/task.ts` and both + `TaskThread.claimFile` / `TaskThread.normalizeOptionalClaimPath` paths in + `packages/core/src/task-thread.ts` thread the task's repo_root through. + Backward compatible — the `context` arg is optional and existing callers see + the original messages. + +- 9a36e5e: Fix the worktree-contention report silently dropping worktrees whose path + crosses a symlink (e.g. macOS's `/var` -> `/private/var` tmpdir). `git +rev-parse --show-toplevel` returns a symlink-resolved path, but the detector + compared it with `resolve()`, which only normalizes — so the paths never + matched and the worktree was skipped. It now compares + `realpathSync`-canonicalized paths on both sides.
+- Updated dependencies [8f33724] +- Updated dependencies [4a68470] +- Updated dependencies [b6e2ad4] +- Updated dependencies [86a3d1a] +- Updated dependencies [edc318f] +- Updated dependencies [9e1a791] +- Updated dependencies [8917c73] +- Updated dependencies [a83eeea] +- Updated dependencies [e52cd83] +- Updated dependencies [53836ff] +- Updated dependencies [7dcece2] +- Updated dependencies [0950b42] +- Updated dependencies [0950b42] +- Updated dependencies [7aba1eb] +- Updated dependencies [3b86d74] +- Updated dependencies [7770b58] +- Updated dependencies [60c3123] +- Updated dependencies [71ee50d] +- Updated dependencies [3898ff3] +- Updated dependencies [a87921e] +- Updated dependencies [8a15958] + - @colony/storage@0.8.0 + - @colony/config@0.8.0 + ## 0.7.0 ### Minor Changes diff --git a/packages/core/package.json b/packages/core/package.json index c105983b..4fe34cbc 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,6 +1,6 @@ { "name": "@colony/core", - "version": "0.7.0", + "version": "0.8.0", "private": true, "license": "MIT", "type": "module", @@ -12,7 +12,9 @@ "import": "./dist/index.js" } }, - "files": ["dist"], + "files": [ + "dist" + ], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/embedding/CHANGELOG.md b/packages/embedding/CHANGELOG.md index 04dd6d7f..b0aef87f 100644 --- a/packages/embedding/CHANGELOG.md +++ b/packages/embedding/CHANGELOG.md @@ -1,5 +1,36 @@ # @colony/embedding +## 0.8.0 + +### Minor Changes + +- b6e2ad4: Add `codex-gpu` embedding provider that targets the recodee + `codex-gpu-embedder` HTTP service (`POST /embed`). + + When `settings.embedding.provider = 'codex-gpu'`, the worker hits the + local GPU embedder over HTTP instead of running Transformers.js in + process. Endpoint defaults to `http://127.0.0.1:8100`; override via + `settings.embedding.endpoint`. 
Captures `dim` from a one-shot warm-up + probe at init time, matching every other provider's contract. + + The recodee dev box measures ~16 ms per single embed on the GPU vs ~200 + ms on local CPU, so the worker's embedding-backfill loop completes + roughly 14× faster when configured this way. Behavior unchanged for any + deployment that does not opt in via the new provider value. + +### Patch Changes + +- cdf22de: Batch worker observation embedding calls and add batch-capable embedder providers. +- 60c3123: Changed the embedding backfill loop to send one batch of texts to embedders that support `embedBatch`, default worker batches to 32 observations, and persist each batch in a single SQLite transaction. The codex-gpu provider now calls `/embed/batch`, while storage copies returned embedding buffers so vector reads do not alias SQLite row memory. +- Updated dependencies [b6e2ad4] +- Updated dependencies [86a3d1a] +- Updated dependencies [7aba1eb] +- Updated dependencies [3b86d74] +- Updated dependencies [7770b58] +- Updated dependencies [60c3123] +- Updated dependencies [8a15958] + - @colony/config@0.8.0 + ## 0.7.0 ### Patch Changes diff --git a/packages/embedding/package.json b/packages/embedding/package.json index 18b77b8a..ddbc6caf 100644 --- a/packages/embedding/package.json +++ b/packages/embedding/package.json @@ -1,6 +1,6 @@ { "name": "@colony/embedding", - "version": "0.7.0", + "version": "0.8.0", "private": true, "license": "MIT", "type": "module", @@ -12,7 +12,9 @@ "import": "./dist/index.js" } }, - "files": ["dist"], + "files": [ + "dist" + ], "scripts": { "build": "tsup", "dev": "tsup --watch", diff --git a/packages/foraging/CHANGELOG.md b/packages/foraging/CHANGELOG.md index 3df1d4e4..95694227 100644 --- a/packages/foraging/CHANGELOG.md +++ b/packages/foraging/CHANGELOG.md @@ -1,5 +1,49 @@ # @colony/foraging +## 0.8.0 + +### Patch Changes + +- 829556b: Surface silent `catch {}` failures to stderr (rule #9). 
+ + Every empty catch in `session-start`, `scanner`, and the foraging MCP tool now either logs a `[colony] : ` line to stderr or carries a one-line comment explaining why silence is intentional (fs-stat races, missing-directory guards, best-effort cleanup). Previously a whole session's 43/43 MCP call failures could vanish with no trace. + +- Updated dependencies [8f33724] +- Updated dependencies [819660d] +- Updated dependencies [cdf22de] +- Updated dependencies [4a68470] +- Updated dependencies [782ddb6] +- Updated dependencies [b6e2ad4] +- Updated dependencies [86a3d1a] +- Updated dependencies [edc318f] +- Updated dependencies [9e1a791] +- Updated dependencies [8917c73] +- Updated dependencies [dafe17b] +- Updated dependencies [a83eeea] +- Updated dependencies [e52cd83] +- Updated dependencies [53836ff] +- Updated dependencies [7dcece2] +- Updated dependencies [0950b42] +- Updated dependencies [0950b42] +- Updated dependencies [7aba1eb] +- Updated dependencies [950a95d] +- Updated dependencies [3b86d74] +- Updated dependencies [7770b58] +- Updated dependencies [e6c5766] +- Updated dependencies [60c3123] +- Updated dependencies [71ee50d] +- Updated dependencies [2e8fba1] +- Updated dependencies [3898ff3] +- Updated dependencies [ccd51b6] +- Updated dependencies [66fa52c] +- Updated dependencies [a87921e] +- Updated dependencies [8a15958] +- Updated dependencies [f7b490a] +- Updated dependencies [9a36e5e] + - @colony/storage@0.8.0 + - @colony/core@0.8.0 + - @colony/config@0.8.0 + ## 0.7.0 ### Patch Changes diff --git a/packages/foraging/package.json b/packages/foraging/package.json index 2adbe28e..0fafa527 100644 --- a/packages/foraging/package.json +++ b/packages/foraging/package.json @@ -1,6 +1,6 @@ { "name": "@colony/foraging", - "version": "0.7.0", + "version": "0.8.0", "private": true, "license": "MIT", "type": "module", @@ -12,7 +12,9 @@ "import": "./dist/index.js" } }, - "files": ["dist"], + "files": [ + "dist" + ], "scripts": { "build": "tsup src/index.ts 
--format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/hooks/CHANGELOG.md b/packages/hooks/CHANGELOG.md index a19728a3..fc94c7c1 100644 --- a/packages/hooks/CHANGELOG.md +++ b/packages/hooks/CHANGELOG.md @@ -1,5 +1,70 @@ # @colony/hooks +## 0.8.0 + +### Minor Changes + +- 86a3d1a: Colony is context-aware by default, never forcing. The PreToolUse hook no + longer hard-denies a tool call on a protected-branch claim conflict under the + default `warn` policy — it warns and records telemetry but lets the edit + proceed. Repos that want the old hard block opt in with + `bridge.policyMode = "block-on-conflict"`. The per-turn Claude Code + read-before-edit reminder is dropped (the harness already enforces it), and the + compact session-start contract is reframed from imperative "claim/hand off + before X" mandates to availability framing ("coordination is available, pull it + when it helps"). No write-path persistence changes. +- 1d78c99: Push awareness: when an edit touches a file another live session holds, the PostToolUse hook now injects a one-line `[Colony] session X recently claimed …` note into the agent's context immediately (Claude Code `additionalContext`), instead of waiting for the next turn's preface. Debounced to once per 2 minutes per session via an `awareness-push` observation marker (hook processes are one-shot, so the marker doubles as audit trail). `autoClaimFromToolUse` now reports live-owner blocked takeovers in its `conflicts` result. +- 7770b58: Session-start preface now enforces a global token budget (`sessionStart.prefaceTokenBudget`, default 800): low-priority sections (scope check, foraging, suggestions, prior sessions) drop first with a one-line trailer naming the trim. Prior-session summaries are capped at 300 chars each. 
MCP `get_observations` stops forcing expansion — it now honors `compression.expandForModel` (default false), so model-facing reads stay compressed unless `expand: true` is passed. `@colony/core` re-exports `countTokens`. +- 8a15958: SessionStart hook now emits a compact one-line pointer to the quota-safe + operating contract by default instead of the full ~14-line verbose block, + saving ~350 tokens per SessionStart fire in every repo. New + `sessionStart.contractMode` setting (`'compact' | 'full' | 'none'`, + default `'compact'`) restores the legacy verbose preface (`'full'`) or + suppresses the contract entirely (`'none'`). AGENTS.md / CLAUDE.md keep + carrying the full protocol, so the compact pointer is enough for active + agents; one-time onboarding flows can opt into `'full'`. +- f7b490a: Cross-agent awareness: unified liveness + working-note "now lines". `readHivemind` accepts an optional `sqliteLiveness` source — sessions with stale heartbeat files but fresh SQLite observations are reclassified as working (`liveness_source: 'sqlite'`); every hivemind session now carries `liveness_source`. `attention_inbox` gains `active_working_notes` (latest task_note_working note per other live session, 30-min window) plus `summary.active_working_note_count`. The SessionStart task preface shows up to 3 co-participant "now:" lines so agents start each session knowing what everyone else is mid-flight on. + +### Patch Changes + +- 829556b: Surface silent `catch {}` failures to stderr (rule #9). + + Every empty catch in `session-start`, `scanner`, and the foraging MCP tool now either logs a `[colony] : ` line to stderr or carries a one-line comment explaining why silence is intentional (fs-stat races, missing-directory guards, best-effort cleanup). Previously a whole session's 43/43 MCP call failures could vanish with no trace. 
+ +- 3898ff3: Stop scanning the full task table on every PreToolUse tool call + + `protectedLiveClaimConflict` in the PreToolUse hook used `listTasks(1_000_000)` to find conflicting protected-branch claims and then linearly filtered the result by `repo_root` and `isProtectedBranch(branch)`. With the task table growing into the thousands across all agents, that scan dominated p95 latency on every editor tool call and violated the <150ms hook-handler budget. + + `@colony/storage` now exposes `listProtectedBranchTasksByRepo(repoRoot)`, a single index-backed query against the existing `UNIQUE(repo_root, branch)` constraint. The PreToolUse hook calls this in place of the unbounded scan; defensive `resolve()` and `isProtectedBranch()` checks remain inside the loop so storage path inconsistencies still get filtered out. No new migration is needed — the unique index already covers the new query shape. + +- Updated dependencies [819660d] +- Updated dependencies [cdf22de] +- Updated dependencies [4a68470] +- Updated dependencies [782ddb6] +- Updated dependencies [b6e2ad4] +- Updated dependencies [86a3d1a] +- Updated dependencies [9e1a791] +- Updated dependencies [dafe17b] +- Updated dependencies [a83eeea] +- Updated dependencies [7dcece2] +- Updated dependencies [0950b42] +- Updated dependencies [0950b42] +- Updated dependencies [7aba1eb] +- Updated dependencies [950a95d] +- Updated dependencies [3b86d74] +- Updated dependencies [7770b58] +- Updated dependencies [e6c5766] +- Updated dependencies [60c3123] +- Updated dependencies [2e8fba1] +- Updated dependencies [ccd51b6] +- Updated dependencies [66fa52c] +- Updated dependencies [a87921e] +- Updated dependencies [8a15958] +- Updated dependencies [f7b490a] +- Updated dependencies [9a36e5e] + - @colony/core@0.8.0 + - @colony/config@0.8.0 + ## 0.7.0 ### Minor Changes diff --git a/packages/hooks/package.json b/packages/hooks/package.json index 20b67a7a..3ad24857 100644 --- a/packages/hooks/package.json +++ 
b/packages/hooks/package.json @@ -1,6 +1,6 @@ { "name": "@colony/hooks", - "version": "0.7.0", + "version": "0.8.0", "private": true, "license": "MIT", "type": "module", @@ -12,7 +12,9 @@ "import": "./dist/index.js" } }, - "files": ["dist"], + "files": [ + "dist" + ], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/installers/CHANGELOG.md b/packages/installers/CHANGELOG.md index c351631f..df79fce6 100644 --- a/packages/installers/CHANGELOG.md +++ b/packages/installers/CHANGELOG.md @@ -1,5 +1,28 @@ # @colony/installers +## 0.8.0 + +### Minor Changes + +- 86a3d1a: `colony install --ide claude-code --verify` now works. The claude-code installer + gained a `verify()` implementation (it previously threw "does not support + --verify" despite the flag being advertised): it validates ~/.claude/settings.json + for the colony MCP server, all six hooks, and any detected OMX-layer MCP servers, + reporting missing/stale hooks per the same contract codex uses. The generic + validation helpers (sameArgs, missingDetectedOmxServers, validationIssue) moved + to a shared `validation.ts` module so both installers reuse them. 
+ +### Patch Changes + +- Updated dependencies [b6e2ad4] +- Updated dependencies [86a3d1a] +- Updated dependencies [7aba1eb] +- Updated dependencies [3b86d74] +- Updated dependencies [7770b58] +- Updated dependencies [60c3123] +- Updated dependencies [8a15958] + - @colony/config@0.8.0 + ## 0.7.0 ### Patch Changes diff --git a/packages/installers/package.json b/packages/installers/package.json index 7d749664..29e44090 100644 --- a/packages/installers/package.json +++ b/packages/installers/package.json @@ -1,6 +1,6 @@ { "name": "@colony/installers", - "version": "0.7.0", + "version": "0.8.0", "private": true, "license": "MIT", "type": "module", @@ -12,7 +12,9 @@ "import": "./dist/index.js" } }, - "files": ["dist"], + "files": [ + "dist" + ], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/queen/CHANGELOG.md b/packages/queen/CHANGELOG.md index 6bdc7c10..69ffbd04 100644 --- a/packages/queen/CHANGELOG.md +++ b/packages/queen/CHANGELOG.md @@ -1,5 +1,41 @@ # @colony/queen +## 0.8.0 + +### Patch Changes + +- Updated dependencies [8f33724] +- Updated dependencies [819660d] +- Updated dependencies [cdf22de] +- Updated dependencies [4a68470] +- Updated dependencies [782ddb6] +- Updated dependencies [edc318f] +- Updated dependencies [9e1a791] +- Updated dependencies [8917c73] +- Updated dependencies [dafe17b] +- Updated dependencies [a83eeea] +- Updated dependencies [e52cd83] +- Updated dependencies [53836ff] +- Updated dependencies [7dcece2] +- Updated dependencies [0950b42] +- Updated dependencies [0950b42] +- Updated dependencies [950a95d] +- Updated dependencies [7770b58] +- Updated dependencies [e6c5766] +- Updated dependencies [60c3123] +- Updated dependencies [71ee50d] +- Updated dependencies [2e8fba1] +- Updated dependencies [3898ff3] +- Updated dependencies [ccd51b6] +- Updated dependencies [66fa52c] +- Updated dependencies [a87921e] +- Updated dependencies [f7b490a] +- 
Updated dependencies [2cc5ff8] +- Updated dependencies [9a36e5e] + - @colony/storage@0.8.0 + - @colony/core@0.8.0 + - @colony/spec@0.8.0 + ## 0.7.0 ### Patch Changes diff --git a/packages/queen/package.json b/packages/queen/package.json index 4bedc589..f53361e1 100644 --- a/packages/queen/package.json +++ b/packages/queen/package.json @@ -1,6 +1,6 @@ { "name": "@colony/queen", - "version": "0.7.0", + "version": "0.8.0", "private": true, "license": "MIT", "type": "module", @@ -12,7 +12,9 @@ "import": "./dist/index.js" } }, - "files": ["dist"], + "files": [ + "dist" + ], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/spec/CHANGELOG.md b/packages/spec/CHANGELOG.md index 695b572f..0abc4ac5 100644 --- a/packages/spec/CHANGELOG.md +++ b/packages/spec/CHANGELOG.md @@ -1,5 +1,40 @@ # @colony/spec +## 0.8.0 + +### Patch Changes + +- 2cc5ff8: Windows CI green: `publishPlan` returns forward-slash `spec_change_path` / `plan_workspace_path` (separator-stable for MCP callers on every OS), and `archiveChange` fails deterministically when the archive target already exists (POSIX rename threw; Windows MoveFileEx silently clobbered). 
+- Updated dependencies [8f33724] +- Updated dependencies [819660d] +- Updated dependencies [cdf22de] +- Updated dependencies [4a68470] +- Updated dependencies [782ddb6] +- Updated dependencies [edc318f] +- Updated dependencies [9e1a791] +- Updated dependencies [8917c73] +- Updated dependencies [dafe17b] +- Updated dependencies [a83eeea] +- Updated dependencies [e52cd83] +- Updated dependencies [53836ff] +- Updated dependencies [7dcece2] +- Updated dependencies [0950b42] +- Updated dependencies [0950b42] +- Updated dependencies [950a95d] +- Updated dependencies [7770b58] +- Updated dependencies [e6c5766] +- Updated dependencies [60c3123] +- Updated dependencies [71ee50d] +- Updated dependencies [2e8fba1] +- Updated dependencies [3898ff3] +- Updated dependencies [ccd51b6] +- Updated dependencies [66fa52c] +- Updated dependencies [a87921e] +- Updated dependencies [f7b490a] +- Updated dependencies [9a36e5e] + - @colony/storage@0.8.0 + - @colony/core@0.8.0 + ## 0.7.0 ### Patch Changes diff --git a/packages/spec/package.json b/packages/spec/package.json index 48b38931..e16aa7e8 100644 --- a/packages/spec/package.json +++ b/packages/spec/package.json @@ -1,6 +1,6 @@ { "name": "@colony/spec", - "version": "0.7.0", + "version": "0.8.0", "license": "MIT", "private": true, "type": "module", diff --git a/packages/storage/CHANGELOG.md b/packages/storage/CHANGELOG.md index de5ee363..13b51c6a 100644 --- a/packages/storage/CHANGELOG.md +++ b/packages/storage/CHANGELOG.md @@ -1,5 +1,240 @@ # @colony/storage +## 0.8.0 + +### Minor Changes + +- 8f33724: Add `account_claims` table and three new MCP tools for binding Codex accounts to planner waves. + + `task_claim_account`, `task_release_account_claim`, and `task_list_account_claims` let the recodee planner Account Capacity rail bind a Codex account to a planner wave so multiple operators on the same plan see the same dispatch state. 
Bindings are keyed by `(plan_slug, wave_id)` — a planner-logical coordinate that exists before any Colony task is spawned — and persist across operators via a new `account_claims` SQLite table. A partial unique index enforces at-most-one-active claim per wave; released claims stay as audit history. + + Schema migrates forward-only from version 10 to 11. No data backfill is required: the table starts empty and is populated by user action. The contract is regression-tested via an MCP-inspector test (`apps/mcp-server/test/account-claims.test.ts`) exercising the full claim → rebind → release → list lifecycle. + +- edc318f: `colony gain --summary` now renders an rtk-style compact view over the same + `mcp_metrics` receipts: headline KPI stack (total calls, input/output/total + tokens, tokens saved, total exec time), efficiency meter, top-N **By + Operation** table with proportional impact bars, a 30-day **Daily Activity** + bar graph, and a 12-day **Daily Breakdown** table. `--graph` and `--daily` + narrow the output to a single section; `--days <n>` and `--top-ops <n>` tune + the window and table size. Per-operation saved-token credit is distributed + across each comparison row's `matched_operations` proportionally to call share + so the `Saved` column lines up with the headline total. + + Storage gains `Storage.aggregateMcpMetricsDaily({ since, until, operation })` + returning per-UTC-day rollups (`{ day, calls, input_tokens, output_tokens, +total_tokens, total_duration_ms }`) ordered newest-first. Type exports + `AggregateMcpMetricsDailyOptions` and `McpMetricsDailyRow` come along. + +- 9e1a791: Explain `task_claim_file` rejections instead of returning a generic "not claimable" + + `task_claim_file` (and the `TaskThread.claimFile` / + `normalizeOptionalClaimPath` paths inside `@colony/core`) used to throw + `INVALID_CLAIM_PATH: claim path is not claimable` with no hint at the + reason. Telemetry showed agents bouncing off the same surface for the same + input — e.g.
`colony/packages/core/test` (a directory) — because the + message gave them nothing to act on. + + The rejection branch now classifies the failure and renders a specific + message per reason: + + - `directory` — _"claim path "X" is a directory; claim individual files inside it instead."_ + - `pseudo` — _"claim path "X" is a pseudo path (e.g. /dev/null) and cannot be claimed."_ + - `outside_repo` — _"claim path "X" resolves outside this task's repo_root and cannot be claimed."_ + - `empty` — _"claim path is empty."_ + - fallback — the legacy generic message, still keyed on the input path. + + New exports from `@colony/storage`: + + - `classifyClaimPathRejection(context)` — pure classifier paralleling + `normalizeRepoFilePath`. Returns the reason or `null`. + - `claimPathRejectionMessage(reason, file_path)` — single source of + truth for the user-facing message so the MCP `task_claim_file` + handler and `TaskThread.claimFile` stay in sync. + - New storage method `classifyTaskFilePathRejection(task_id, file_path, +cwd?)` plumbs the task → repo_root lookup that the existing + `normalizeTaskFilePath` already does, so callers only pay for the + classifier on the error branch. + + No behavior change: the same inputs that used to be rejected are still + rejected; only the error message and code surface improve. Existing + INVALID_CLAIM_PATH error code is preserved. + +- a83eeea: `colony gain drift` and a matching `savings_drift_report` MCP tool flag + tools whose median tokens-per-call has drifted up or down. Default windows + are non-overlapping: recent = last 3 days, baseline = 14 days ending 3 days + before recent. Default thresholds: `--threshold 1.25` (up), `--down-threshold +0.75`, `--min-calls 20` per window. Classifications: `up_drift`, + `down_drift`, `new_tool` (no baseline), `gone` (no recent), `insufficient_data`, + `stable`. 
+ + Storage gains `Storage.mcpTokenDriftPerOperation()` which computes per-operation + medians with a `ROW_NUMBER() OVER (PARTITION BY operation ORDER BY tpc)` + window function — chosen over the correlated `LIMIT 1 OFFSET (COUNT-1)/2` + form because SQLite forbids outer aggregate references in scalar-subquery + `OFFSET`. A `mcpMetricsMinTs()` helper surfaces a one-line warning when the + baseline window starts before the first recorded metric. + +- 53836ff: `colony health --coach` walks a repo through first-week setup. It detects + adoption stage (`fresh` / `installed_no_signal` / `early` / `mid_adoption`) + from cheap signals (`countObservations`, installed-IDE flags, + `firstObservationTs`, `Math.max(toolCallsSince, countMcpMetricsSince)`), + then surfaces the NEXT incomplete step from a fixed 7-step ladder: + `install_runtime` → `first_task_post` → `first_task_claim_file` → + `first_task_hand_off` → `first_plan_claim` → `first_quota_release` → + `first_gain_review`. Each step carries an exact `cmd:` and `tool:` string. + + Progress is persisted in a new `coach_progress` SQLite table (migration + `014-coach-progress.ts`, schema_version 13 → 14). Step completion is + event-observed via `mcp_metrics` / `observations`, never user-clicked. + `colony gain` records a `coach_gain_review` observation so step 7 can + self-detect. `--coach` is mutually exclusive with `--fix-plan` and respects + `--json`. + +- 7dcece2: ICM slice 2 — feedback `record`, `search`, and `stats` MCP tools. + + Adds a new `feedback` lane that records "AI predicted X, real answer + was Y" corrections so a future agent can search prior mistakes by + topic before repeating them. Migration 015 introduces the `feedback` + table plus a porter-unicode61 `feedback_fts` virtual table mirrored + by the standard `ai/ad/au` triggers; importance is a four-level enum + defaulting to `medium`. 
`prediction`, `correction`, and the optional + `context` flow through `MemoryStore.recordFeedback`, which routes each + body through `prepareMemoryText` — the same redact-then-compress path + observations use — so the compression invariant holds at the write + boundary. + + MCP surface (progressive disclosure): + + - `feedback_record({ topic, prediction, correction, context?, importance?, created_by? })` → `{ id }` + - `feedback_search({ query, topic?, limit? })` → compact hits (`id`, `topic`, `importance`, `score`, `snippet`, `created_at`) + - `feedback_stats({ topic? })` → per-topic counts and `last_created_at` + + Follow-up (separate PR): a pre-tool-use hook that surfaces prior + corrections on inbound prompts. This PR keeps the slice scoped to the + storage + search surface so it can ship behind a manual query first. + + Reference: `docs/icm-integration-plan.md` slice 2. + +- 0950b42: ICM slice 3 — observation importance + temporal decay. + + Every observation now carries an `importance` tier + (`critical | high | medium | low`, default `medium`), a rolling + `access_count`, a `last_accessed_at` timestamp, and a `weight` value. + Critical/high pin their weight to the base value and never decay; + medium/low decay as `baseWeight / (1 + access_count * 0.1)` whenever + they are read. Read paths (`MemoryStore.search`, `getObservations`, + `semanticSearch`) coalesce ids into a debounced 50ms batch and flush + the access bookkeeping in one transactional UPDATE, so heavy read + loops trade at most one extra write per ~50ms window. + + Search and `get_observations` MCP responses now include `importance` + and `weight` on each row (additive — older callers ignore them). + `task_post` accepts an optional `importance` parameter forwarded to + the underlying observation insert. 
+ + New CLI subcommand `colony memory prune` deletes near-zero-weight + medium/low rows; `--min-weight <n>` overrides the default 0.1 + threshold and `--dry-run` reports the candidate count without + deleting. Critical/high are never affected. + + Storage: schema bumped to version 17 with four additive columns on + `observations` and two new indexes. `Storage.recordAccess`, + `Storage.pruneLowDecay`, and `Storage.countLowDecayCandidates` are + the public primitives. (Originally targeted version 15 in isolation; + landed at 17 alongside slice 1 memoirs and slice 2 feedback.) + +- 0950b42: ICM slice 1 — memoirs (typed knowledge graphs). + + A memoir is a named container of typed concepts (graph nodes) connected + by typed relations (graph edges). Concept content routes through the + same `prepareMemoryText` redact → compress pipeline used for + observations, so the compression invariant holds at the write boundary. + The nine relation types + (`part_of`, `depends_on`, `related_to`, `contradicts`, `refines`, + `alternative_to`, `caused_by`, `instance_of`, `superseded_by`) mirror + ICM's taxonomy and let agents express "what supersedes X", "what + contradicts decision Y", and similar structural reasoning without + abandoning the flat-observations primary store. + + Schema bump 14 → 15 adds three tables (`memoirs`, + `memoir_concepts`, `memoir_relations`) and one virtual table + (`memoir_concepts_fts`) with `(ai, ad, au)` triggers mirroring + `observations_fts`. Migrations are forward-only. + + `MemoryStore` exposes `createMemoir`, `listMemoirs`, `addConcept`, + `refineConcept`, `linkConcepts`, `searchConcepts`, and `inspectConcept`. + Seven MCP tools (`memoir_create`, `memoir_list`, `memoir_add_concept`, + `memoir_refine`, `memoir_link`, `memoir_search`, `memoir_inspect`) wrap + them with progressive disclosure — `memoir_search` returns compact hits + and `memoir_inspect` is the only path that returns full bodies plus a + BFS neighbourhood.
+ +- 71ee50d: Add `task_run_attempts` table + repository helpers (Symphony §4.1.5 / §7.2 — run-attempt lifecycle). New exports: `createRunAttempt`, `getRunAttempt`, `listRunAttemptsByTask`, `updateRunAttemptStatus`, `recordRunAttemptEvent`, `finishRunAttempt`, `RunAttemptError`, plus types `TaskRunAttemptRow`, `NewTaskRunAttempt`, `TaskRunAttemptEventUpdate`, `TaskRunAttemptFinish`, `RunAttemptStatus`, `RunAttemptTerminalStatus` and constants `RUN_ATTEMPT_ACTIVE_STATUSES` / `RUN_ATTEMPT_TERMINAL_STATUSES`. Foundation for Symphony Wave 3 MCP tools (Agents 209/210/211). + +### Patch Changes + +- 4a68470: Fix read-then-write race in claim cleanup paths + + `releaseExpiredQuotaClaims` and `bulkRescueStrandedSessions` previously read + eligible claims outside their DEFERRED transaction, allowing two concurrent + callers to both snapshot the same rows and each emit a duplicate + `claim-weakened` or `rescue-stranded` audit observation. + + The fix moves the claim read inside a `BEGIN IMMEDIATE` transaction on both + paths so the write lock is acquired before any row is inspected. The storage + `transaction()` helper gains an `{ immediate: true }` option that maps to + better-sqlite3's `.immediate()` mode. A new idempotency test confirms that + calling each cleanup path twice produces exactly one audit observation. + +- 8917c73: Fix two `colony health` scoring bugs that surfaced as "bad" readiness areas with no real defect: + + - **`colony_mcp_share.mcp_tool_calls = 0` despite live MCP traffic.** The counter only read `tool_calls` rows, missing MCP traffic when the calling agent's PostToolUse hook didn't fire for `mcp__*` tools. The counter now takes the max of that observed count and `mcp_metrics` row count (colony MCP server's own per-call receipt), with the source surfaced in `source_breakdown.colony_mcp_metrics`. New storage helper `countMcpMetricsSince(since, until?)`. 
+ - **`claim_before_edit_ratio = null` when any edits lacked file_path metadata.** Forcing the ratio to null whenever `edit_tool_calls !== edits_with_file_path` turned a real 200/363 = 55% signal into a bare `n/a` headline. The ratio is now computed over measurable edits whenever `edits_with_file_path > 0`; the `status` field still communicates partial measurability for downstream consumers. + +- e52cd83: Fix `aggregateMcpMetrics` error_reasons grouping so per-row counts sum to + `error_count`. The grouping previously partitioned by `(operation, error_code, +error_message)`, but several handlers embed unique session IDs in their error + messages (e.g. `sub-task is claimed by codex-session-XYZ`), so each race loss + produced a distinct group. Combined with a 3-row truncation per operation, the + result was that nearly all errors were hidden — `task_plan_claim_subtask` would + report 7 errors in the Top error reasons table while the Operations table showed + 93 for the same row. Grouping now drops `error_message` from the key (SQLite + picks the row with the latest `ts` for the sample message via its bare-column- + with-MAX optimization) and the per-operation cap is bumped from 3 to 8 since + codes are low-cardinality. Sum-of-reasons now matches error_count exactly. +- 60c3123: Changed the embedding backfill loop to send one batch of texts to embedders that support `embedBatch`, default worker batches to 32 observations, and persist each batch in a single SQLite transaction. The codex-gpu provider now calls `/embed/batch`, while storage copies returned embedding buffers so vector reads do not alias SQLite row memory. +- 3898ff3: Stop scanning the full task table on every PreToolUse tool call + + `protectedLiveClaimConflict` in the PreToolUse hook used `listTasks(1_000_000)` to find conflicting protected-branch claims and then linearly filtered the result by `repo_root` and `isProtectedBranch(branch)`. 
With the task table growing into the thousands across all agents, that scan dominated p95 latency on every editor tool call and violated the <150ms hook-handler budget. + + `@colony/storage` now exposes `listProtectedBranchTasksByRepo(repoRoot)`, a single index-backed query against the existing `UNIQUE(repo_root, branch)` constraint. The PreToolUse hook calls this in place of the unbounded scan; defensive `resolve()` and `isProtectedBranch()` checks remain inside the loop so storage path inconsistencies still get filtered out. No new migration is needed — the unique index already covers the new query shape. + +- a87921e: `task_claim_file` now surfaces the task's `repo_root` in the + `INVALID_CLAIM_PATH` rejection message so agents see the exact anchor their + path failed to resolve against. The `outside_repo` and `unknown` branches of + `claimPathRejectionMessage(reason, file_path, { repo_root })` switch from a + terse "claim path is not claimable: …" to an actionable + "… resolves outside this task's repo_root \"<repo_root>\" …" / "… could not be + resolved relative to this task's repo_root \"<repo_root>\". Either retarget a task + whose repo_root matches the path being claimed, or pass a path that resolves + inside that anchor." So the agent can immediately tell whether to rewrite the + path or claim a different task. + + The MCP handler in `apps/mcp-server/src/tools/task.ts` and both + `TaskThread.claimFile` / `TaskThread.normalizeOptionalClaimPath` paths in + `packages/core/src/task-thread.ts` thread the task's repo_root through. + Backward compatible — the `context` arg is optional and existing callers see + the original messages.
+ +- Updated dependencies [b6e2ad4] +- Updated dependencies [86a3d1a] +- Updated dependencies [7aba1eb] +- Updated dependencies [3b86d74] +- Updated dependencies [7770b58] +- Updated dependencies [60c3123] +- Updated dependencies [8a15958] + - @colony/config@0.8.0 + ## 0.7.0 ### Minor Changes diff --git a/packages/storage/package.json b/packages/storage/package.json index 722453bc..6eeb02f7 100644 --- a/packages/storage/package.json +++ b/packages/storage/package.json @@ -1,6 +1,6 @@ { "name": "@colony/storage", - "version": "0.7.0", + "version": "0.8.0", "private": true, "license": "MIT", "type": "module", @@ -12,7 +12,9 @@ "import": "./dist/index.js" } }, - "files": ["dist"], + "files": [ + "dist" + ], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", From 56d123de3abd8f53b16cfdfcfa0e54c7a5c0cab1 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Fri, 12 Jun 2026 17:05:50 +0200 Subject: [PATCH 2/2] chore(release): biome-format package.json files rewritten by changeset version --- apps/cli/package.json | 8 +------- packages/config/package.json | 4 +--- packages/core/package.json | 4 +--- packages/embedding/package.json | 4 +--- packages/foraging/package.json | 4 +--- packages/hooks/package.json | 4 +--- packages/installers/package.json | 4 +--- packages/queen/package.json | 4 +--- packages/storage/package.json | 4 +--- 9 files changed, 9 insertions(+), 31 deletions(-) diff --git a/apps/cli/package.json b/apps/cli/package.json index fa95c910..34a40439 100644 --- a/apps/cli/package.json +++ b/apps/cli/package.json @@ -32,13 +32,7 @@ "colony": "bin/colony.mjs" }, "main": "./dist/index.js", - "files": [ - "bin/colony.mjs", - "dist", - "hooks-scripts", - "README.md", - "LICENSE" - ], + "files": ["bin/colony.mjs", "dist", "hooks-scripts", "README.md", "LICENSE"], "scripts": { "build": "tsup", "dev": "tsup --watch --onSuccess \"node dist/index.js\"", diff --git a/packages/config/package.json 
b/packages/config/package.json index 30566ea4..ff9bc8db 100644 --- a/packages/config/package.json +++ b/packages/config/package.json @@ -12,9 +12,7 @@ "import": "./dist/index.js" } }, - "files": [ - "dist" - ], + "files": ["dist"], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/core/package.json b/packages/core/package.json index 4fe34cbc..c57a4d31 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -12,9 +12,7 @@ "import": "./dist/index.js" } }, - "files": [ - "dist" - ], + "files": ["dist"], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/embedding/package.json b/packages/embedding/package.json index ddbc6caf..9aedd60d 100644 --- a/packages/embedding/package.json +++ b/packages/embedding/package.json @@ -12,9 +12,7 @@ "import": "./dist/index.js" } }, - "files": [ - "dist" - ], + "files": ["dist"], "scripts": { "build": "tsup", "dev": "tsup --watch", diff --git a/packages/foraging/package.json b/packages/foraging/package.json index 0fafa527..79b74c44 100644 --- a/packages/foraging/package.json +++ b/packages/foraging/package.json @@ -12,9 +12,7 @@ "import": "./dist/index.js" } }, - "files": [ - "dist" - ], + "files": ["dist"], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/hooks/package.json b/packages/hooks/package.json index 3ad24857..22056bf4 100644 --- a/packages/hooks/package.json +++ b/packages/hooks/package.json @@ -12,9 +12,7 @@ "import": "./dist/index.js" } }, - "files": [ - "dist" - ], + "files": ["dist"], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/installers/package.json b/packages/installers/package.json index 29e44090..79823341 
100644 --- a/packages/installers/package.json +++ b/packages/installers/package.json @@ -12,9 +12,7 @@ "import": "./dist/index.js" } }, - "files": [ - "dist" - ], + "files": ["dist"], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/queen/package.json b/packages/queen/package.json index f53361e1..b845fdd1 100644 --- a/packages/queen/package.json +++ b/packages/queen/package.json @@ -12,9 +12,7 @@ "import": "./dist/index.js" } }, - "files": [ - "dist" - ], + "files": ["dist"], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch", diff --git a/packages/storage/package.json b/packages/storage/package.json index 6eeb02f7..4b56f45e 100644 --- a/packages/storage/package.json +++ b/packages/storage/package.json @@ -12,9 +12,7 @@ "import": "./dist/index.js" } }, - "files": [ - "dist" - ], + "files": ["dist"], "scripts": { "build": "tsup src/index.ts --format esm --dts --clean", "dev": "tsup src/index.ts --format esm --dts --watch",