From 774cd615c71905f8bc17fdfae0bed72096b37b21 Mon Sep 17 00:00:00 2001 From: chitcommit <208086304+chitcommit@users.noreply.github.com> Date: Wed, 3 Jun 2026 22:58:14 +0000 Subject: [PATCH 1/3] feat(daemon): supervisor unit + chittyserv-vm bootstrap (stops before start) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stacked on #101. Adds the runtime supervisor surface for the meta-orchestrator daemon foundation: - daemon/runtime/entrypoint.ts — Node entrypoint wiring runLeaderLoop with SIGTERM/SIGINT graceful shutdown that releases the lease before exit. - daemon/runtime/chittycommand-daemon.service — hardened systemd unit (Restart=always, dedicated chittycommand user, EnvironmentFile from /etc/chittycommand/env, journal logging). - daemon/runtime/env.tmpl — op inject template; only op:// references, no secret values. 1Password stays the cold source of truth. - daemon/runtime/launchd/com.chittyos.chittycommand-daemon.plist — macOS artifact for the Mac Mini cluster (not used on the VM, included so next-node bring-up doesn't need a separate PR). - daemon/runtime/tsconfig.daemon.json + package.json build:daemon script — emits dist/daemon/runtime/entrypoint.js for the service ExecStart. - scripts/install-daemon-vm.sh — idempotent VM bootstrap; supports --dry-run; creates user, builds, syncs to /opt/chittycommand, renders env via op inject, installs+enables the unit, STOPS WITHOUT STARTING. - docs/runbooks/daemon-bring-up-vm.md — operator runbook (mint flow, verify, heartbeat, failure modes, rollback). - daemon/supervisor.md — links to the runbook and pins chittyserv-vm as first-node target. Operator action required: sudo systemctl start chittycommand-daemon.service (after sudo -E ./scripts/install-daemon-vm.sh). See PR body for dry-run output. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- daemon/runtime/chittycommand-daemon.service | 39 ++++ daemon/runtime/entrypoint.ts | 131 ++++++++++++ daemon/runtime/env.tmpl | 34 ++++ .../com.chittyos.chittycommand-daemon.plist | 78 +++++++ daemon/runtime/tsconfig.daemon.json | 29 +++ daemon/supervisor.md | 2 + docs/runbooks/daemon-bring-up-vm.md | 190 ++++++++++++++++++ package.json | 1 + scripts/install-daemon-vm.sh | 154 ++++++++++++++ 9 files changed, 658 insertions(+) create mode 100644 daemon/runtime/chittycommand-daemon.service create mode 100644 daemon/runtime/entrypoint.ts create mode 100644 daemon/runtime/env.tmpl create mode 100644 daemon/runtime/launchd/com.chittyos.chittycommand-daemon.plist create mode 100644 daemon/runtime/tsconfig.daemon.json create mode 100644 docs/runbooks/daemon-bring-up-vm.md create mode 100755 scripts/install-daemon-vm.sh diff --git a/daemon/runtime/chittycommand-daemon.service b/daemon/runtime/chittycommand-daemon.service new file mode 100644 index 0000000..0f8c215 --- /dev/null +++ b/daemon/runtime/chittycommand-daemon.service @@ -0,0 +1,39 @@ +[Unit] +Description=ChittyCommand cluster daemon (meta-orchestrator leader) +Documentation=chittycanon://docs/architecture/chittycommand/ADR-001 +Documentation=chittycanon://docs/runbooks/chittycommand/daemon-bring-up-vm +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=chittycommand +Group=chittycommand +WorkingDirectory=/opt/chittycommand +EnvironmentFile=/etc/chittycommand/env +ExecStart=/usr/bin/node /opt/chittycommand/dist/daemon/runtime/entrypoint.js +Restart=always +RestartSec=5 +KillSignal=SIGTERM +TimeoutStopSec=30 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=chittycommand-daemon + +# Hardening +NoNewPrivileges=true +ProtectSystem=strict +ProtectHome=true +PrivateTmp=true +PrivateDevices=true +ProtectKernelTunables=true +ProtectKernelModules=true +ProtectControlGroups=true +RestrictNamespaces=true 
+RestrictRealtime=true +LockPersonality=true +MemoryDenyWriteExecute=true +ReadWritePaths=/var/log/chittycommand + +[Install] +WantedBy=multi-user.target diff --git a/daemon/runtime/entrypoint.ts b/daemon/runtime/entrypoint.ts new file mode 100644 index 0000000..feb0444 --- /dev/null +++ b/daemon/runtime/entrypoint.ts @@ -0,0 +1,131 @@ +/** + * ChittyCommand cluster daemon — process entrypoint. + * + * Reads required environment, runs `runLeaderLoop`, and traps SIGTERM/SIGINT + * so the lease is released cleanly before the process exits. + * + * This is the file launchd/systemd invokes via `node dist/daemon/runtime/entrypoint.js`. + * It is intentionally thin: all logic lives in `daemon/loop.ts` and `daemon/leader.ts`. + * + * @canonical-uri chittycanon://docs/architecture/chittycommand/daemon-supervisor + */ + +import { runLeaderLoop } from '../loop'; +import { releaseLeadership, META_LEADER_ROLE } from '../leader'; + +interface RequiredEnv { + NODE_CHITTY_ID: string; + DATABASE_URL: string; + NODE_DESCRIPTOR: string; +} + +function readEnv(): RequiredEnv { + const missing: string[] = []; + const nodeId = process.env.NODE_CHITTY_ID; + const dbUrl = process.env.DATABASE_URL; + const descriptor = process.env.NODE_DESCRIPTOR ?? process.env.HOSTNAME ?? ''; + + if (!nodeId) missing.push('NODE_CHITTY_ID'); + if (!dbUrl) missing.push('DATABASE_URL'); + if (!descriptor) missing.push('NODE_DESCRIPTOR or HOSTNAME'); + + if (missing.length > 0) { + process.stderr.write( + `[chittycommand-daemon] fatal: missing required env: ${missing.join(', ')}\n`, + ); + process.exit(2); + } + + return { + NODE_CHITTY_ID: nodeId!, + DATABASE_URL: dbUrl!, + NODE_DESCRIPTOR: descriptor, + }; +} + +function log(msg: string, meta?: Record): void { + const line = { + ts: new Date().toISOString(), + svc: 'chittycommand-daemon', + msg, + ...(meta ?? 
{}), + }; + process.stdout.write(`${JSON.stringify(line)}\n`); +} + +async function main(): Promise { + const env = readEnv(); + const sessionId = `${process.pid}@${Date.now()}`; + const controller = new AbortController(); + + log('daemon_start', { + nodeId: env.NODE_CHITTY_ID, + descriptor: env.NODE_DESCRIPTOR, + sessionId, + role: META_LEADER_ROLE, + }); + + let shuttingDown = false; + const shutdown = (signal: string) => { + if (shuttingDown) return; + shuttingDown = true; + log('signal_received', { signal }); + controller.abort(); + // Belt-and-suspenders release in case the loop is wedged before the + // abort path reaches releaseLeadership. + releaseLeadership({ DATABASE_URL: env.DATABASE_URL }, env.NODE_CHITTY_ID, { + role: META_LEADER_ROLE, + }) + .then((released) => log('release_on_signal', { released })) + .catch((err) => + log('release_on_signal_error', { + error: err instanceof Error ? err.message : String(err), + }), + ); + }; + + process.on('SIGTERM', () => shutdown('SIGTERM')); + process.on('SIGINT', () => shutdown('SIGINT')); + + const executor = async (intent: { id: string; intentType: string }) => { + // Foundation entrypoint: no real executor wired yet — the ActionAgent + // bridge ships in the follow-up PR per ADR-001 out-of-scope list. + // We mark the intent as dispatched to a sentinel ID so the leader loop + // makes forward progress in smoke tests without inventing fake work. + log('intent_executor_stub', { + intentId: intent.id, + intentType: intent.intentType, + note: 'executor wiring deferred to follow-up PR', + }); + return { dispatchedTaskId: `pending-executor:${intent.id}` }; + }; + + try { + const result = await runLeaderLoop( + { DATABASE_URL: env.DATABASE_URL }, + { + nodeId: env.NODE_CHITTY_ID, + nodeDescriptor: env.NODE_DESCRIPTOR, + sessionId, + signal: controller.signal, + log, + executor, + }, + ); + log('daemon_loop_returned', { ...result }); + } catch (err) { + log('daemon_fatal', { + error: err instanceof Error ? 
err.message : String(err), + }); + process.exitCode = 1; + } finally { + log('daemon_exit', { exitCode: process.exitCode ?? 0 }); + } +} + +main().catch((err) => { + process.stderr.write( + `[chittycommand-daemon] unhandled: ${err instanceof Error ? err.stack ?? err.message : String(err)}\n`, + ); + process.exit(1); +}); diff --git a/daemon/runtime/env.tmpl b/daemon/runtime/env.tmpl new file mode 100644 index 0000000..4b1e9be --- /dev/null +++ b/daemon/runtime/env.tmpl @@ -0,0 +1,34 @@ +# ChittyCommand cluster daemon environment template. +# +# Rendered at install time via: +# op inject -i daemon/runtime/env.tmpl -o /etc/chittycommand/env +# +# 1Password is the cold source of truth (operator-manifest policy). This file +# carries ONLY `op://` references — never real secret values. Do not commit +# a rendered copy. +# +# Vault layout assumed: +# - Vault: "ChittyOS-Core" +# - Items: CHITTYCOMMAND_DAEMON, CHITTYCOMMAND_NODES +# +# canonical-uri: chittycanon://docs/runbooks/chittycommand/daemon-bring-up-vm + +# --- Node identity (Location-type ChittyID minted via chittyid.chitty.cc) --- +NODE_CHITTY_ID="op://ChittyOS-Core/CHITTYCOMMAND_NODES/chittyserv-vm/chitty_id" +NODE_DESCRIPTOR="chittyserv-vm" + +# --- Neon connection for cc_node_leases + meta-orchestrator state --- +DATABASE_URL="op://ChittyOS-Core/CHITTYCOMMAND_DAEMON/database_url" + +# --- Ecosystem URLs (overridable; defaults baked into the code) --- +REGISTRY_URL="https://registry.chitty.cc" +CHITTYAGENT_URL="https://agent.chitty.cc" +CHITTYTRUST_URL="https://trust.chitty.cc" +CHITTYCONNECT_URL="https://connect.chitty.cc" + +# --- ChittyConnect token for context/sensitive-intent routing --- +CHITTYCONNECT_TOKEN="op://ChittyOS-Core/CHITTYCOMMAND_DAEMON/chittyconnect_token" + +# --- Node runtime --- +NODE_ENV="production" +NODE_OPTIONS="--enable-source-maps" diff --git a/daemon/runtime/launchd/com.chittyos.chittycommand-daemon.plist b/daemon/runtime/launchd/com.chittyos.chittycommand-daemon.plist new file mode 
100644 index 0000000..dfb2227 --- /dev/null +++ b/daemon/runtime/launchd/com.chittyos.chittycommand-daemon.plist @@ -0,0 +1,78 @@ + + + + + + Label + com.chittyos.chittycommand-daemon + + ProgramArguments + + /usr/local/bin/node + /opt/chittycommand/dist/daemon/runtime/entrypoint.js + + + WorkingDirectory + /opt/chittycommand + + UserName + chittycommand + + GroupName + chittycommand + + RunAtLoad + + + KeepAlive + + SuccessfulExit + + Crashed + + + + ThrottleInterval + 5 + + ExitTimeOut + 30 + + + EnvironmentVariables + + NODE_ENV + production + NODE_OPTIONS + --enable-source-maps + + + StandardOutPath + /var/log/chittycommand-daemon.out.log + + StandardErrorPath + /var/log/chittycommand-daemon.err.log + + ProcessType + Background + + diff --git a/daemon/runtime/tsconfig.daemon.json b/daemon/runtime/tsconfig.daemon.json new file mode 100644 index 0000000..1f3b68f --- /dev/null +++ b/daemon/runtime/tsconfig.daemon.json @@ -0,0 +1,29 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "module": "NodeNext", + "moduleResolution": "NodeNext", + "target": "ES2022", + "lib": ["ES2022"], + "types": ["node"], + "noEmit": false, + "outDir": "../../dist", + "rootDir": "../..", + "declaration": false, + "sourceMap": true, + "isolatedModules": false, + "paths": {} + }, + "include": [ + "../../daemon/**/*.ts", + "../../meta/intent.ts", + "../../src/db/schema.ts" + ], + "exclude": [ + "../../node_modules", + "../../ui", + "../../tests", + "../../src/agents", + "../../src/index.ts" + ] +} diff --git a/daemon/supervisor.md b/daemon/supervisor.md index 60ab6bc..fb113cc 100644 --- a/daemon/supervisor.md +++ b/daemon/supervisor.md @@ -7,6 +7,8 @@ related_adr: chittycanon://docs/architecture/chittycommand/ADR-001 # Cluster daemon — supervision plan +> **First-node target: `chittyserv-vm`.** Bring-up runbook + real systemd unit and bootstrap script live at [`docs/runbooks/daemon-bring-up-vm.md`](../docs/runbooks/daemon-bring-up-vm.md) (added in the stacked follow-on PR). 
The runtime artifacts are under [`daemon/runtime/`](./runtime/). + This document is doc-only. No runtime supervisor code ships in the foundation PR. The targets below are the homelab cluster of 6 Mac Minis (`chittymini-01..06`) plus `chittyserv-vm`. Each node runs **one** instance of diff --git a/docs/runbooks/daemon-bring-up-vm.md b/docs/runbooks/daemon-bring-up-vm.md new file mode 100644 index 0000000..9e97414 --- /dev/null +++ b/docs/runbooks/daemon-bring-up-vm.md @@ -0,0 +1,190 @@ +--- +canonical_uri: chittycanon://docs/runbooks/chittycommand/daemon-bring-up-vm +title: ChittyCommand cluster daemon — first-node bring-up (chittyserv-vm) +status: draft +related_adr: chittycanon://docs/architecture/chittycommand/ADR-001 +related_supervisor: chittycanon://docs/architecture/chittycommand/daemon-supervisor +target_node: chittyserv-vm (Oracle Cloud, Tailscale 100.96.187.36, Ubuntu) +--- + +# Daemon bring-up — chittyserv-vm + +This runbook brings the meta-orchestrator daemon up on the **first** cluster +node, `chittyserv-vm`. Subsequent nodes (`chittymini-02..06` on Ubuntu, +`chittymini-01` on macOS via launchd) follow the same shape; only the +templated env values change. + +The install script stops short of `systemctl start` on purpose. The operator +runs the final start command. + +## Pre-requisites + +1. **1Password Connect** reachable on the VM (`OP_CONNECT_URL` set; default is + `http://100.96.187.36:8080` per `chittyserv/docs/network.md`). +2. **`op` CLI** installed and signed in: + ```bash + command -v op && op whoami + ``` + If not signed in: `eval "$(op signin)"`. +3. **Node.js 20+** on the VM: + ```bash + node --version # expect v20.x or later + ``` +4. **Neon project provisioned** for ChittyCommand with the `cc_node_leases`, + `cc_goals`, `cc_plans`, `cc_intents` tables (migrations `0001_*`/`0002_*` + from the foundation PR applied). +5. 
**`postgresql-client`** (for `psql` smoke tests): + ```bash + sudo apt-get install -y postgresql-client + ``` + +## One-time: mint the node ChittyID + +`chittyserv-vm` needs a **Location-type** ChittyID stored in 1Password under +`op://ChittyOS-Core/CHITTYCOMMAND_NODES/chittyserv-vm/chitty_id`. + +**Operator must mint this — the public `chittyid.chitty.cc` landing page does +not expose an unauthenticated mint endpoint.** Two paths: + +- **Preferred:** call the chittyid worker via the registered MCP gateway + (`ch1tty -> chittyid`), entity type `L`, descriptor `chittyserv-vm`. +- **Fallback:** use the `chittyid-mint` action in ChittyCommand's existing + 43-tool MCP (`command.chitty.cc`) with the same parameters. + +Format check (must match `VV-G-LLL-SSSS-L-YM-C-X`, type segment = `L`): +```bash +echo "$CHITTY_ID" | grep -E '^[0-9A-Z]{2}-[0-9A-Z]-[0-9A-Z]{3}-[0-9A-Z]{4}-L-[0-9A-Z]{2}-[0-9A-Z]-[0-9A-Z]$' +``` + +Then store it: +```bash +op item edit "CHITTYCOMMAND_NODES" "chittyserv-vm.chitty_id=$CHITTY_ID" --vault ChittyOS-Core +``` + +Also populate: +- `op://ChittyOS-Core/CHITTYCOMMAND_DAEMON/database_url` — Neon connection string for the daemon role +- `op://ChittyOS-Core/CHITTYCOMMAND_DAEMON/chittyconnect_token` — ChittyConnect bearer token + +## Install (idempotent) + +On `chittyserv-vm`, from the repo root: + +```bash +# Dry-run first to inspect the plan (no sudo, no changes): +./scripts/install-daemon-vm.sh --dry-run + +# Real install: +sudo -E ./scripts/install-daemon-vm.sh +``` + +The script: +1. Creates the `chittycommand` system user. +2. Builds `dist/daemon/runtime/entrypoint.js` via `npm run build:daemon`. +3. Syncs artifacts to `/opt/chittycommand/`. +4. Runs `op inject -i daemon/runtime/env.tmpl -o /etc/chittycommand/env` + (1Password renders every `op://` reference — no secret ever touches shell + history or the repo). +5. Installs `/etc/systemd/system/chittycommand-daemon.service`. +6. `systemctl daemon-reload && systemctl enable chittycommand-daemon.service`. 
+7. **Stops without starting.** + +## Operator final command (the one thing the script will not do) + +```bash +sudo systemctl start chittycommand-daemon.service +``` + +## Verify + +### Leadership claim landed in Neon + +```bash +psql "$(sudo cat /etc/chittycommand/env | grep ^DATABASE_URL= | cut -d= -f2- | tr -d '"')" -c \ + "SELECT role, node_id, node_descriptor, session_id, claimed_at, heartbeat_at, lease_expires_at + FROM cc_node_leases + WHERE role='meta-orchestrator-leader';" +``` + +Expected: one row with `node_id` = the ChittyID you minted, `node_descriptor` = +`chittyserv-vm`, `lease_expires_at` ~30s in the future. + +### Heartbeat advancing + +Re-run the same query after ~15s. `heartbeat_at` and `lease_expires_at` should +both have advanced (no rebound, no new `claimed_at`). + +### Process health + +```bash +systemctl status chittycommand-daemon.service --no-pager +journalctl -u chittycommand-daemon.service -n 100 --no-pager +journalctl -u chittycommand-daemon.service -f # live tail +``` + +Look for these structured lines: +- `daemon_start` — process came up, read env +- `leader_acquired` — lease claimed +- `heartbeat_ok` — recurring every ~10s +- (Optionally) `intent_claimed` / `intent_completed` — only fires if real + intents exist in `cc_intents`. None expected on a clean foundation install. + +## Failure modes + +### Neon unreachable + +Per ADR-001: the node **parks**. You will see repeating `claimLeadership_error` +followed by `not_leader_parking` lines, no LAN gossip, no local election. This +is correct foundation behavior. Restore Neon reachability and the daemon +self-recovers on the next park interval (5s default). + +### Lease lost (another node claimed) + +`lease_lost_parking` is normal in a multi-node cluster — only one node holds +the leader role at a time. With just `chittyserv-vm` running, you should never +see this. If you do: another process is reusing this node's ChittyID — fix +that first. 
+ +### `op inject` fails at install + +``` +[install] fatal: 'op' is not signed in. Run: eval $(op signin) +``` +Sign in to 1Password and re-run the install script. It is idempotent. + +## Stop / uninstall + +```bash +# Graceful stop (daemon releases lease via SIGTERM handler) +sudo systemctl stop chittycommand-daemon.service + +# Disable autostart +sudo systemctl disable chittycommand-daemon.service + +# Full uninstall +sudo systemctl disable --now chittycommand-daemon.service +sudo rm -f /etc/systemd/system/chittycommand-daemon.service +sudo rm -rf /etc/chittycommand /opt/chittycommand /var/log/chittycommand +sudo systemctl daemon-reload +sudo userdel chittycommand 2>/dev/null || true +``` + +Confirm the lease is released: +```bash +psql "$DATABASE_URL" -c \ + "SELECT role, node_id FROM cc_node_leases WHERE role='meta-orchestrator-leader';" +``` +Expected: `node_id` is NULL (graceful release) within seconds of `systemctl stop`. + +## Rollback + +If the bring-up misbehaves and you need to abort cleanly: + +1. `sudo systemctl stop chittycommand-daemon.service` (daemon releases lease) +2. `sudo systemctl disable chittycommand-daemon.service` +3. Confirm `node_id` is NULL in `cc_node_leases` (above) +4. Leave the installed artifacts in place if you intend to re-run install + after a fix — re-running `install-daemon-vm.sh` is safe. +5. If you need to scrub completely, use the full uninstall block above. + +The foundation PR's `src/` (dashboard + ActionAgent + 43-tool MCP) is +untouched by this install — rollback affects only the daemon process. 
diff --git a/package.json b/package.json index 61cc381..c6b1796 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "backend:build": "npm run backend:typecheck", "deploy": "wrangler deploy", "predeploy": "npm run typecheck", + "build:daemon": "tsc -p daemon/runtime/tsconfig.daemon.json", "db:generate": "drizzle-kit generate", "db:migrate": "drizzle-kit migrate", "kv:seed": "bash scripts/seed-kv.sh ${KV_NAMESPACE_ID:-}", diff --git a/scripts/install-daemon-vm.sh b/scripts/install-daemon-vm.sh new file mode 100755 index 0000000..161eaec --- /dev/null +++ b/scripts/install-daemon-vm.sh @@ -0,0 +1,154 @@ +#!/usr/bin/env bash +# +# install-daemon-vm.sh — idempotent bootstrap for the ChittyCommand cluster +# daemon on chittyserv-vm (Oracle Cloud Ubuntu, Tailscale 100.96.187.36). +# +# Performs: +# 1. Create system user `chittycommand` +# 2. Build daemon (`npm run build:daemon` -> ./dist/daemon/runtime/entrypoint.js) +# 3. Sync built artifacts + runtime deps into /opt/chittycommand +# 4. Render /etc/chittycommand/env from 1Password via `op inject` +# 5. Install systemd unit; daemon-reload; enable (NOT start) +# +# Hard-stops before `systemctl start`. The operator runs the final command. +# +# Usage: +# sudo ./scripts/install-daemon-vm.sh # real install +# ./scripts/install-daemon-vm.sh --dry-run # print plan, no changes (no sudo needed) +# +# canonical-uri: chittycanon://docs/runbooks/chittycommand/daemon-bring-up-vm + +set -euo pipefail + +DRY_RUN=0 +if [[ "${1:-}" == "--dry-run" ]]; then + DRY_RUN=1 +fi + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +INSTALL_DIR="/opt/chittycommand" +ENV_DIR="/etc/chittycommand" +ENV_FILE="${ENV_DIR}/env" +ENV_TMPL="${REPO_ROOT}/daemon/runtime/env.tmpl" +UNIT_SRC="${REPO_ROOT}/daemon/runtime/chittycommand-daemon.service" +UNIT_DST="/etc/systemd/system/chittycommand-daemon.service" +LOG_DIR="/var/log/chittycommand" +SERVICE_USER="chittycommand" +NODE_BIN="$(command -v node || echo /usr/bin/node)" + +log() { printf '[install] %s\n' "$*"; } +plan() { printf '[plan] %s\n' "$*"; } + +run() { + if (( DRY_RUN )); then + plan "$*" + else + eval "$@" + fi +} + +require_or_warn() { + local bin="$1" + if ! command -v "$bin" >/dev/null 2>&1; then + if (( DRY_RUN )); then + plan "MISSING (would fail in real run): $bin" + else + echo "[install] fatal: required binary not found: $bin" >&2 + exit 3 + fi + fi +} + +log "ChittyCommand daemon bootstrap — $(date -u +%FT%TZ)" +log "Mode: $([[ $DRY_RUN -eq 1 ]] && echo DRY-RUN || echo REAL)" +log "Repo: ${REPO_ROOT}" +log "Target: ${INSTALL_DIR}" + +# 0. Sanity: required commands +for bin in node npm op systemctl useradd install; do + require_or_warn "$bin" +done + +# 0a. Sanity: target OS +if [[ ! -d /run/systemd/system ]] && (( DRY_RUN == 0 )); then + echo "[install] fatal: no /run/systemd/system — this host is not systemd-managed." >&2 + exit 4 +fi + +# 1. System user +if id "${SERVICE_USER}" >/dev/null 2>&1; then + log "user ${SERVICE_USER} already exists" +else + run "useradd --system --home-dir ${INSTALL_DIR} --shell /usr/sbin/nologin ${SERVICE_USER}" +fi + +# 2. Build +log "building daemon (npm run build:daemon)" +run "cd ${REPO_ROOT} && npm ci --omit=dev --no-audit --no-fund || npm install --no-audit --no-fund" +run "cd ${REPO_ROOT} && npm run build:daemon" + +# 3. Install dir + artifact sync +run "install -d -m 0755 -o ${SERVICE_USER} -g ${SERVICE_USER} ${INSTALL_DIR}" +run "install -d -m 0755 -o ${SERVICE_USER} -g ${SERVICE_USER} ${INSTALL_DIR}/dist" +run "cp -R ${REPO_ROOT}/dist/. 
${INSTALL_DIR}/dist/" +run "cp ${REPO_ROOT}/package.json ${INSTALL_DIR}/package.json" +# Runtime deps only (no devDeps); production install into install dir. +run "cd ${INSTALL_DIR} && npm install --omit=dev --no-audit --no-fund" +run "chown -R ${SERVICE_USER}:${SERVICE_USER} ${INSTALL_DIR}" + +# 4. Logs dir +run "install -d -m 0755 -o ${SERVICE_USER} -g ${SERVICE_USER} ${LOG_DIR}" + +# 5. Environment via op inject +run "install -d -m 0750 -o root -g ${SERVICE_USER} ${ENV_DIR}" +if (( DRY_RUN )); then + plan "op inject -i ${ENV_TMPL} -o ${ENV_FILE} # 1Password renders op:// refs" + plan "chmod 0640 ${ENV_FILE}; chown root:${SERVICE_USER} ${ENV_FILE}" +else + if [[ ! -f "${ENV_TMPL}" ]]; then + echo "[install] fatal: env template missing at ${ENV_TMPL}" >&2 + exit 5 + fi + if ! op whoami >/dev/null 2>&1; then + echo "[install] fatal: 'op' is not signed in. Run: eval \$(op signin)" >&2 + exit 6 + fi + op inject -i "${ENV_TMPL}" -o "${ENV_FILE}" + chmod 0640 "${ENV_FILE}" + chown "root:${SERVICE_USER}" "${ENV_FILE}" +fi + +# 6. systemd unit +run "install -m 0644 -o root -g root ${UNIT_SRC} ${UNIT_DST}" +run "systemctl daemon-reload" +run "systemctl enable chittycommand-daemon.service" + +# 7. Stop here. Operator runs `systemctl start`. +cat < Date: Thu, 4 Jun 2026 06:16:01 +0000 Subject: [PATCH 2/3] fix(daemon,docs,upload): resolve P1+6 P2 Codex findings on PR #105 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1 entrypoint stub executor → refusal (codex 3352439151): The stub returned a sentinel dispatchedTaskId, causing the leader loop to call markIntentDispatched + completeIntent and record real pending intents as 'done' without execution. Replace with an explicit throw so the loop's failure path runs (intent → failed with clear refusal message). The real executor lands in PR #107. 
P2 entrypoint SIGTERM release sessionId (codex 3353069328): releaseLeadership gates on session ownership (codex-p2 PR#101); the fallback shutdown release passed no sessionId, so it always no-op'd against our own session-stamped lease. Pass the loop's sessionId. P2 install-daemon-vm.sh devDeps prune (codex 3352439158): npm ci --omit=dev pruned typescript, breaking 'npm run build:daemon'. Install full deps for the repo build step; runtime image in ${INSTALL_DIR} still gets --omit=dev separately. P2 systemd unit NODE_BIN (codex 3352439162): Hard-coded /usr/bin/node breaks nvm / /usr/local/bin installs. Ship unit with @@NODE_BIN@@ placeholder; install script substitutes the detected node path before installing. P2 systemd MDWE+JIT (codex 3352439169): MemoryDenyWriteExecute=true is documented incompatible with V8 JIT and would abort node at startup. Remove the flag; document why. P2 launchd plist missing env (codex 3352439167): launchd has no EnvironmentFile equivalent. Plist invoked node directly without DATABASE_URL/NODE_CHITTY_ID/NODE_DESCRIPTOR, hitting entrypoint.ts's fatal-missing-env branch on Mac Mini nodes. Add a launchd-shim.sh that sources /etc/chittycommand/env (same shape as systemd EnvironmentFile) then execs node. P2 migration 0016 single-upload conflict (codex 3352439160): Unique partial index on r2_key broke single /upload's plain INSERT...RETURNING — re-uploads now 500'd on unique violation. Add ON CONFLICT (r2_key) WHERE (r2_key IS NOT NULL) DO NOTHING to match batch path; fall back to SELECT-existing returning 200. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- daemon/runtime/chittycommand-daemon.service | 13 +++++- daemon/runtime/entrypoint.ts | 26 ++++++++--- daemon/runtime/launchd-shim.sh | 46 +++++++++++++++++++ .../com.chittyos.chittycommand-daemon.plist | 10 +++- scripts/install-daemon-vm.sh | 20 ++++++-- src/routes/documents.ts | 16 ++++++- 6 files changed, 115 insertions(+), 16 deletions(-) create mode 100755 daemon/runtime/launchd-shim.sh diff --git a/daemon/runtime/chittycommand-daemon.service b/daemon/runtime/chittycommand-daemon.service index 0f8c215..2ff1b71 100644 --- a/daemon/runtime/chittycommand-daemon.service +++ b/daemon/runtime/chittycommand-daemon.service @@ -11,7 +11,13 @@ User=chittycommand Group=chittycommand WorkingDirectory=/opt/chittycommand EnvironmentFile=/etc/chittycommand/env -ExecStart=/usr/bin/node /opt/chittycommand/dist/daemon/runtime/entrypoint.js +# NOTE: install-daemon-vm.sh substitutes @@NODE_BIN@@ with the detected node +# path (command -v node), so installs like /usr/local/bin/node still start. +# A node under /home or /root (typical nvm) will NOT start: ProtectHome=true +# hides those paths. If editing by hand, use node's absolute path here. +# Codex P2 PR#105: previously hard-coded /usr/bin/node failed when node was +# installed elsewhere (nvm, /usr/local/bin). +ExecStart=@@NODE_BIN@@ /opt/chittycommand/dist/daemon/runtime/entrypoint.js Restart=always RestartSec=5 KillSignal=SIGTERM @@ -32,7 +38,10 @@ ProtectControlGroups=true RestrictNamespaces=true RestrictRealtime=true LockPersonality=true -MemoryDenyWriteExecute=true +# MemoryDenyWriteExecute is intentionally OMITTED. +# Codex P2 PR#105: systemd documents MDWE as incompatible with JIT engines +# (V8 generates executable code pages at runtime). Enabling it would abort +# Node at startup. Source maps in NODE_OPTIONS do not affect this. 
ReadWritePaths=/var/log/chittycommand [Install] diff --git a/daemon/runtime/entrypoint.ts b/daemon/runtime/entrypoint.ts index feb0444..fda3da7 100644 --- a/daemon/runtime/entrypoint.ts +++ b/daemon/runtime/entrypoint.ts @@ -72,9 +72,12 @@ async function main(): Promise { log('signal_received', { signal }); controller.abort(); // Belt-and-suspenders release in case the loop is wedged before the - // abort path reaches releaseLeadership. + // abort path reaches releaseLeadership. Pass sessionId — releaseLeadership + // gates on session ownership (codex-p2 PR#101 finding-2), so omitting it + // would no-op against a lease claimed with our sessionId. releaseLeadership({ DATABASE_URL: env.DATABASE_URL }, env.NODE_CHITTY_ID, { role: META_LEADER_ROLE, + sessionId, }) .then((released) => log('release_on_signal', { released })) .catch((err) => @@ -89,15 +92,24 @@ async function main(): Promise { const executor = async (intent: { id: string; intentType: string }) => { // Foundation entrypoint: no real executor wired yet — the ActionAgent - // bridge ships in the follow-up PR per ADR-001 out-of-scope list. - // We mark the intent as dispatched to a sentinel ID so the leader loop - // makes forward progress in smoke tests without inventing fake work. - log('intent_executor_stub', { + // bridge ships in PR #107 (feat/daemon-loop-executes-intents) per + // ADR-001 out-of-scope list. Until then, claimed intents must NOT be + // recorded as `done`. Throwing here routes the intent through the + // loop's failure path (failIntent), which records a clear refusal + // reason instead of inventing a successful dispatch. + // + // Codex P1 PR#105: previously returned a sentinel dispatchedTaskId, + // causing markIntentDispatched + completeIntent to record real work + // as `done` without execution. Refuse instead. 
+ log('intent_executor_unwired', { intentId: intent.id, intentType: intent.intentType, - note: 'executor wiring deferred to follow-up PR', + note: 'real executor lands in PR #107; refusing to record fake success', }); - return { dispatchedTaskId: `pending-executor:${intent.id}` }; + throw new Error( + `daemon executor not wired on PR #105 (foundation only); ` + + `intent ${intent.id} routed to failed — real executor lands in PR #107`, + ); }; try { diff --git a/daemon/runtime/launchd-shim.sh b/daemon/runtime/launchd-shim.sh new file mode 100755 index 0000000..f14017e --- /dev/null +++ b/daemon/runtime/launchd-shim.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# +# launchd-shim.sh — macOS env-loading shim for the ChittyCommand daemon. +# +# launchd has no native EnvironmentFile equivalent (unlike systemd), so this +# shim sources /etc/chittycommand/env before exec'ing node. The systemd unit +# uses EnvironmentFile=/etc/chittycommand/env directly; this shim keeps the +# macOS path consistent. +# +# Codex P2 PR#105: previously the launchd plist invoked node directly with +# only NODE_ENV/NODE_OPTIONS exported, which meant entrypoint.ts's readEnv() +# always tripped its fatal-missing-env branch on Mac Mini nodes. +# +# Install path: /opt/chittycommand/dist/daemon/runtime/launchd-shim.sh +# Mode: 0755, owned by chittycommand:chittycommand +# +# canonical-uri: chittycanon://docs/architecture/chittycommand/daemon-supervisor + +set -euo pipefail + +ENV_FILE="${CHITTYCOMMAND_ENV_FILE:-/etc/chittycommand/env}" +NODE_BIN="${CHITTYCOMMAND_NODE_BIN:-/usr/local/bin/node}" +ENTRYPOINT="/opt/chittycommand/dist/daemon/runtime/entrypoint.js" + +if [[ ! -r "${ENV_FILE}" ]]; then + echo "[chittycommand-daemon-shim] fatal: env file not readable: ${ENV_FILE}" >&2 + exit 7 +fi + +# Source env file. The file is the same KEY=VALUE format the systemd +# EnvironmentFile expects, rendered by `op inject` at install time. +set -a +# shellcheck disable=SC1090 +. 
"${ENV_FILE}" +set +a + +# Preserve NODE_ENV / NODE_OPTIONS if launchd set them. +export NODE_ENV="${NODE_ENV:-production}" +export NODE_OPTIONS="${NODE_OPTIONS:---enable-source-maps}" + +if [[ ! -x "${NODE_BIN}" ]]; then + echo "[chittycommand-daemon-shim] fatal: node not executable at ${NODE_BIN}" >&2 + exit 8 +fi + +exec "${NODE_BIN}" "${ENTRYPOINT}" diff --git a/daemon/runtime/launchd/com.chittyos.chittycommand-daemon.plist b/daemon/runtime/launchd/com.chittyos.chittycommand-daemon.plist index dfb2227..04682be 100644 --- a/daemon/runtime/launchd/com.chittyos.chittycommand-daemon.plist +++ b/daemon/runtime/launchd/com.chittyos.chittycommand-daemon.plist @@ -19,10 +19,16 @@ Label com.chittyos.chittycommand-daemon + ProgramArguments - /usr/local/bin/node - /opt/chittycommand/dist/daemon/runtime/entrypoint.js + /opt/chittycommand/dist/daemon/runtime/launchd-shim.sh WorkingDirectory diff --git a/scripts/install-daemon-vm.sh b/scripts/install-daemon-vm.sh index 161eaec..736187b 100755 --- a/scripts/install-daemon-vm.sh +++ b/scripts/install-daemon-vm.sh @@ -83,8 +83,12 @@ else fi # 2. Build +# Build needs typescript (devDependency). We install ALL deps for the repo +# build step here, then install --omit=dev separately into ${INSTALL_DIR} +# below so the runtime image is dev-free. +# Codex P2 PR#105: previously --omit=dev pruned tsc, breaking build:daemon. log "building daemon (npm run build:daemon)" -run "cd ${REPO_ROOT} && npm ci --omit=dev --no-audit --no-fund || npm install --no-audit --no-fund" +run "cd ${REPO_ROOT} && npm ci --no-audit --no-fund || npm install --no-audit --no-fund" run "cd ${REPO_ROOT} && npm run build:daemon" # 3. Install dir + artifact sync @@ -118,8 +122,18 @@ else chown "root:${SERVICE_USER}" "${ENV_FILE}" fi -# 6. systemd unit -run "install -m 0644 -o root -g root ${UNIT_SRC} ${UNIT_DST}" +# 6. systemd unit — substitute @@NODE_BIN@@ with detected node path. 
+# Codex P2 PR#105: the unit ships with a placeholder so installs that use +# nvm or /usr/local/bin/node don't fail on ExecStart=/usr/bin/node. +if (( DRY_RUN )); then + plan "sed s|@@NODE_BIN@@|${NODE_BIN}| ${UNIT_SRC} > /tmp/chittycommand-daemon.service" + plan "install -m 0644 -o root -g root /tmp/chittycommand-daemon.service ${UNIT_DST}" +else + RENDERED_UNIT="$(mktemp)" + sed "s|@@NODE_BIN@@|${NODE_BIN}|g" "${UNIT_SRC}" > "${RENDERED_UNIT}" + install -m 0644 -o root -g root "${RENDERED_UNIT}" "${UNIT_DST}" + rm -f "${RENDERED_UNIT}" +fi run "systemctl daemon-reload" run "systemctl enable chittycommand-daemon.service" diff --git a/src/routes/documents.ts b/src/routes/documents.ts index e00c5c6..48da95c 100644 --- a/src/routes/documents.ts +++ b/src/routes/documents.ts @@ -104,12 +104,24 @@ documentRoutes.post('/upload', async (c) => { httpMetadata: { contentType: file.type }, customMetadata: { filename: safeName, source: 'chittycommand' }, }); - const [doc] = await sql` + // Codex P2 PR#105: migration 0016 adds a unique partial index on r2_key, + // so re-uploading an already-ingested sha256/* key would raise a unique- + // violation 500 here. Match the batch path's ON CONFLICT semantics: skip + // the insert, then SELECT the existing row to return. + const inserted = await sql` INSERT INTO cc_documents (doc_type, source, filename, r2_key, processing_status) VALUES ('upload', 'manual', ${safeName}, ${r2Key}, 'pending') + ON CONFLICT (r2_key) WHERE (r2_key IS NOT NULL) DO NOTHING RETURNING * `; - return c.json(doc, 201); + if (inserted.length > 0) { + return c.json(inserted[0], 201); + } + // Existing row already had this r2_key — return it with 200 instead of 201. 
+ const [existing] = await sql` + SELECT * FROM cc_documents WHERE r2_key = ${r2Key} LIMIT 1 + `; + return c.json(existing, 200); }); // Batch upload via ChittyStorage From b45162237c653540e9833a7ac17b5ed169e3f9f8 Mon Sep 17 00:00:00 2001 From: chitcommit <208086304+chitcommit@users.noreply.github.com> Date: Wed, 10 Jun 2026 16:19:27 +0000 Subject: [PATCH 3/3] fix(daemon,executors): make executor registry portable for the daemon build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaying #105 onto current main (executor registry from #106) broke the daemon build two ways. Fix both so #105 ships a daemon that builds on the VM. 1. entrypoint.ts passed an `executor` callback to runLeaderLoop — removed in #106 when the loop began dispatching through the canonical executor registry (executeIntent). Drop the stub: an empty registry routes every claimed intent through dispatch's "no executor registered" throw → failIntent → `failed`, never silent `done`, preserving the PR #105 Codex-P1 safety property without a callback. 2. The executor registry (meta/executors/{types,dispatch,update-obligation- status}.ts) hard-imported the Workers `Env` from src/index, neither available nor compilable in the daemon's NodeNext + node-types build. Per ADR-001 the registry is consumed by BOTH the Worker and the daemon, so it must not depend on Workers-only types. Introduce a minimal structural `ExecutorEnv { DATABASE_URL?, HYPERDRIVE? }` (the only env the registry reads; getSql already cast to exactly this). Worker `Env` stays structurally assignable — ActionAgent callers unchanged. No index signature: keeps typo-safety on env reads for the real-money mercury path. Also add the explicit `/index.js` extension to meta/intent.ts's dynamic `import('./executors')` (required under NodeNext; accepted by the Worker's Bundler resolution). Verified: `npm run typecheck` (Worker, Bundler) and `npm run build:daemon` (NodeNext) both exit 0. 
Follow-up (needs `workflow` scope, separate push): add `tsc -p daemon/runtime/tsconfig.daemon.json --noEmit` as a CI step so the daemon build is gated — catches the #108 ripple (mercury also imports Workers `Env`) at PR time instead of VM install. Co-Authored-By: Claude Opus 4.8 (1M context) --- daemon/runtime/entrypoint.ts | 31 ++++++---------------- meta/executors/dispatch.ts | 12 +++------ meta/executors/types.ts | 22 +++++++++++++-- meta/executors/update-obligation-status.ts | 1 - meta/intent.ts | 2 +- 5 files changed, 33 insertions(+), 35 deletions(-) diff --git a/daemon/runtime/entrypoint.ts b/daemon/runtime/entrypoint.ts index fda3da7..ece2520 100644 --- a/daemon/runtime/entrypoint.ts +++ b/daemon/runtime/entrypoint.ts @@ -90,28 +90,14 @@ async function main(): Promise { process.on('SIGTERM', () => shutdown('SIGTERM')); process.on('SIGINT', () => shutdown('SIGINT')); - const executor = async (intent: { id: string; intentType: string }) => { - // Foundation entrypoint: no real executor wired yet — the ActionAgent - // bridge ships in PR #107 (feat/daemon-loop-executes-intents) per - // ADR-001 out-of-scope list. Until then, claimed intents must NOT be - // recorded as `done`. Throwing here routes the intent through the - // loop's failure path (failIntent), which records a clear refusal - // reason instead of inventing a successful dispatch. - // - // Codex P1 PR#105: previously returned a sentinel dispatchedTaskId, - // causing markIntentDispatched + completeIntent to record real work - // as `done` without execution. Refuse instead. 
- log('intent_executor_unwired', { - intentId: intent.id, - intentType: intent.intentType, - note: 'real executor lands in PR #107; refusing to record fake success', - }); - throw new Error( - `daemon executor not wired on PR #105 (foundation only); ` + - `intent ${intent.id} routed to failed — real executor lands in PR #107`, - ); - }; - + // No executor callback is passed: as of PR #106 the leader loop dispatches + // through the canonical executor registry (`meta/executors/*`) via + // `executeIntent`, not an injected callback. This foundation entrypoint + // imports no executor modules, so the registry is empty — every claimed + // intent hits dispatch's "no executor registered" path and is routed to + // `failed` (never silently `done`). That preserves the PR #105 Codex P1 + // safety property without a stub callback. Real executors self-register + // once their modules are imported (mercury_payment lands in PR #108). try { const result = await runLeaderLoop( { DATABASE_URL: env.DATABASE_URL }, @@ -121,7 +107,6 @@ async function main(): Promise { sessionId, signal: controller.signal, log, - executor, }, ); log('daemon_loop_returned', { ...result }); diff --git a/meta/executors/dispatch.ts b/meta/executors/dispatch.ts index 4d16b68..f933e56 100644 --- a/meta/executors/dispatch.ts +++ b/meta/executors/dispatch.ts @@ -20,19 +20,15 @@ */ import { neon, type NeonQueryFunction } from '@neondatabase/serverless'; -import type { Env } from '../../src/index'; import type { Intent, SovereigntyAssessmentSnapshot } from '../intent'; import { failIntent } from '../intent'; import { assessSovereignty } from '../sovereignty'; import { getExecutor } from './registry'; import { SOVEREIGNTY_FRESHNESS_MS } from './types'; -import type { ExecutorContext, ExecutorResult, ExecutorRunOutput } from './types'; +import type { ExecutorContext, ExecutorEnv, ExecutorResult, ExecutorRunOutput } from './types'; -function getSql(env: Env): NeonQueryFunction { - const conn = - (env as unknown 
as { DATABASE_URL?: string }).DATABASE_URL || - (env as unknown as { HYPERDRIVE?: { connectionString: string } }).HYPERDRIVE - ?.connectionString; +function getSql(env: ExecutorEnv): NeonQueryFunction { + const conn = env.DATABASE_URL || env.HYPERDRIVE?.connectionString; if (!conn) { throw new Error('[meta/executors/dispatch] No DATABASE_URL or HYPERDRIVE binding'); } @@ -78,7 +74,7 @@ export interface DispatchOptions { export async function dispatch( intent: Intent, - env: Env, + env: ExecutorEnv, options: DispatchOptions = {}, ): Promise { const sql = getSql(env); diff --git a/meta/executors/types.ts b/meta/executors/types.ts index 3c662ee..9bad280 100644 --- a/meta/executors/types.ts +++ b/meta/executors/types.ts @@ -13,9 +13,27 @@ */ import type { NeonQueryFunction } from '@neondatabase/serverless'; -import type { Env } from '../../src/index'; import type { Intent, SovereigntyAssessmentSnapshot } from '../intent'; +/** + * Minimal env contract the executor registry depends on. + * + * Per ADR-001 the registry is consumed by BOTH the Cloudflare Worker + * (ActionAgent) and the meta-orchestrator daemon. The Worker's `Env` + * (Workers-typed: Hyperdrive, KV, R2, …) is neither available nor compilable + * in the daemon's NodeNext + node-types build. Depending on a structural slice + * instead of `src/index`'s Worker `Env` keeps the registry portable across both + * consumers. The Worker `Env` is structurally assignable to this (it has both + * fields), so Worker-side callers pass through unchanged. + * + * Executors needing bindings beyond the DB connection cast `ctx.env` at their + * use site. + */ +export interface ExecutorEnv { + DATABASE_URL?: string; + HYPERDRIVE?: { connectionString: string }; +} + /** * Re-reckon window. If `intent.sovereigntyAssessment.assessedAt` is older * than this at executor entry, dispatch() re-runs the gate. 
@@ -23,7 +41,7 @@ import type { Intent, SovereigntyAssessmentSnapshot } from '../intent'; export const SOVEREIGNTY_FRESHNESS_MS = 5 * 60 * 1000; // 5 minutes export interface ExecutorContext { - env: Env; + env: ExecutorEnv; sql: NeonQueryFunction; intent: Intent; /** The assessment snapshot in force at execution time (possibly re-reckoned). */ diff --git a/meta/executors/update-obligation-status.ts b/meta/executors/update-obligation-status.ts index 442acd7..30c164d 100644 --- a/meta/executors/update-obligation-status.ts +++ b/meta/executors/update-obligation-status.ts @@ -14,7 +14,6 @@ import { z } from 'zod'; import type { NeonQueryFunction } from '@neondatabase/serverless'; -import type { Env } from '../../src/index'; import type { ExecutorContext, ExecutorRunOutput, IntentExecutor } from './types'; import { registerExecutor } from './registry'; diff --git a/meta/intent.ts b/meta/intent.ts index 7dbb7b3..75edfcc 100644 --- a/meta/intent.ts +++ b/meta/intent.ts @@ -487,7 +487,7 @@ export async function executeIntent( ): Promise { // Lazy import to avoid forcing the executor registry on every meta/intent // consumer (and to keep the existing module's surface stable). - const { dispatch } = await import('./executors'); + const { dispatch } = await import('./executors/index.js'); const sql = getSql(env);