From a42da3a7c06291ccb96a3421b6ad4cec811848fa Mon Sep 17 00:00:00 2001 From: David Hyrule Date: Mon, 29 Jun 2026 17:19:26 +0200 Subject: [PATCH 1/2] engineering-loop: ship loop producer traces to collector --- ansible/inventory/host_vars/loop.yml | 13 ++++++++----- ansible/roles/engineering_loop/defaults/main.yml | 1 + ansible/roles/knowledge_mcp/defaults/main.yml | 2 ++ .../templates/hyrule-knowledge-mcp.service.j2 | 2 ++ .../templates/engineering-loop.env.ctmpl.j2 | 1 + 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/ansible/inventory/host_vars/loop.yml b/ansible/inventory/host_vars/loop.yml index 5823337a..36db16e0 100644 --- a/ansible/inventory/host_vars/loop.yml +++ b/ansible/inventory/host_vars/loop.yml @@ -4,23 +4,26 @@ # not a deploy source for the infra fleet: no fleet SSH key, no production # Vault breadth, and no public listener beyond node_exporter for mon. -# Bumped to the build containing the Phase 3 agent-core emitter + agent-core dep -# (engineering-loop #24, #25). Required before enabling trace emission below: the prior -# pin (eb37b1c) has no reader for HYRULE_ENGINEERING_AGENT_CORE_TRACE (per codex review). -engineering_loop_version: "280063080b0d216b5ce26b81586708cf31010590" +# Bumped to the build containing agent-core v0.4.0 sink_from_env support +# (engineering-loop #26), so live loop cycles can fan out to the collector while +# retaining the local JSONL trace sink. +engineering_loop_version: "789e90bc5a791629d3b7ff69c6fc3bd22a92da30" engineering_loop_timer_enabled: true engineering_loop_noc_lhp_base_url: "http://[{{ peers.noc.ipv6 }}]:8000" engineering_loop_lhp_callback_enabled: true # Turn agent-core TraceEvent/CostUsage emission on for the live loop cycles. engineering_loop_agent_core_trace_enabled: true +engineering_loop_agent_core_trace_collector_url: "http://[{{ peers.loop.ipv6 }}]:8770/v1/trace" # Local read-only knowledge MCP server for Engineering Loop context. It is # deployed as a Docker container and bound to host loopback only. -knowledge_mcp_version: "ae40865a920292efd65e50768ffee85ca0eb9fe7" +knowledge_mcp_version: "32bca0c345988f2a504f00f9f30937424ac66c4e" knowledge_mcp_enabled: true knowledge_mcp_host: 127.0.0.1 knowledge_mcp_port: 8767 knowledge_mcp_transport: streamable-http +knowledge_mcp_agent_core_trace_enabled: true +knowledge_mcp_agent_core_trace_collector_url: "http://[{{ peers.loop.ipv6 }}]:8770/v1/trace" # Docker uses routed IPv6 for egress: containers receive GUA addresses from # this /64, and rtr has a static/iBGP return route via loop's infra address. knowledge_mcp_docker_ipv6_enabled: true diff --git a/ansible/roles/engineering_loop/defaults/main.yml b/ansible/roles/engineering_loop/defaults/main.yml index 5834ef6d..acc00268 100644 --- a/ansible/roles/engineering_loop/defaults/main.yml +++ b/ansible/roles/engineering_loop/defaults/main.yml @@ -91,6 +91,7 @@ engineering_loop_lhp_callback_enabled: false # (must sit under a service ReadWritePaths entry, i.e. the state dir). engineering_loop_agent_core_trace_enabled: false engineering_loop_agent_core_trace_path: "{{ engineering_loop_state_dir }}/agent-core-trace.jsonl" +engineering_loop_agent_core_trace_collector_url: "" engineering_loop_git_user_name: "hyrule-engineering-loop[bot]" engineering_loop_git_user_email: "engineering-loop@as215932.net" diff --git a/ansible/roles/knowledge_mcp/defaults/main.yml b/ansible/roles/knowledge_mcp/defaults/main.yml index 0e005c0c..42a89aac 100644 --- a/ansible/roles/knowledge_mcp/defaults/main.yml +++ b/ansible/roles/knowledge_mcp/defaults/main.yml @@ -28,6 +28,8 @@ knowledge_mcp_mount_path: / knowledge_mcp_log_level: INFO knowledge_mcp_memory: 512m knowledge_mcp_pids_limit: 256 +knowledge_mcp_agent_core_trace_enabled: false +knowledge_mcp_agent_core_trace_collector_url: "" # Optional routed-GUA IPv6 Docker bridge support. This keeps Docker usable on # IPv6-only hosts without falling back to IPv4-only public resolvers or IPv4 diff --git a/ansible/roles/knowledge_mcp/templates/hyrule-knowledge-mcp.service.j2 b/ansible/roles/knowledge_mcp/templates/hyrule-knowledge-mcp.service.j2 index 4c800734..631cedf5 100644 --- a/ansible/roles/knowledge_mcp/templates/hyrule-knowledge-mcp.service.j2 +++ b/ansible/roles/knowledge_mcp/templates/hyrule-knowledge-mcp.service.j2 @@ -33,6 +33,8 @@ ExecStart=/usr/bin/docker run \ -e HYRULE_KNOWLEDGE_MCP_MESSAGE_PATH={{ knowledge_mcp_message_path }} \ -e HYRULE_KNOWLEDGE_MCP_MOUNT_PATH={{ knowledge_mcp_mount_path }} \ -e HYRULE_KNOWLEDGE_MCP_LOG_LEVEL={{ knowledge_mcp_log_level }} \ + -e HYRULE_KNOWLEDGE_AGENT_CORE_TRACE={{ '1' if knowledge_mcp_agent_core_trace_enabled | bool else '0' }} \ + -e HYRULE_KNOWLEDGE_AGENT_CORE_TRACE_COLLECTOR_URL={{ knowledge_mcp_agent_core_trace_collector_url }} \ {{ knowledge_mcp_image }} ExecStop=/usr/bin/docker stop {{ knowledge_mcp_container_name }} TimeoutStopSec=20 diff --git a/ansible/roles/vault_agent/templates/engineering-loop.env.ctmpl.j2 b/ansible/roles/vault_agent/templates/engineering-loop.env.ctmpl.j2 index 4f4e311e..d991d294 100644 --- a/ansible/roles/vault_agent/templates/engineering-loop.env.ctmpl.j2 +++ b/ansible/roles/vault_agent/templates/engineering-loop.env.ctmpl.j2 @@ -38,3 +38,4 @@ ENGINEERING_LOOP_NOC_LHP_BASE_URL={{ engineering_loop_noc_lhp_base_url }} ENGINEERING_LOOP_LHP_CALLBACK_ENABLED={{ '1' if engineering_loop_lhp_callback_enabled | bool else '0' }} HYRULE_ENGINEERING_AGENT_CORE_TRACE={{ '1' if engineering_loop_agent_core_trace_enabled | bool else '0' }} HYRULE_ENGINEERING_AGENT_CORE_TRACE_PATH={{ engineering_loop_agent_core_trace_path }} +HYRULE_ENGINEERING_AGENT_CORE_TRACE_COLLECTOR_URL={{ engineering_loop_agent_core_trace_collector_url }} From 7ef1909f0e617cdf6cb0d7ab4cd8ab8c99cba57d Mon Sep 17 00:00:00 2001 From: David Hyrule Date: Mon, 29 Jun 2026 17:20:53 +0200 Subject: [PATCH 2/2] knowledge-loop: enable collector-backed canary --- ansible/inventory/host_vars/loop.yml | 18 +++++++++--------- ansible/roles/knowledge_loop/defaults/main.yml | 2 ++ .../templates/knowledge-loop.env.ctmpl.j2 | 2 ++ tests/iac/test_vault_and_runner_contracts.py | 9 +++++++-- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/ansible/inventory/host_vars/loop.yml b/ansible/inventory/host_vars/loop.yml index 36db16e0..5cdc43c6 100644 --- a/ansible/inventory/host_vars/loop.yml +++ b/ansible/inventory/host_vars/loop.yml @@ -39,15 +39,15 @@ agent_core_collector_bind: "{{ peers.loop.ipv6 }}" agent_core_collector_port: 8770 # Governed Knowledge Loop producer agent (distinct from the read-only Knowledge -# MCP above). Pinned to the merged Knowledge revision that ships -# `hyrule-knowledge loop --once` (AS215932/knowledge#18). The workflow enables -# knowledge_loop_apply for engineering-loop, so this pin keeps the first apply -# reproducible instead of tracking floating `main`. The systemd timer stays -# disabled until a reviewed canary flips knowledge_loop_timer_enabled; applying -# now only provisions the runtime (user, pinned checkout, deps, disabled -# service/timer). Live OpenRouter enrichment budget stays the role default 0. -knowledge_loop_version: "0b414aea6777ab067ae69c4fd82f715e847cc58e" -knowledge_loop_timer_enabled: false +# MCP above). Pinned to the merged Knowledge revision that ships agent-core +# v0.4.0 sink_from_env support (AS215932/knowledge#24). The canary runs daily, +# opens at most one reviewed PR/day, and keeps live OpenRouter calls disabled. +knowledge_loop_version: "32bca0c345988f2a504f00f9f30937424ac66c4e" +knowledge_loop_timer_enabled: true +knowledge_loop_agent_core_trace_enabled: true +knowledge_loop_agent_core_trace_collector_url: "http://[{{ peers.loop.ipv6 }}]:8770/v1/trace" +knowledge_loop_max_openrouter_calls_per_day: 0 +knowledge_loop_max_prs_per_day: 1 # Dogfood scope: let the loop author the VPS launch-proof wedge in hyrule-cloud # (still draft-PR + human-merge gated). All other repos stay docs-only. diff --git a/ansible/roles/knowledge_loop/defaults/main.yml b/ansible/roles/knowledge_loop/defaults/main.yml index a8ba32a1..18e76320 100644 --- a/ansible/roles/knowledge_loop/defaults/main.yml +++ b/ansible/roles/knowledge_loop/defaults/main.yml @@ -41,6 +41,8 @@ knowledge_loop_learning_event_paths: [] knowledge_loop_replace_learning_events: false knowledge_loop_skip_validation: false knowledge_loop_icinga_check: loop!knowledge-loop +knowledge_loop_agent_core_trace_enabled: false +knowledge_loop_agent_core_trace_collector_url: "" # Daily instead of hourly: ingest currently refreshes timestamps, so hourly would # create noisy PRs. Increase cadence only after stable no-op detection lands. diff --git a/ansible/roles/vault_agent/templates/knowledge-loop.env.ctmpl.j2 b/ansible/roles/vault_agent/templates/knowledge-loop.env.ctmpl.j2 index 363db5da..c2bc4652 100644 --- a/ansible/roles/vault_agent/templates/knowledge-loop.env.ctmpl.j2 +++ b/ansible/roles/vault_agent/templates/knowledge-loop.env.ctmpl.j2 @@ -31,3 +31,5 @@ HYRULE_KNOWLEDGE_LOOP_ICINGA_CHECK={{ or .Data.data.icinga_check "loop!knowledge GIT_ASKPASS={{ knowledge_loop_git_askpass_path }} GIT_TERMINAL_PROMPT=0 +HYRULE_KNOWLEDGE_AGENT_CORE_TRACE={{ '1' if knowledge_loop_agent_core_trace_enabled | bool else '0' }} +HYRULE_KNOWLEDGE_AGENT_CORE_TRACE_COLLECTOR_URL={{ knowledge_loop_agent_core_trace_collector_url }} diff --git a/tests/iac/test_vault_and_runner_contracts.py b/tests/iac/test_vault_and_runner_contracts.py index 237f727c..96cedba3 100644 --- a/tests/iac/test_vault_and_runner_contracts.py +++ b/tests/iac/test_vault_and_runner_contracts.py @@ -157,9 +157,14 @@ def test_agent_core_collector_uses_dedicated_vault_scope(self): def test_knowledge_loop_checkout_is_pinned_and_runner_policy_documented(self): host_vars = yaml.safe_load((REPO / "ansible/inventory/host_vars/loop.yml").read_text()) # apply.yml forces knowledge_loop_apply for engineering-loop, so the loop - # checkout must be a reviewed 40-char commit, never floating `main`. + # checkout must be a reviewed 40-char commit, never floating `main`. The + # live host may enable the reviewed daily canary, but role defaults remain off. self.assertRegex(str(host_vars["knowledge_loop_version"]), r"^[0-9a-f]{40}$") - self.assertEqual(host_vars["knowledge_loop_timer_enabled"], False) + self.assertEqual(host_vars["knowledge_loop_timer_enabled"], True) + self.assertEqual(host_vars["knowledge_loop_max_openrouter_calls_per_day"], 0) + self.assertEqual(host_vars["knowledge_loop_max_prs_per_day"], 1) + self.assertEqual(host_vars["knowledge_loop_agent_core_trace_enabled"], True) + self.assertIn("/v1/trace", host_vars["knowledge_loop_agent_core_trace_collector_url"]) runbook = (REPO / "docs/runbooks/bootstrap-knowledge-loop-vault.md").read_text() # The runner needs the refreshed github-runner policy before the first apply