From 2bfc4f5e016407cf8ae7adbfb81fc00fb2120405 Mon Sep 17 00:00:00 2001
From: Amit Kumar
Date: Thu, 14 May 2026 13:47:12 +0000
Subject: [PATCH 1/2] =?UTF-8?q?feat(v1.5-C):=20per-agent=20LLM=20proof=20p?=
 =?UTF-8?q?oint=20=E2=80=94=20intake=20on=20ollama=5Fcloud,=20downstream?=
 =?UTF-8?q?=20on=20llm.default?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Activates the M8 milestone's per-agent provider story. The framework
already resolved ``skill.model`` per-skill via
``graph.py:_build_agent_nodes -> get_llm(cfg.llm, skill.model,
role=...)``; v1.5-C uncomments the override on the example
incident_management intake skill so a default deployment (with both
OLLAMA_API_KEY + OPENROUTER_API_KEY set, or wherever ``llm.default``
resolves) shows intake hitting Ollama Cloud's gpt-oss while the rest
of the agents follow the runtime default.

Changes:

* ``examples/incident_management/skills/intake/config.yaml`` — declare
  ``model: gpt_oss_cheap`` (was a documented but commented-out hint).
  Comment block updated to reference v1.5-C and explain the resolver.

* ``src/runtime/config.py`` — extend the ``LLMConfig.stub()`` default
  models map with stub aliases for ``gpt_oss``, ``gpt_oss_cheap``, and
  ``workhorse``. The skill-validator (``Orchestrator.create``) checks
  every ``skill.model`` against ``llm.models``; without these aliases
  the existing test suite would explode the moment intake declares
  ``model: gpt_oss_cheap`` (because tests build ``LLMConfig.stub()``
  which previously only knew ``stub_default``). The aliases route to
  the same stub provider so behaviour is unchanged for stub-mode
  callers.

* ``tests/test_per_agent_model_dispatch.py`` (new, 2 tests) — pin the
  dispatch contract:
  - ``test_build_agent_nodes_passes_skill_model_to_get_llm`` mocks
    ``runtime.graph.get_llm`` and asserts the framework calls it with
    ``model_name=skill.model`` per skill (intake gets
    ``"gpt_oss_cheap"``, triage with ``model=None`` gets ``None`` so
    ``get_llm`` falls back to ``llm.default`` downstream).
  - ``test_intake_skill_yaml_has_per_agent_override_uncommented`` pins
    the YAML edit so a future refactor can't silently drop the
    override.

Live verification (``tests/test_integration_driver_s1.py`` family)
continues to require ``OLLAMA_API_KEY`` + ``OPENROUTER_API_KEY`` +
``OLLAMA_BASE_URL`` and remains skipped without them — the human
verification gate documented at
``.planning/phases/15-real-llm-tool-loop-termination/15-VERIFICATION.md``.

Suite: 1260 passed (was 1258 — added 2), ruff clean, coverage 87.08%.
---
 .../skills/intake/config.yaml          |  13 +-
 src/runtime/config.py                  |   9 ++
 tests/test_per_agent_model_dispatch.py | 129 ++++++++++++++++++
 3 files changed, 145 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_per_agent_model_dispatch.py

diff --git a/examples/incident_management/skills/intake/config.yaml b/examples/incident_management/skills/intake/config.yaml
index 6771985..9733d76 100644
--- a/examples/incident_management/skills/intake/config.yaml
+++ b/examples/incident_management/skills/intake/config.yaml
@@ -1,11 +1,12 @@
 description: Intake supervisor — framework similarity retrieval + ASR memory hydration; dispatches to triage
 kind: supervisor
-# M8: per-agent provider swap. Uncomment the line below to route this
-# agent through the Ollama Cloud gpt-oss model defined in
-# ``config/config.yaml``; the rest of the agents stay on the default
-# ``workhorse`` model. graph.py:_build_agent_nodes reads skill.model
-# and resolves it via the per-app LLMConfig.models registry.
-# model: gpt_oss_cheap
+# v1.5-C (M8 proof point): per-agent provider swap. Intake runs through
+# the Ollama Cloud gpt-oss model defined in ``config/config.yaml``;
+# downstream agents follow the runtime ``llm.default``. The framework
+# resolves this via ``graph.py:_build_agent_nodes`` -> ``get_llm(cfg.llm,
+# skill.model, ...)`` which falls back to ``cfg.default`` when ``model``
+# is None. Comment this back out to force every agent onto the default.
+model: gpt_oss_cheap
 subordinates:
   - triage
 dispatch_strategy: rule
diff --git a/src/runtime/config.py b/src/runtime/config.py
index 7eb61c2..e355448 100644
--- a/src/runtime/config.py
+++ b/src/runtime/config.py
@@ -88,6 +88,15 @@ class LLMConfig(BaseModel):
     models: dict[str, ModelConfig] = Field(
         default_factory=lambda: {
             "stub_default": ModelConfig(provider="stub", model="stub-1"),
+            # Aliases for the example apps' per-agent model overrides
+            # (e.g. incident_management's intake skill carries
+            # ``model: gpt_oss_cheap`` for the v1.5-C / M8 proof point).
+            # Tests + ``LLMConfig.stub()`` callers route them to the
+            # same stub provider so the skill validator passes without
+            # forcing every test to re-declare the registry.
+            "gpt_oss": ModelConfig(provider="stub", model="stub-1"),
+            "gpt_oss_cheap": ModelConfig(provider="stub", model="stub-1"),
+            "workhorse": ModelConfig(provider="stub", model="stub-1"),
         }
     )
     embedding: EmbeddingConfig | None = None
diff --git a/tests/test_per_agent_model_dispatch.py b/tests/test_per_agent_model_dispatch.py
new file mode 100644
index 0000000..9966943
--- /dev/null
+++ b/tests/test_per_agent_model_dispatch.py
@@ -0,0 +1,129 @@
+"""v1.5-C (M8 proof point): per-agent LLM dispatch contract.
+
+Pins the contract that ``runtime.graph._build_agent_nodes`` resolves
+``skill.model`` per-skill, so apps can route different agents through
+different providers without touching the framework.
+
+The live demonstration of this contract — intake on Ollama Cloud
+gpt-oss while downstream agents follow ``llm.default`` — lives in
+``examples/incident_management/skills/intake/config.yaml`` (the
+``model: gpt_oss_cheap`` line) and is exercised by
+``tests/test_integration_driver_s1.py`` when the appropriate API
+keys are set.
+
+These tests run without keys: they intercept ``runtime.graph.get_llm``
+and assert the model name passed for each skill matches the skill's
+``model`` field (``None`` is passed through untouched so ``get_llm``
+itself applies the ``llm.default`` fallback).
+"""
+from __future__ import annotations
+
+from unittest.mock import patch
+
+from runtime.config import (
+    AppConfig,
+    LLMConfig,
+    MCPConfig,
+    OrchestratorConfig,
+    Paths,
+    RuntimeConfig,
+)
+from runtime.mcp_loader import ToolRegistry
+from runtime.skill import RouteRule, Skill
+
+
+def _stub_app_cfg() -> AppConfig:
+    """Minimal AppConfig on ``LLMConfig.stub()`` — the framework picks
+    among its stub model aliases by ``skill.model`` only."""
+    llm_cfg = LLMConfig.stub()
+    return AppConfig(
+        llm=llm_cfg,
+        mcp=MCPConfig(servers=[]),
+        paths=Paths(skills_dir="config/skills", incidents_dir="/tmp"),
+        runtime=RuntimeConfig(state_class=None),
+        orchestrator=OrchestratorConfig(),
+    )
+
+
+def test_build_agent_nodes_passes_skill_model_to_get_llm():
+    """The framework must call ``get_llm(cfg.llm, skill.model, ...)`` for
+    every responsive skill. Without this, per-agent provider swaps
+    silently collapse to the default model.
+
+    We fully mock ``get_llm`` to capture the (role, model_name) tuple
+    per skill — this isolates the test from the LLMConfig.models
+    registry shape, which is what the production code resolves the
+    name through downstream.
+    """
+    from runtime.graph import _build_agent_nodes
+    from runtime.llm import StubChatModel
+
+    skills = {
+        "intake": Skill(
+            name="intake",
+            description="d",
+            kind="responsive",
+            model="gpt_oss_cheap",
+            routes=[RouteRule(when="default", next="triage")],
+            system_prompt="x",
+        ),
+        "triage": Skill(
+            name="triage",
+            description="d",
+            kind="responsive",
+            model=None,  # falls back to llm.default downstream
+            routes=[RouteRule(when="default", next="__end__")],
+            system_prompt="x",
+        ),
+    }
+
+    captured: list[tuple[str, str | None]] = []
+
+    def _fake_get_llm(cfg, model_name, *, role, **kwargs):
+        captured.append((role, model_name))
+        return StubChatModel(role=role)
+
+    cfg = _stub_app_cfg()
+    with patch("runtime.graph.get_llm", side_effect=_fake_get_llm):
+        nodes = _build_agent_nodes(
+            cfg=cfg,
+            skills=skills,
+            store=None,  # type: ignore[arg-type]  # _build_agent_nodes
+            # only forwards ``store`` to make_agent_node, never reads it
+            # itself; tests of the dispatch contract leave it None.
+            registry=ToolRegistry(entries={}),
+        )
+
+    # Both skills produced a node.
+    assert set(nodes.keys()) == {"intake", "triage"}
+
+    # Per-skill model resolution: intake got its override, triage got
+    # None (which get_llm resolves to llm.default downstream).
+    by_role = dict(captured)
+    assert by_role.get("intake") == "gpt_oss_cheap", (
+        f"intake should resolve to its skill.model override; got {by_role!r}"
+    )
+    assert by_role.get("triage") is None, (
+        f"triage skill.model was None; should pass None through so "
+        f"get_llm falls back to llm.default; got {by_role!r}"
+    )
+
+
+def test_intake_skill_yaml_has_per_agent_override_uncommented():
+    """The intake skill config must carry ``model: gpt_oss_cheap`` — the
+    v1.5-C deliverable. A human flipping this back to a comment is
+    intentional (e.g. forcing all-default for a benchmark run); the
+    test fails if that happens silently in a refactor.
+    """
+    import yaml
+    from pathlib import Path
+
+    cfg_path = Path(__file__).resolve().parents[1] / (
+        "examples/incident_management/skills/intake/config.yaml"
+    )
+    parsed = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
+    assert parsed.get("model") == "gpt_oss_cheap", (
+        f"intake skill must declare ``model: gpt_oss_cheap`` per the "
+        f"v1.5-C M8 proof point; got {parsed.get('model')!r}. If "
+        f"intentionally rolling back, remove this test guard too."
+    )

From 8d04eb31f260141f26ba9b477b6dc88d8c14b72b Mon Sep 17 00:00:00 2001
From: Amit Kumar
Date: Thu, 14 May 2026 13:47:23 +0000
Subject: [PATCH 2/2] build: regenerate dist bundles for v1.5-C per-agent
 override

Bundles dist/app.py + dist/apps/{code-review,incident-management}.py in
line with the LLMConfig stub-aliases extension from the preceding
commit. No bundle-only edits.
---
 dist/app.py                      | 9 +++++++++
 dist/apps/code-review.py         | 9 +++++++++
 dist/apps/incident-management.py | 9 +++++++++
 3 files changed, 27 insertions(+)

diff --git a/dist/app.py b/dist/app.py
index 266467d..816fc0c 100644
--- a/dist/app.py
+++ b/dist/app.py
@@ -1638,6 +1638,15 @@ class LLMConfig(BaseModel):
     models: dict[str, ModelConfig] = Field(
         default_factory=lambda: {
             "stub_default": ModelConfig(provider="stub", model="stub-1"),
+            # Aliases for the example apps' per-agent model overrides
+            # (e.g. incident_management's intake skill carries
+            # ``model: gpt_oss_cheap`` for the v1.5-C / M8 proof point).
+            # Tests + ``LLMConfig.stub()`` callers route them to the
+            # same stub provider so the skill validator passes without
+            # forcing every test to re-declare the registry.
+            "gpt_oss": ModelConfig(provider="stub", model="stub-1"),
+            "gpt_oss_cheap": ModelConfig(provider="stub", model="stub-1"),
+            "workhorse": ModelConfig(provider="stub", model="stub-1"),
         }
     )
     embedding: EmbeddingConfig | None = None
diff --git a/dist/apps/code-review.py b/dist/apps/code-review.py
index d437b45..8d6e5f3 100644
--- a/dist/apps/code-review.py
+++ b/dist/apps/code-review.py
@@ -1691,6 +1691,15 @@ class LLMConfig(BaseModel):
     models: dict[str, ModelConfig] = Field(
         default_factory=lambda: {
             "stub_default": ModelConfig(provider="stub", model="stub-1"),
+            # Aliases for the example apps' per-agent model overrides
+            # (e.g. incident_management's intake skill carries
+            # ``model: gpt_oss_cheap`` for the v1.5-C / M8 proof point).
+            # Tests + ``LLMConfig.stub()`` callers route them to the
+            # same stub provider so the skill validator passes without
+            # forcing every test to re-declare the registry.
+            "gpt_oss": ModelConfig(provider="stub", model="stub-1"),
+            "gpt_oss_cheap": ModelConfig(provider="stub", model="stub-1"),
+            "workhorse": ModelConfig(provider="stub", model="stub-1"),
         }
     )
     embedding: EmbeddingConfig | None = None
diff --git a/dist/apps/incident-management.py b/dist/apps/incident-management.py
index 180145e..53e0b12 100644
--- a/dist/apps/incident-management.py
+++ b/dist/apps/incident-management.py
@@ -1703,6 +1703,15 @@ class LLMConfig(BaseModel):
     models: dict[str, ModelConfig] = Field(
         default_factory=lambda: {
             "stub_default": ModelConfig(provider="stub", model="stub-1"),
+            # Aliases for the example apps' per-agent model overrides
+            # (e.g. incident_management's intake skill carries
+            # ``model: gpt_oss_cheap`` for the v1.5-C / M8 proof point).
+            # Tests + ``LLMConfig.stub()`` callers route them to the
+            # same stub provider so the skill validator passes without
+            # forcing every test to re-declare the registry.
+            "gpt_oss": ModelConfig(provider="stub", model="stub-1"),
+            "gpt_oss_cheap": ModelConfig(provider="stub", model="stub-1"),
+            "workhorse": ModelConfig(provider="stub", model="stub-1"),
         }
     )
     embedding: EmbeddingConfig | None = None