From 2bfc4f5e016407cf8ae7adbfb81fc00fb2120405 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Thu, 14 May 2026 13:47:12 +0000
Subject: [PATCH 1/2] =?UTF-8?q?feat(v1.5-C):=20per-agent=20LLM=20proof=20p?=
 =?UTF-8?q?oint=20=E2=80=94=20intake=20on=20ollama=5Fcloud,=20downstream?=
 =?UTF-8?q?=20on=20llm.default?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Activates the M8 milestone's per-agent provider story. The framework
already resolved ``skill.model`` per-skill via
``graph.py:_build_agent_nodes -> get_llm(cfg.llm, skill.model, role=...)``;
v1.5-C uncomments the override on the example incident_management
intake skill so a default deployment (with both OLLAMA_API_KEY +
OPENROUTER_API_KEY set, or wherever ``llm.default`` resolves) shows
intake hitting Ollama Cloud's gpt-oss while the rest of the agents
follow the runtime default.

Changes:

* ``examples/incident_management/skills/intake/config.yaml`` — declare
  ``model: gpt_oss_cheap`` (was a documented but commented-out hint).
  Comment block updated to reference v1.5-C and explain the resolver.

* ``src/runtime/config.py`` — extend the default ``LLMConfig.models``
  map (the registry ``LLMConfig.stub()`` callers get) with stub aliases
  for ``gpt_oss``, ``gpt_oss_cheap``, and ``workhorse``. The
  skill-validator (``Orchestrator.create``) checks every ``skill.model``
  against ``llm.models``; without these aliases the existing test suite
  would fail the moment intake declares ``model: gpt_oss_cheap``
  (because tests build ``LLMConfig.stub()``, which previously only knew
  ``stub_default``). The aliases route to the same stub provider, so
  behaviour is unchanged for stub-mode callers.

* ``tests/test_per_agent_model_dispatch.py`` (new, 2 tests) — pin
  the dispatch contract:

  - ``test_build_agent_nodes_passes_skill_model_to_get_llm`` mocks
    ``runtime.graph.get_llm`` and asserts the framework calls it
    with ``model_name=skill.model`` per skill (intake gets
    ``"gpt_oss_cheap"``, triage with ``model=None`` gets ``None`` so
    ``get_llm`` falls back to ``llm.default`` downstream).
  - ``test_intake_skill_yaml_has_per_agent_override_uncommented``
    pins the YAML edit so a future refactor can't silently drop the
    override.

Live verification (``tests/test_integration_driver_s1.py`` family)
continues to require ``OLLAMA_API_KEY`` + ``OPENROUTER_API_KEY``
+ ``OLLAMA_BASE_URL`` and remains skipped without them — the human
verification gate documented at
``.planning/phases/15-real-llm-tool-loop-termination/15-VERIFICATION.md``.

Suite: 1260 passed (was 1258 — added 2), ruff clean, coverage 87.08%.
---
 .../skills/intake/config.yaml                 |  13 +-
 src/runtime/config.py                         |   9 ++
 tests/test_per_agent_model_dispatch.py        | 129 ++++++++++++++++++
 3 files changed, 145 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_per_agent_model_dispatch.py

diff --git a/examples/incident_management/skills/intake/config.yaml b/examples/incident_management/skills/intake/config.yaml
index 6771985..9733d76 100644
--- a/examples/incident_management/skills/intake/config.yaml
+++ b/examples/incident_management/skills/intake/config.yaml
@@ -1,11 +1,12 @@
 description: Intake supervisor — framework similarity retrieval + ASR memory hydration; dispatches to triage
 kind: supervisor
-# M8: per-agent provider swap. Uncomment the line below to route this
-# agent through the Ollama Cloud gpt-oss model defined in
-# ``config/config.yaml``; the rest of the agents stay on the default
-# ``workhorse`` model. graph.py:_build_agent_nodes reads skill.model
-# and resolves it via the per-app LLMConfig.models registry.
-#   model: gpt_oss_cheap
+# v1.5-C (M8 proof point): per-agent provider swap. Intake runs through
+# the Ollama Cloud gpt-oss model defined in ``config/config.yaml``;
+# downstream agents follow the runtime ``llm.default``. The framework
+# resolves this via ``graph.py:_build_agent_nodes`` -> ``get_llm(cfg.llm,
+# skill.model, ...)``, which falls back to ``llm.default`` when ``model``
+# is None. Comment this back out to force every agent onto the default.
+model: gpt_oss_cheap
 subordinates:
   - triage
 dispatch_strategy: rule
diff --git a/src/runtime/config.py b/src/runtime/config.py
index 7eb61c2..e355448 100644
--- a/src/runtime/config.py
+++ b/src/runtime/config.py
@@ -88,6 +88,15 @@ class LLMConfig(BaseModel):
     models: dict[str, ModelConfig] = Field(
         default_factory=lambda: {
             "stub_default": ModelConfig(provider="stub", model="stub-1"),
+            # Aliases for the example apps' per-agent model overrides
+            # (e.g. incident_management's intake skill carries
+            # ``model: gpt_oss_cheap`` for the v1.5-C / M8 proof point).
+            # Tests + ``LLMConfig.stub()`` callers route them to the
+            # same stub provider so the skill validator passes without
+            # forcing every test to re-declare the registry.
+            "gpt_oss": ModelConfig(provider="stub", model="stub-1"),
+            "gpt_oss_cheap": ModelConfig(provider="stub", model="stub-1"),
+            "workhorse": ModelConfig(provider="stub", model="stub-1"),
         }
     )
     embedding: EmbeddingConfig | None = None
diff --git a/tests/test_per_agent_model_dispatch.py b/tests/test_per_agent_model_dispatch.py
new file mode 100644
index 0000000..9966943
--- /dev/null
+++ b/tests/test_per_agent_model_dispatch.py
@@ -0,0 +1,129 @@
+"""v1.5-C (M8 proof point): per-agent LLM dispatch contract.
+
+Pins the contract that ``runtime.graph._build_agent_nodes`` resolves
+``skill.model`` per-skill, so apps can route different agents through
+different providers without touching the framework.
+
+The live demonstration of this contract — intake on Ollama Cloud
+gpt-oss while downstream agents follow ``llm.default`` — lives in
+``examples/incident_management/skills/intake/config.yaml`` (the
+``model: gpt_oss_cheap`` line) and is exercised by
+``tests/test_integration_driver_s1.py`` when the appropriate API
+keys are set.
+
+These tests run without keys: the dispatch test intercepts
+``runtime.graph.get_llm`` and asserts the model name passed for each
+skill equals the skill's ``model`` field (``None`` is passed through
+unchanged, leaving the ``llm.default`` fallback to ``get_llm``).
+"""
+from __future__ import annotations
+
+from unittest.mock import patch
+
+from runtime.config import (
+    AppConfig,
+    LLMConfig,
+    MCPConfig,
+    OrchestratorConfig,
+    Paths,
+    RuntimeConfig,
+)
+from runtime.mcp_loader import ToolRegistry
+from runtime.skill import RouteRule, Skill
+
+
+def _stub_app_cfg() -> AppConfig:
+    """Minimal ``AppConfig`` built on ``LLMConfig.stub()`` with no MCP
+    servers — the framework is expected to pick a model for each agent
+    from ``skill.model`` only."""
+    return AppConfig(
+        llm=LLMConfig.stub(),
+        mcp=MCPConfig(servers=[]),
+        paths=Paths(skills_dir="config/skills", incidents_dir="/tmp"),
+        runtime=RuntimeConfig(state_class=None),
+        orchestrator=OrchestratorConfig(),
+    )
+
+
+def test_build_agent_nodes_passes_skill_model_to_get_llm():
+    """The framework must call ``get_llm(cfg.llm, skill.model, ...)`` for
+    every responsive skill. Without this, per-agent provider swaps
+    silently collapse to the default model.
+
+    ``get_llm`` is fully mocked to record one (role, model_name) pair
+    per call, which isolates the test from the ``LLMConfig.models``
+    registry shape. Each skill must show up in the recording: a skill
+    that never reaches ``get_llm`` is a failure, not an implicit None.
+    """
+    from runtime.graph import _build_agent_nodes
+    from runtime.llm import StubChatModel
+
+    skills = {
+        "intake": Skill(
+            name="intake",
+            description="d",
+            kind="responsive",
+            model="gpt_oss_cheap",
+            routes=[RouteRule(when="default", next="triage")],
+            system_prompt="x",
+        ),
+        "triage": Skill(
+            name="triage",
+            description="d",
+            kind="responsive",
+            model=None,  # falls back to llm.default downstream
+            routes=[RouteRule(when="default", next="__end__")],
+            system_prompt="x",
+        ),
+    }
+
+    captured: list[tuple[str, str | None]] = []
+
+    def _fake_get_llm(cfg, model_name, *, role, **kwargs):
+        captured.append((role, model_name))
+        return StubChatModel(role=role)
+
+    cfg = _stub_app_cfg()
+    with patch("runtime.graph.get_llm", side_effect=_fake_get_llm):
+        nodes = _build_agent_nodes(
+            cfg=cfg,
+            skills=skills,
+            store=None,  # type: ignore[arg-type]  # _build_agent_nodes
+            # only forwards ``store`` to make_agent_node, never reads it
+            # itself; tests of the dispatch contract leave it None.
+            registry=ToolRegistry(entries={}),
+        )
+
+    # Both skills produced a node.
+    assert set(nodes.keys()) == {"intake", "triage"}
+
+    # Per-skill model resolution: intake got its override, triage got
+    # None (which get_llm resolves to llm.default downstream).
+    by_role = dict(captured)
+    assert by_role.get("intake") == "gpt_oss_cheap", (
+        f"intake should resolve to its skill.model override; got {by_role!r}"
+    )
+    assert "triage" in by_role and by_role["triage"] is None, (
+        f"triage skill.model was None; should pass None through so "
+        f"get_llm falls back to llm.default; got {by_role!r}"
+    )
+
+
+def test_intake_skill_yaml_has_per_agent_override_uncommented():
+    """The intake skill config must carry ``model: gpt_oss_cheap`` — the
+    v1.5-C deliverable. The config is located relative to this file, not
+    the working directory, so the guard holds wherever pytest is run.
+    An intentional rollback should remove this test along with the line.
+    """
+    import yaml
+    from pathlib import Path
+
+    cfg_path = Path(__file__).resolve().parents[1].joinpath(
+        "examples/incident_management/skills/intake/config.yaml"
+    )
+    parsed = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
+    assert parsed.get("model") == "gpt_oss_cheap", (
+        f"intake skill must declare ``model: gpt_oss_cheap`` per the "
+        f"v1.5-C M8 proof point; got {parsed.get('model')!r}. If "
+        f"intentionally rolling back, remove this test guard too."
+    )

From 8d04eb31f260141f26ba9b477b6dc88d8c14b72b Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Thu, 14 May 2026 13:47:23 +0000
Subject: [PATCH 2/2] build: regenerate dist bundles for v1.5-C per-agent
 override

Bundles dist/app.py + dist/apps/{code-review,incident-management}.py
in line with the LLMConfig stub-aliases extension from the preceding
commit. No bundle-only edits.
---
 dist/app.py                      | 9 +++++++++
 dist/apps/code-review.py         | 9 +++++++++
 dist/apps/incident-management.py | 9 +++++++++
 3 files changed, 27 insertions(+)

diff --git a/dist/app.py b/dist/app.py
index 266467d..816fc0c 100644
--- a/dist/app.py
+++ b/dist/app.py
@@ -1638,6 +1638,15 @@ class LLMConfig(BaseModel):
     models: dict[str, ModelConfig] = Field(
         default_factory=lambda: {
             "stub_default": ModelConfig(provider="stub", model="stub-1"),
+            # Aliases for the example apps' per-agent model overrides
+            # (e.g. incident_management's intake skill carries
+            # ``model: gpt_oss_cheap`` for the v1.5-C / M8 proof point).
+            # Tests + ``LLMConfig.stub()`` callers route them to the
+            # same stub provider so the skill validator passes without
+            # forcing every test to re-declare the registry.
+            "gpt_oss": ModelConfig(provider="stub", model="stub-1"),
+            "gpt_oss_cheap": ModelConfig(provider="stub", model="stub-1"),
+            "workhorse": ModelConfig(provider="stub", model="stub-1"),
         }
     )
     embedding: EmbeddingConfig | None = None
diff --git a/dist/apps/code-review.py b/dist/apps/code-review.py
index d437b45..8d6e5f3 100644
--- a/dist/apps/code-review.py
+++ b/dist/apps/code-review.py
@@ -1691,6 +1691,15 @@ class LLMConfig(BaseModel):
     models: dict[str, ModelConfig] = Field(
         default_factory=lambda: {
             "stub_default": ModelConfig(provider="stub", model="stub-1"),
+            # Aliases for the example apps' per-agent model overrides
+            # (e.g. incident_management's intake skill carries
+            # ``model: gpt_oss_cheap`` for the v1.5-C / M8 proof point).
+            # Tests + ``LLMConfig.stub()`` callers route them to the
+            # same stub provider so the skill validator passes without
+            # forcing every test to re-declare the registry.
+            "gpt_oss": ModelConfig(provider="stub", model="stub-1"),
+            "gpt_oss_cheap": ModelConfig(provider="stub", model="stub-1"),
+            "workhorse": ModelConfig(provider="stub", model="stub-1"),
         }
     )
     embedding: EmbeddingConfig | None = None
diff --git a/dist/apps/incident-management.py b/dist/apps/incident-management.py
index 180145e..53e0b12 100644
--- a/dist/apps/incident-management.py
+++ b/dist/apps/incident-management.py
@@ -1703,6 +1703,15 @@ class LLMConfig(BaseModel):
     models: dict[str, ModelConfig] = Field(
         default_factory=lambda: {
             "stub_default": ModelConfig(provider="stub", model="stub-1"),
+            # Aliases for the example apps' per-agent model overrides
+            # (e.g. incident_management's intake skill carries
+            # ``model: gpt_oss_cheap`` for the v1.5-C / M8 proof point).
+            # Tests + ``LLMConfig.stub()`` callers route them to the
+            # same stub provider so the skill validator passes without
+            # forcing every test to re-declare the registry.
+            "gpt_oss": ModelConfig(provider="stub", model="stub-1"),
+            "gpt_oss_cheap": ModelConfig(provider="stub", model="stub-1"),
+            "workhorse": ModelConfig(provider="stub", model="stub-1"),
         }
     )
     embedding: EmbeddingConfig | None = None