From 4b92236453765fbc5ce94abd7bc4d55a97460c16 Mon Sep 17 00:00:00 2001 From: Matt Galligan Date: Thu, 11 Jun 2026 12:44:39 -0400 Subject: [PATCH] feat: add app-server model registry --- README.md | 4 + ...ommand-config-presets-and-name-prefixes.md | 3 +- docs/development/design.md | 6 +- docs/research/app-server-verification.md | 11 +- docs/usage/README.md | 33 ++- plugins/dispatch/README.md | 5 +- pyproject.toml | 2 +- skills/dispatch/SKILL.md | 18 ++ src/outfitter/dispatch/client/client.py | 14 ++ src/outfitter/dispatch/client/models.py | 66 ++++- src/outfitter/dispatch/contracts/context.py | 6 + .../dispatch/contracts/derive_cli.py | 1 + .../dispatch/contracts/derive_mcp.py | 4 +- src/outfitter/dispatch/core/handlers.py | 231 +++++++++++++++++- src/outfitter/dispatch/core/model_registry.py | 177 ++++++++++++++ src/outfitter/dispatch/core/models.py | 62 +++++ src/outfitter/dispatch/core/ops.py | 41 ++++ src/outfitter/dispatch/doctor.py | 26 +- src/outfitter/dispatch/registry/models.py | 36 +++ src/outfitter/dispatch/registry/store.py | 216 +++++++++++++++- tests/client/test_client.py | 48 ++++ tests/client/test_models.py | 73 ++++++ tests/core/test_examples.py | 1 + tests/core/test_handlers.py | 128 +++++++++- tests/fakes.py | 33 +++ tests/registry/test_store.py | 58 ++++- tests/surfaces/test_derive_mcp.py | 9 + tests/surfaces/test_parity.py | 1 + tests/test_doctor.py | 5 +- uv.lock | 2 +- 30 files changed, 1276 insertions(+), 44 deletions(-) create mode 100644 src/outfitter/dispatch/core/model_registry.py diff --git a/README.md b/README.md index 8ad9cc7..884d17a 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ From a source checkout: uv sync uv run dispatch --help uv run dispatch doctor --no-app-server +uv run dispatch models --no-refresh uv run dispatch up --json uv run dispatch daemon status ``` @@ -55,6 +56,9 @@ indexed view of an attached thread. assistant work completed. Use `get` to inspect the latest turn state and persisted App Server errors, or `watch` for a bounded live event sample. Slash commands in `--text` are plain text; use `--goal` when creating a native App Server goal. +Use `dispatch models` before pinning model or service-tier presets; Dispatch +resolves aliases such as `fast` from the live App Server model catalog and keeps +omitted model/tier values on Codex defaults. For the operator guide, CLI/MCP examples, triggers, and plugin setup, start at [`docs/usage/README.md`](docs/usage/README.md). diff --git a/docs/adrs/0015-new-command-config-presets-and-name-prefixes.md b/docs/adrs/0015-new-command-config-presets-and-name-prefixes.md index 1e2452b..842ffcd 100644 --- a/docs/adrs/0015-new-command-config-presets-and-name-prefixes.md +++ b/docs/adrs/0015-new-command-config-presets-and-name-prefixes.md @@ -69,8 +69,7 @@ effort = "low" Omit `model` unless you intentionally want Codex to use an explicit model. When a preset does pin a model, choose it from the live App Server catalog -(`dispatch model list` once that surface exists) rather than from docs or stale -examples. +(`dispatch models`) rather than from docs or stale examples. Merge order: diff --git a/docs/development/design.md b/docs/development/design.md index 6097203..301b428 100644 --- a/docs/development/design.md +++ b/docs/development/design.md @@ -80,6 +80,7 @@ Projections (pure functions over the registry, mirroring Trails' `derive* → cr - Thread management/search: `attach [--sync]` · `rename ` · `archive ` · `restore ` · `search ` with `--thread`/repo/directory/date/managed filters +- Model catalog: `models [--no-refresh]` - Sending: `send "…"` with `--mode send|steer|queue|interject|context` and equivalent mutually exclusive `--steer`, `--queue`, `--interject`, `--context`; `stop ` is cancel-only. @@ -105,7 +106,7 @@ for collisions. Titles and `@handles` are mutable convenience labels. | Op | App Server call | Notes (verified) | | --- | --- | --- | | `open` | `thread/start` (then register) | `sandbox` is a STRING enum (`read-only`/`workspace-write`/`danger-full-access`); persists by default (`ephemeral:false`) → spawned lanes show in desktop app, matching the `→ @project:name` convention. | -| `new` | `thread/start` + `thread/name/set` + optional `thread/goal/set` + optional `turn/start` | Applies `.dispatch/config.toml` defaults/presets, name prefixes, verified session/turn options, optional native goal, and optional initial payload. `service_tier` is sent to both thread creation and the initial turn when configured. Output reports request acceptance, not assistant completion. | +| `new` | `thread/start` + `thread/name/set` + optional `thread/goal/set` + optional `turn/start` | Applies `.dispatch/config.toml` defaults/presets, name prefixes, verified session/turn options, optional native goal, and optional initial payload. Explicit `service_tier` values are resolved through the App Server model catalog before being sent to thread creation and the initial turn; omitted model/tier values preserve Codex defaults. Output reports request acceptance, not assistant completion. | | `attach` | `thread/read(includeTurns:false)` (+ register) | Metadata-only by default: verifies the thread id, registers a turn-write locked attached lane, assigns a dispatch ref, and stores sync state without loading turn history. `--sync` runs a quick local index refresh after registration. | | `sync` | `thread/read(includeTurns:false)` + bounded local JSONL parsing | Refreshes dispatch's index/cache for a managed thread: source file identity, sync state, latest event timestamp, latest turn id, preview, and selected metadata. Does not copy transcripts wholesale or grant attached-lane write authority. | | `send` (`mode=send`) | `turn/start` | Delivers a message the lane processes + answers. The DM/`send_message_to_thread` equivalent. `sandboxPolicy` here is an OBJECT (`{type:"readOnly"}`) — different encoding than `thread/start.sandbox`. | @@ -120,6 +121,7 @@ for collisions. Titles and `@handles` are mutable convenience labels. | `search` (`search`) | experimental `thread/search` for broad search; `thread/read(includeTurns:true)` for one-thread search | Broad search uses App Server search plus dispatch-side managed/unmanaged, repo/directory, and date filters. Thread-focused search reads one transcript and scans locally because App Server search has no thread-id filter. | | `roster` (`list`) | `thread/list` + registry + status | List results are under `result.data` (NOT `result.threads`); `useStateDbOnly:true` reads the persisted store. Current App Server also supports native `archived`, `cwd`, `searchTerm`, `sourceKinds`, and sort filters. | | `discover` (`list --unmanaged`) | `thread/list` state DB only | Lists persisted active Codex sessions that could be attached; asks for recently updated rows and does not resume or register them. | +| `models` | `config/read` + optional `model/list` | Reports current Codex model defaults and the App Server model catalog, including service-tier aliases such as user-facing `fast` to server-facing ids like `priority`. `--no-refresh` reads the registry cache plus current config defaults. | | `show` (`get`) | registry + optional `thread/read(includeTurns:true)` | Compact managed-thread summary with sync state and latest observed turn runtime/error state; optional transcript convenience. | | `transcript` (`tail`) | `thread/read(includeTurns:true)` | Persisted turn/item snapshot, not a full execution log. | | `watch` (`watch`) | raw app-server event stream, bounded by limit/timeout | Request/response bounded sample; a true infinite tail needs a subscription control-socket extension. | @@ -164,6 +166,8 @@ The client supports the full responder loop. v1 surfaces `waiting_on_approval` a - `lanes`: id, ref, ref_source/ref_payload/ref_mixer, handle (`@name` / `→ @project:name`), role, cwd, source (`own`|`attached`), status, pinned, created_at, updated_at, last_event_at. - `lane_sync_sources`: lane, sync state, source path/file identity, source size/mtime, parsed offsets, line count, last synced timestamp, error. - `lane_snapshots`: lane, display name, preview, cwd, source/model/session facts, latest event timestamp, latest turn id, transcript-partial flag. +- `model_catalog`: provider/model rows refreshed from App Server `model/list`, including reasoning efforts, service tiers, aliases, and first/last seen timestamps. +- `lane_model_settings`: per-lane model/provider/reasoning/service-tier provenance, distinguishing Dispatch-authored settings from configured defaults and observed metadata. - `triggers`: id, name, lane selector, when-spec (json), action-spec (json), guard-spec (json), enabled, last_fired_at. - `actions_log`: id, ts, lane, op, trigger_id?, request/decision, outcome — full audit of every send/action. diff --git a/docs/research/app-server-verification.md b/docs/research/app-server-verification.md index effec34..1f53e0b 100644 --- a/docs/research/app-server-verification.md +++ b/docs/research/app-server-verification.md @@ -59,7 +59,16 @@ Lifecycle/threads/turns: `thread/start resume fork read list loaded/list archive those when they match existing CLI/MCP semantics, then keep dispatch-side filters for managed/unmanaged and date predicates. - `turn/start` accepts `serviceTier` plus richer context/environment metadata. - Dispatch projects `service_tier` through configured `new` turns. + Dispatch resolves explicit `service_tier` values before projecting them + through configured `new` turns. +- `model/list` is the authoritative catalog for model ids, reasoning-effort + support, and service tiers. Prefer `serviceTiers` over the deprecated + `additionalSpeedTiers`; user-facing labels such as `fast` can map to a + server-facing tier id such as `priority` when the catalog advertises a tier + named `Fast`. +- `config/read` reports the current Codex defaults (model/provider, + reasoning effort, service tier). Dispatch records those defaults for output + truth but does not send omitted model/tier values just to mirror config. - `thread/resume` accepts `excludeTurns` / `initialTurnsPage`, useful for future live observation without heavy initial history hydration. diff --git a/docs/usage/README.md b/docs/usage/README.md index 2303951..8a285a7 100644 --- a/docs/usage/README.md +++ b/docs/usage/README.md @@ -35,6 +35,7 @@ Use this clean-machine smoke after installing or upgrading: ```bash dispatch doctor dispatch schema send +dispatch models --no-refresh dispatch up --json dispatch daemon status dispatch down --json @@ -199,11 +200,32 @@ developer_file = ".dispatch/instructions/reviewer.md" sandbox = "workspace-write" approval_policy = "on-request" developer_file = ".dispatch/instructions/builder.md" + +[presets.fast] +service_tier = "fast" +effort = "low" ``` Preset order matters: later presets win, and CLI flags win over presets. -When `service_tier` is configured, dispatch sends it to both `thread/start` and -the optional initial `turn/start` request. +Omit `model` unless you intentionally want Codex to use an explicit model. An +omitted model or service tier keeps the Codex default call shape; Dispatch still +records the configured default reported by `config/read` when it is available. + +Use `models` before pinning model or service-tier presets: + +```bash +uv run dispatch models +uv run dispatch models --no-refresh +uv run dispatch schema models +``` + +`models` refreshes from App Server `model/list` by default and reports the +configured default from `config/read`, each model's reasoning efforts, service +tiers, and aliases. For example, the user-facing `fast` alias resolves through +the advertised service tier named `Fast` and may send `serviceTier:"priority"` +to the App Server. If a requested tier is unavailable for the selected/default +model, `new` fails before starting the thread and prints the available tiers. +`--no-refresh` reads the local catalog cache plus current config defaults. Use `--goal` to create a native App Server goal before the initial message is sent. Slash commands in `--text` are not interpreted by dispatch; `--text "/goal ..."` @@ -473,6 +495,7 @@ uv run dispatch schema send uv run dispatch schema "list --unmanaged" uv run dispatch schema sync uv run dispatch schema watch +uv run dispatch schema models uv run dispatch schema "goal set" ``` @@ -490,8 +513,10 @@ uv run dispatch mcp MCP is grouped for agent ergonomics rather than one tool per op. Tools are grouped by workflow and safety boundary, for example thread read/write/destroy, trigger -read/write/destroy, and daemon read tools. Each grouped call chooses an `op` inside the -tool, and that op's arguments/schema still derive from the same contract registry. +read/write/destroy, and daemon read tools. The daemon read tool includes the +`models` op so agents can discover valid model/service-tier choices without +guessing. Each grouped call chooses an `op` inside the tool, and that op's +arguments/schema still derive from the same contract registry. Structured MCP outputs that identify a managed thread include the dispatch `ref`, full Codex id, title/handle, managed/source/status, and cwd when available. diff --git a/plugins/dispatch/README.md b/plugins/dispatch/README.md index 56eb3a4..cf919db 100644 --- a/plugins/dispatch/README.md +++ b/plugins/dispatch/README.md @@ -9,9 +9,12 @@ This workspace-local plugin exposes: The MCP server and skills expose the same derived operation registry as the CLI, including managed-thread creation/messaging, dispatch refs, persisted `tail`, -bounded live `watch`, native goals, triggers, schemas, and daemon status/log reads. +bounded live `watch`, native goals, triggers, schemas, model catalog reads, and +daemon status/log reads. `new --goal` creates native App Server goal state; `/goal ...` in message text is plain text and should not be used as a goal substitute. +Use `dispatch models` or the MCP daemon-read `models` op before pinning explicit +model/service-tier presets. Run `dispatch doctor` after installing or upgrading dispatch. It verifies the CLI entrypoints, Codex CLI/auth footprint, daemon socket/pidfile state, registry diff --git a/pyproject.toml b/pyproject.toml index f8aae4b..83b944e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "outfitter-dispatch" -version = "0.4.1" +version = "0.5.0" description = "Local control plane for orchestrating Codex agent lanes over the Codex App Server." readme = "README.md" requires-python = ">=3.13" diff --git a/skills/dispatch/SKILL.md b/skills/dispatch/SKILL.md index 088bff0..b360b7c 100644 --- a/skills/dispatch/SKILL.md +++ b/skills/dispatch/SKILL.md @@ -28,6 +28,7 @@ The current canonical operator grammar is: - daemon process: `up`, `down` - daemon reads: `daemon status`, `daemon log` - registry recovery: `registry migrate` +- model catalog: `models` - thread lifecycle/read/search: `new`, `attach`, `list`, `list --unmanaged`, `get`, `sync`, `tail`, `watch`, `search` - thread actions: `rename`, `archive`, `restore` @@ -93,6 +94,20 @@ that shape so agents do not create a thread that only looks goal-driven. use `get` to check `latest_turn`, `tail` for persisted history, or `watch` for a bounded live sample. +Before choosing explicit `--model`, `--model-provider`, or `--service-tier` +values, ask dispatch for the live catalog: + +```bash +uv run dispatch models +uv run dispatch models --no-refresh +uv run dispatch schema models +``` + +Omit model/tier values when Codex defaults are acceptable. If a preset uses a +user-facing tier such as `fast`, Dispatch resolves it through `model/list` +service tiers before starting the thread. Do not guess current model ids from +memory; use the catalog output and its `aliases` field. + Attached lanes are existing desktop Codex threads registered by raw thread id: ```bash @@ -289,6 +304,7 @@ Use `schema` for derived input/output schemas: ```bash uv run dispatch schema send uv run dispatch schema "list --unmanaged" +uv run dispatch schema models uv run dispatch schema "goal set" ``` @@ -307,6 +323,8 @@ The MCP surface is grouped for agent ergonomics, not one tool per CLI subcommand. Tools are grouped by workflow and safety boundary, and each call selects an `op` inside the tool. In this repo, the workspace-local Codex plugin lives at `plugins/dispatch`. It exposes these skills and the same MCP registry. +Use the daemon-read MCP tool's `models` op before setting explicit model or +service-tier arguments. If the plugin does not appear immediately, restart Codex for the workspace. Installed PyPI packages also include read-only copies of these skills and the plugin bundle under `outfitter.dispatch.assets`; use the repo copies for editing. diff --git a/src/outfitter/dispatch/client/client.py b/src/outfitter/dispatch/client/client.py index 1d59e99..104037f 100644 --- a/src/outfitter/dispatch/client/client.py +++ b/src/outfitter/dispatch/client/client.py @@ -18,14 +18,17 @@ from .errors import ClientError, TransportError from .events import LaneEvent from .models import ( + AppModel, ApprovalPolicy, ApprovalsReviewer, ClientInfo, + ConfigInfo, Decision, Effort, InitializeParams, InitializeResult, InjectItemsParams, + ModelListResult, Personality, ReasoningSummary, SandboxPolicy, @@ -164,6 +167,17 @@ async def initialize( await self._notify("initialized", {}) return InitializeResult.model_validate(result) + # --- config/models --------------------------------------------------------- + + async def config_read(self) -> ConfigInfo: + result = await self._request("config/read", {}) + payload = result.get("config") if isinstance(result.get("config"), dict) else result + return ConfigInfo.model_validate(payload) + + async def model_list(self) -> list[AppModel]: + result = await self._request("model/list", {}) + return ModelListResult.model_validate(result).data + # --- threads -------------------------------------------------------------- async def thread_start( diff --git a/src/outfitter/dispatch/client/models.py b/src/outfitter/dispatch/client/models.py index f67a6e4..2c9e3b6 100644 --- a/src/outfitter/dispatch/client/models.py +++ b/src/outfitter/dispatch/client/models.py @@ -14,9 +14,9 @@ from __future__ import annotations -from typing import Literal +from typing import Any, Literal -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, field_validator from pydantic.alias_generators import to_camel ThreadSandbox = Literal["read-only", "workspace-write", "danger-full-access"] @@ -78,6 +78,65 @@ class InitializeResult(WireModel): platform_os: str | None = None +# --- config/model catalog ------------------------------------------------------ + + +class ConfigInfo(WireModel): + """Subset of ``config/read`` used for model/service-tier defaults.""" + + model: str | None = None + model_provider: str | None = None + service_tier: str | None = None + model_reasoning_effort: str | None = None + + +class ModelServiceTier(WireModel): + id: str + name: str + description: str + + +class AppModel(WireModel): + """Subset of one ``model/list`` row. + + ``additionalSpeedTiers`` is deprecated by the app-server schema, but kept as + a fallback for older binaries. ``serviceTiers`` is the canonical source. + """ + + id: str + model: str | None = None + display_name: str | None = None + name: str | None = None + description: str | None = None + is_default: bool | None = None + hidden: bool | None = None + default_reasoning_effort: str | None = None + supported_reasoning_efforts: list[str] = Field(default_factory=list) + default_service_tier: str | None = None + service_tiers: list[ModelServiceTier] = Field(default_factory=list) + additional_speed_tiers: list[str] = Field(default_factory=list) + + @field_validator("supported_reasoning_efforts", mode="before") + @classmethod + def _normalize_supported_reasoning_efforts(cls, value: Any) -> list[str]: + if not isinstance(value, list): + return [] + efforts: list[str] = [] + for item in value: + if isinstance(item, str): + efforts.append(item) + elif isinstance(item, dict): + effort = item.get("reasoningEffort") or item.get("effort") or item.get("id") + if isinstance(effort, str): + efforts.append(effort) + return efforts + + +class ModelListResult(WireModel): + data: list[AppModel] = Field(default_factory=list) + next_cursor: str | None = None + + # --- shared shapes ------------------------------------------------------------ @@ -116,6 +175,9 @@ class ThreadInfo(WireModel): source: str | None = None thread_source: str | None = None model_provider: str | None = None + model: str | None = None + reasoning_effort: str | None = None + service_tier: str | None = None created_at: int | None = None updated_at: int | None = None turns: list[dict[str, object]] = Field(default_factory=list) diff --git a/src/outfitter/dispatch/contracts/context.py b/src/outfitter/dispatch/contracts/context.py index 270d777..ffecf98 100644 --- a/src/outfitter/dispatch/contracts/context.py +++ b/src/outfitter/dispatch/contracts/context.py @@ -22,8 +22,10 @@ from outfitter.dispatch.client.events import LaneEvent from outfitter.dispatch.client.models import ( + AppModel, ApprovalPolicy, ApprovalsReviewer, + ConfigInfo, Decision, Effort, Personality, @@ -47,6 +49,10 @@ class LaneClient(Protocol): """The App Server primitives handlers depend on (ADR-0006 DI seam).""" + async def config_read(self) -> ConfigInfo: ... + + async def model_list(self) -> list[AppModel]: ... + async def thread_start( self, cwd: str | None, diff --git a/src/outfitter/dispatch/contracts/derive_cli.py b/src/outfitter/dispatch/contracts/derive_cli.py index f94f192..3ea2720 100644 --- a/src/outfitter/dispatch/contracts/derive_cli.py +++ b/src/outfitter/dispatch/contracts/derive_cli.py @@ -54,6 +54,7 @@ class CliRoute: CliRoute(("rename",), "lane-rename", ("old", "new")), CliRoute(("archive",), "archive", ("target",)), CliRoute(("restore",), "restore", ("target",)), + CliRoute(("models",), "models"), CliRoute(("goal", "status"), "goal-get", ("lane",)), CliRoute(("goal", "clear"), "goal-clear", ("lane",)), CliRoute(("trigger", "add"), "trigger-add"), diff --git a/src/outfitter/dispatch/contracts/derive_mcp.py b/src/outfitter/dispatch/contracts/derive_mcp.py index ae523c4..69c724c 100644 --- a/src/outfitter/dispatch/contracts/derive_mcp.py +++ b/src/outfitter/dispatch/contracts/derive_mcp.py @@ -104,9 +104,9 @@ class _ToolGroup: ), _ToolGroup( name="dispatch_daemon_read", - summary="Read daemon health and audit log state.", + summary="Read daemon, model catalog, and audit log state.", intent="read", - actions=(("status", "status"), ("log", "log")), + actions=(("status", "status"), ("log", "log"), ("models", "models")), ), ) diff --git a/src/outfitter/dispatch/core/handlers.py b/src/outfitter/dispatch/core/handlers.py index 2163963..c8889d5 100644 --- a/src/outfitter/dispatch/core/handlers.py +++ b/src/outfitter/dispatch/core/handlers.py @@ -34,9 +34,19 @@ ValidationError, project_error, ) -from outfitter.dispatch.registry.models import Lane, LaneSource, LaneStatus, LaneSync, SyncState +from outfitter.dispatch.registry.models import ( + Lane, + LaneModelSettings, + LaneSource, + LaneStatus, + LaneSync, + ModelCatalogEntry, + ServiceTierSource, + SyncState, +) from . import queue +from .model_registry import refresh_model_catalog, resolve_model_settings from .models import ( ActionAck, ActionView, @@ -63,6 +73,11 @@ LatestTurnView, LogInput, LogOutput, + ModelCatalogItem, + ModelCatalogOutput, + ModelConfigView, + ModelServiceTierView, + ModelsInput, NewInput, NewLane, OpenInput, @@ -73,10 +88,12 @@ SearchMatch, SearchOutput, SendInput, + ServiceTierView, ShowInput, StatusInput, StatusOutput, ThreadActionRef, + ThreadModelView, ThreadTargetInput, TranscriptInput, TranscriptItem, @@ -195,9 +212,78 @@ def _latest_turn_view(lane: Lane) -> LatestTurnView: ) -def _list_item(lane: Lane, sync: LaneSync | None) -> LaneListItem: +def _model_view_from_values( + *, + provider: str | None = None, + model: str | None = None, + reasoning_effort: str | None = None, + requested_service_tier: str | None = None, + resolved_service_tier: str | None = None, + service_tier_name: str | None = None, + service_tier_source: ServiceTierSource = "unknown", +) -> ThreadModelView: + return ThreadModelView( + provider=provider, + model=model, + reasoning_effort=reasoning_effort, + service_tier=ServiceTierView( + requested=requested_service_tier, + resolved=resolved_service_tier, + name=service_tier_name, + source=service_tier_source, + ), + ) + + +def _model_view( + settings: LaneModelSettings | None = None, sync: LaneSync | None = None +) -> ThreadModelView: + if settings is not None: + return _model_view_from_values( + provider=settings.model_provider, + model=settings.model, + reasoning_effort=settings.reasoning_effort, + requested_service_tier=settings.requested_service_tier, + resolved_service_tier=settings.resolved_service_tier, + service_tier_name=settings.service_tier_name, + service_tier_source=settings.service_tier_source, + ) + if sync is not None: + return _model_view_from_values( + provider=sync.model_provider, + model=sync.model, + reasoning_effort=sync.reasoning_effort, + service_tier_source="observed" + if sync.model_provider or sync.model or sync.reasoning_effort + else "unknown", + ) + return ThreadModelView() + + +def _model_view_from_thread(thread: ThreadInfo) -> ThreadModelView: + has_model_data = any( + ( + thread.model_provider, + thread.model, + thread.reasoning_effort, + thread.service_tier, + ) + ) + return _model_view_from_values( + provider=thread.model_provider, + model=thread.model, + reasoning_effort=thread.reasoning_effort, + resolved_service_tier=thread.service_tier, + service_tier_source="observed" if has_model_data else "unknown", + ) + + +def _list_item(lane: Lane, sync: LaneSync | None, model: LaneModelSettings | None) -> LaneListItem: return LaneListItem( - **_ref(lane).model_dump(), sync=_sync_view(sync), latest_turn=_latest_turn_view(lane) + **_ref(lane).model_dump(), + sync=_sync_view(sync), + latest_turn=_latest_turn_view(lane), + model=_model_view(model, sync), ) @@ -281,6 +367,14 @@ async def new_lane(inp: NewInput, ctx: Ctx) -> NewLane: raise ValidationError("native goals require non-ephemeral threads") sandbox = settings.sandbox or "read-only" approval_policy = settings.approval_policy or "never" + resolved_model = await resolve_model_settings( + ctx, + model=settings.model, + model_provider=settings.model_provider, + reasoning_effort=settings.effort, + service_tier=settings.service_tier, + ) + explicit_service_tier = resolved_model.resolved_service_tier if settings.service_tier else None thread = await ctx.client.thread_start( cwd=str(resolved.cwd), sandbox=sandbox, @@ -289,7 +383,7 @@ async def new_lane(inp: NewInput, ctx: Ctx) -> NewLane: base_instructions=resolved.base_instructions, developer_instructions=resolved.developer_instructions, personality=settings.personality, - service_tier=settings.service_tier, + service_tier=explicit_service_tier, model=settings.model, model_provider=settings.model_provider, ephemeral=bool(settings.ephemeral), @@ -297,6 +391,8 @@ async def new_lane(inp: NewInput, ctx: Ctx) -> NewLane: lane = await ctx.registry.add_lane( id=thread.id, handle=resolved.handle, source="own", cwd=str(resolved.cwd), status="idle" ) + lane_model = resolved_model.for_lane(lane.id, ctx.registry.now_iso()) + await ctx.registry.upsert_lane_model_settings(lane_model) await ctx.registry.log_action("new", lane=lane.id, detail=resolved.display_name) try: await ctx.client.thread_set_name(thread.id, resolved.display_name) @@ -331,7 +427,7 @@ async def new_lane(inp: NewInput, ctx: Ctx) -> NewLane: effort=settings.effort, summary=settings.summary, model=settings.model, - service_tier=settings.service_tier, + service_tier=explicit_service_tier, output_schema=settings.output_schema, personality=settings.personality, ) @@ -353,6 +449,7 @@ async def new_lane(inp: NewInput, ctx: Ctx) -> NewLane: message_accepted=message_accepted, goal_set=goal_set, latest_turn=_latest_turn_view(lane), + model=_model_view(lane_model), ) @@ -398,6 +495,7 @@ async def attach_lane(inp: AttachInput, ctx: Ctx) -> LaneRef: audit_op="attach", audit_detail=handle, ) + await _record_observed_model(lane, thread, sync, ctx) ctx.log.info("lane.attach", lane=lane.id, handle=handle) return _ref(lane) @@ -416,7 +514,36 @@ async def _sync_lane( lane: Lane, ctx: Ctx, *, full: bool, metadata: ThreadInfo | None = None ) -> LaneSync: thread = metadata or await _read_thread_metadata(ctx, lane.id) - return await ctx.registry.upsert_lane_sync(await _sync_from_thread(lane.id, thread, full=full)) + sync = await ctx.registry.upsert_lane_sync(await _sync_from_thread(lane.id, thread, full=full)) + await _record_observed_model(lane, thread, sync, ctx) + return sync + + +async def _record_observed_model( + lane: Lane, thread: ThreadInfo, sync: LaneSync, ctx: Ctx +) -> LaneModelSettings | None: + existing = await ctx.registry.get_lane_model_settings(lane.id) + if existing is not None and existing.service_tier_source == "dispatch": + return existing + provider = sync.model_provider or thread.model_provider + model = sync.model or thread.model + reasoning_effort = sync.reasoning_effort or thread.reasoning_effort + service_tier = thread.service_tier + if not any((provider, model, reasoning_effort, service_tier)): + return existing + observed = LaneModelSettings( + lane=lane.id, + model_provider=provider, + model=model, + reasoning_effort=reasoning_effort, + requested_service_tier=None, + resolved_service_tier=service_tier, + service_tier_name=None, + service_tier_source="observed", + updated_at=ctx.registry.now_iso(), + ) + await ctx.registry.upsert_lane_model_settings(observed) + return observed async def _sync_from_thread(lane_id: str, thread: ThreadInfo, *, full: bool) -> LaneSync: @@ -493,8 +620,8 @@ def _metadata_sync( source=source or thread.source, thread_source=thread_source or thread.thread_source, model_provider=model_provider or thread.model_provider, - model=model, - reasoning_effort=reasoning_effort, + model=model or thread.model, + reasoning_effort=reasoning_effort or thread.reasoning_effort, session_id=session_id or thread.session_id, latest_event_at=latest_event_at, latest_turn_id=latest_turn_id, @@ -609,6 +736,7 @@ async def show(inp: ShowInput, ctx: Ctx) -> LaneDetail: raise NotFoundError(f"no managed thread {inp.lane!r}") lane = resolved.lane sync = await ctx.registry.get_lane_sync(lane.id) + model_settings = await ctx.registry.get_lane_model_settings(lane.id) transcript: list[TranscriptItem] = [] if inp.include_transcript: result = await ctx.client.thread_read(lane.id, include_turns=True) @@ -623,6 +751,7 @@ async def show(inp: ShowInput, ctx: Ctx) -> LaneDetail: active_turn_id=lane.active_turn_id, latest_turn=_latest_turn_view(lane), sync=_sync_view(sync), + model=_model_view(model_settings, sync), transcript=transcript, ) @@ -630,10 +759,15 @@ async def show(inp: ShowInput, ctx: Ctx) -> LaneDetail: async def sync_lane(inp: LaneSyncInput, ctx: Ctx) -> LaneSyncResult: lane = await _resolve(ctx, inp.lane) sync = await _sync_lane(lane, ctx, full=inp.full) + model_settings = await ctx.registry.get_lane_model_settings(lane.id) await ctx.registry.log_action( "sync", lane=lane.id, detail=f"state={sync.state}; full={inp.full}" ) - return LaneSyncResult(**_managed_identity(lane), sync=_sync_view(sync)) + return LaneSyncResult( + **_managed_identity(lane), + sync=_sync_view(sync), + model=_model_view(model_settings, sync), + ) async def rename_lane(inp: LaneRenameInput, ctx: Ctx) -> ThreadActionRef: @@ -1049,6 +1183,14 @@ async def goal_clear(inp: GoalClearInput, ctx: Ctx) -> GoalView: async def fork(inp: ForkInput, ctx: Ctx) -> LaneRef: source = await _resolve(ctx, inp.lane) _require_writable(source) + resolved_model = await resolve_model_settings( + ctx, + model=inp.model, + model_provider=inp.model_provider, + reasoning_effort=None, + service_tier=inp.service_tier, + ) + explicit_service_tier = resolved_model.resolved_service_tier if inp.service_tier else None thread = await ctx.client.thread_fork( source.id, cwd=inp.cwd or source.cwd, @@ -1057,7 +1199,7 @@ async def fork(inp: ForkInput, ctx: Ctx) -> LaneRef: approvals_reviewer=inp.approvals_reviewer, base_instructions=inp.base_instructions, developer_instructions=inp.developer_instructions, - service_tier=inp.service_tier, + service_tier=explicit_service_tier, model=inp.model, model_provider=inp.model_provider, ephemeral=inp.ephemeral, @@ -1070,6 +1212,9 @@ async def fork(inp: ForkInput, ctx: Ctx) -> LaneRef: cwd=thread.cwd or inp.cwd or source.cwd, status="idle", ) + await ctx.registry.upsert_lane_model_settings( + resolved_model.for_lane(lane.id, ctx.registry.now_iso()) + ) await ctx.registry.log_action("fork", lane=lane.id, detail=f"from {source.id}") try: await ctx.client.thread_set_name(thread.id, handle.removeprefix("@")) @@ -1099,7 +1244,10 @@ async def compact(inp: CompactInput, ctx: Ctx) -> ActionAck: async def roster(inp: RosterInput, ctx: Ctx) -> Roster: lanes = await ctx.registry.list_lanes(include_archived=inp.include_archived) syncs = await ctx.registry.get_lane_sync_many([lane.id for lane in lanes]) - return Roster(lanes=[_list_item(lane, syncs.get(lane.id)) for lane in lanes]) + models = await ctx.registry.get_lane_model_settings_many([lane.id for lane in lanes]) + return Roster( + lanes=[_list_item(lane, syncs.get(lane.id), models.get(lane.id)) for lane in lanes] + ) def _short(text: str | None, limit: int = _PREVIEW_MAX) -> str | None: @@ -1120,6 +1268,7 @@ def _session(thread: ThreadInfo) -> DiscoveredSession: status=thread.status.type if thread.status is not None else None, source=thread.source, ephemeral=thread.ephemeral, + model=_model_view_from_thread(thread), ) @@ -1137,6 +1286,66 @@ async def discover(inp: DiscoverInput, ctx: Ctx) -> Discovery: return Discovery(sessions=[_session(thread) for thread in threads]) +async def models(inp: ModelsInput, ctx: Ctx) -> ModelCatalogOutput: + refreshed_at: str | None + if inp.refresh: + snapshot = await refresh_model_catalog(ctx) + entries = snapshot.models + config = snapshot.config + refreshed_at = snapshot.refreshed_at + source = "app-server" + else: + config = await ctx.client.config_read() + entries = await ctx.registry.list_model_catalog() + source = "registry" + refreshed_at = max((entry.last_seen_at for entry in entries), default=None) + if not inp.include_hidden: + entries = [entry for entry in entries if not entry.hidden] + return ModelCatalogOutput( + refreshed_at=refreshed_at, + source=source, + configured_default=ModelConfigView( + model=config.model, + model_provider=config.model_provider, + service_tier=config.service_tier, + model_reasoning_effort=config.model_reasoning_effort, + ), + models=[_model_catalog_item(entry) for entry in entries], + ) + + +def _model_catalog_item(entry: ModelCatalogEntry) -> ModelCatalogItem: + aliases: dict[str, str] = {} + for tier in entry.service_tiers: + if tier.name.lower() == "fast": + aliases["fast"] = tier.id + if "fast" not in aliases and any( + tier.lower() == "fast" for tier in entry.additional_speed_tiers + ): + aliases["fast"] = "fast" + return ModelCatalogItem( + id=entry.id, + provider=entry.provider, + display_name=entry.display_name, + description=entry.description, + is_default=entry.is_default, + hidden=entry.hidden, + default_reasoning_effort=entry.default_reasoning_effort, + supported_reasoning_efforts=entry.supported_reasoning_efforts, + default_service_tier=entry.default_service_tier, + service_tiers=[ + ModelServiceTierView( + id=tier.id, + name=tier.name, + description=tier.description, + ) + for tier in entry.service_tiers + ], + aliases=aliases, + last_seen_at=entry.last_seen_at, + ) + + async def archive(inp: ThreadTargetInput, ctx: Ctx) -> ThreadActionRef: thread_id, lane = await _resolve_thread_target(ctx, inp.target) try: diff --git a/src/outfitter/dispatch/core/model_registry.py b/src/outfitter/dispatch/core/model_registry.py new file mode 100644 index 0000000..a6c398c --- /dev/null +++ b/src/outfitter/dispatch/core/model_registry.py @@ -0,0 +1,177 @@ +"""App-server-backed model catalog and service-tier resolution. + +The Codex product may expose a user-facing tier like ``fast`` while the +app-server/API service tier is ``priority``. Keep that mapping in one place and +source it from ``model/list`` instead of hard-coded model names. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +from outfitter.dispatch.client.models import AppModel, ConfigInfo +from outfitter.dispatch.contracts.context import Ctx +from outfitter.dispatch.contracts.errors import ValidationError +from outfitter.dispatch.registry.models import ( + LaneModelSettings, + ModelCatalogEntry, + ServiceTierEntry, + ServiceTierSource, +) + +_NEUTRAL_SERVICE_TIERS = {"auto", "default"} + + +@dataclass(frozen=True) +class ModelCatalogSnapshot: + models: list[ModelCatalogEntry] + config: ConfigInfo + refreshed_at: str + + +@dataclass(frozen=True) +class ResolvedModelSettings: + model_provider: str | None + model: str | None + reasoning_effort: str | None + requested_service_tier: str | None + resolved_service_tier: str | None + service_tier_name: str | None + service_tier_source: ServiceTierSource + + def for_lane(self, lane_id: str, updated_at: str) -> LaneModelSettings: + return LaneModelSettings( + lane=lane_id, + model_provider=self.model_provider, + model=self.model, + reasoning_effort=self.reasoning_effort, + requested_service_tier=self.requested_service_tier, + resolved_service_tier=self.resolved_service_tier, + service_tier_name=self.service_tier_name, + service_tier_source=self.service_tier_source, + updated_at=updated_at, + ) + + +async def refresh_model_catalog(ctx: Ctx, *, source: str = "app-server") -> ModelCatalogSnapshot: + config = await ctx.client.config_read() + models = await ctx.client.model_list() + now = ctx.registry.now_iso() + entries = [ + _catalog_entry(model, config=config, refreshed_at=now, source=source) for model in models + ] + await ctx.registry.upsert_model_catalog(entries) + return ModelCatalogSnapshot(models=entries, config=config, refreshed_at=now) + + +async def resolve_model_settings( + ctx: Ctx, + *, + model: str | None, + model_provider: str | None, + reasoning_effort: str | None, + service_tier: str | None, +) -> ResolvedModelSettings: + if service_tier is None: + config = await ctx.client.config_read() + configured_tier = config.service_tier + return ResolvedModelSettings( + model_provider=model_provider or config.model_provider, + model=model or config.model, + reasoning_effort=reasoning_effort or config.model_reasoning_effort, + requested_service_tier=None, + resolved_service_tier=configured_tier, + service_tier_name=None, + service_tier_source="configured_default" if configured_tier else "unknown", + ) + + snapshot = await refresh_model_catalog(ctx) + provider = model_provider or snapshot.config.model_provider or "openai" + resolved_model = model or snapshot.config.model + if resolved_model is None: + raise ValidationError( + f"service_tier {service_tier!r} needs a model; set --model or configure a Codex default" + ) + + entry = _find_model(snapshot.models, resolved_model, provider=provider) + if entry is None: + available = ", ".join(item.id for item in snapshot.models) or "none" + raise ValidationError(f"unknown model {resolved_model!r}; available models: {available}") + + resolved_tier, tier_name = _resolve_service_tier(entry, service_tier) + return ResolvedModelSettings( + model_provider=provider, + model=resolved_model, + reasoning_effort=reasoning_effort or snapshot.config.model_reasoning_effort, + requested_service_tier=service_tier, + resolved_service_tier=resolved_tier, + service_tier_name=tier_name, + service_tier_source="dispatch", + ) + + +def _catalog_entry( + model: AppModel, *, config: ConfigInfo, refreshed_at: str, source: str +) -> ModelCatalogEntry: + return ModelCatalogEntry( + id=model.id, + provider=config.model_provider or "openai", + display_name=model.display_name or model.name, + description=model.description, + is_default=model.is_default, + hidden=model.hidden, + default_reasoning_effort=model.default_reasoning_effort, + supported_reasoning_efforts=model.supported_reasoning_efforts, + default_service_tier=model.default_service_tier, + service_tiers=[ + ServiceTierEntry(id=tier.id, name=tier.name, description=tier.description) + for tier in model.service_tiers + ], + additional_speed_tiers=model.additional_speed_tiers, + first_seen_at=refreshed_at, + last_seen_at=refreshed_at, + source=source, + ) + + +def _find_model( + models: list[ModelCatalogEntry], model_id: str, *, provider: str +) -> ModelCatalogEntry | None: + for model in models: + if model.id == model_id and model.provider == provider: + return model + for model in models: + if model.id == model_id: + return model + return None + + +def _resolve_service_tier(model: ModelCatalogEntry, requested: str) -> tuple[str, str | None]: + requested_norm = requested.lower() + if requested_norm in _NEUTRAL_SERVICE_TIERS: + return requested_norm, requested_norm.title() + + for tier in model.service_tiers: + if tier.id.lower() == requested_norm or tier.name.lower() == requested_norm: + return tier.id, tier.name + + if requested_norm == "fast": + for tier in model.service_tiers: + if tier.name.lower() == "fast": + return tier.id, tier.name + if any(tier.lower() == "fast" for tier in model.additional_speed_tiers): + return "fast", "Fast" + + available = _available_tiers(model) + raise ValidationError( + f"model {model.id!r} does not advertise service_tier {requested!r}; " + f"available service tiers: {available}" + ) + + +def _available_tiers(model: ModelCatalogEntry) -> str: + tiers = [f"{tier.id} ({tier.name})" for tier in model.service_tiers] + if model.additional_speed_tiers: + tiers.extend(f"{tier} (deprecated speed tier)" for tier in model.additional_speed_tiers) + tiers.extend(sorted(_NEUTRAL_SERVICE_TIERS)) + return ", ".join(tiers) if tiers else "none" diff --git a/src/outfitter/dispatch/core/models.py b/src/outfitter/dispatch/core/models.py index 0d52015..7ed89f1 100644 --- a/src/outfitter/dispatch/core/models.py +++ b/src/outfitter/dispatch/core/models.py @@ -19,6 +19,7 @@ from outfitter.dispatch.registry.models import ( LaneSource, LaneStatus, + ServiceTierSource, SyncState, TurnRuntimeStatus, ) @@ -238,6 +239,13 @@ class DiscoverInput(BaseModel): limit: int = Field(default=50, ge=1, description="Max persisted Codex sessions to list.") +class ModelsInput(BaseModel): + refresh: bool = Field( + default=True, description="Refresh from the Codex App Server before returning the catalog." + ) + include_hidden: bool = Field(default=False, description="Include hidden model catalog rows.") + + # --- outputs ------------------------------------------------------------------ @@ -298,9 +306,24 @@ class LatestTurnView(BaseModel): error_at: str | None = Field(default=None, description="When the latest turn error occurred.") +class ServiceTierView(BaseModel): + requested: str | None = None + resolved: str | None = None + name: str | None = None + source: ServiceTierSource = "unknown" + + +class ThreadModelView(BaseModel): + provider: str | None = None + model: str | None = None + reasoning_effort: str | None = None + service_tier: ServiceTierView = Field(default_factory=ServiceTierView) + + class LaneListItem(LaneRef): sync: LaneSyncView = Field(default_factory=LaneSyncView) latest_turn: LatestTurnView = Field(default_factory=LatestTurnView) + model: ThreadModelView = Field(default_factory=ThreadModelView) class NewLane(LaneRef): @@ -311,12 +334,14 @@ class NewLane(LaneRef): default=False, description="Whether a native App Server goal was set before launch." ) latest_turn: LatestTurnView = Field(default_factory=LatestTurnView) + model: ThreadModelView = Field(default_factory=ThreadModelView) class LaneDetail(LaneRef): active_turn_id: str | None = None latest_turn: LatestTurnView = Field(default_factory=LatestTurnView) sync: LaneSyncView = Field(default_factory=LaneSyncView) + model: ThreadModelView = Field(default_factory=ThreadModelView) transcript: list[TranscriptItem] = Field(default_factory=list) @@ -365,6 +390,7 @@ class ActionAck(ManagedThreadIdentity): class LaneSyncResult(ManagedThreadIdentity): sync: LaneSyncView + model: ThreadModelView = Field(default_factory=ThreadModelView) class Roster(BaseModel): @@ -382,6 +408,7 @@ class DiscoveredSession(BaseModel): status: str | None = None source: str | None = None ephemeral: bool | None = None + model: ThreadModelView = Field(default_factory=ThreadModelView) class Discovery(BaseModel): @@ -411,6 +438,41 @@ class SearchOutput(BaseModel): experimental: bool = True +class ModelServiceTierView(BaseModel): + id: str + name: str + description: str + + +class ModelCatalogItem(BaseModel): + id: str + provider: str + display_name: str | None = None + description: str | None = None + is_default: bool | None = None + hidden: bool | None = None + default_reasoning_effort: str | None = None + supported_reasoning_efforts: list[str] = Field(default_factory=list) + default_service_tier: str | None = None + service_tiers: list[ModelServiceTierView] = Field(default_factory=list) + aliases: dict[str, str] = Field(default_factory=dict) + last_seen_at: str + + +class ModelConfigView(BaseModel): + model: str | None = None + model_provider: str | None = None + service_tier: str | None = None + model_reasoning_effort: str | None = None + + +class ModelCatalogOutput(BaseModel): + refreshed_at: str | None = None + source: str + configured_default: ModelConfigView + models: list[ModelCatalogItem] + + # --- trigger ops -------------------------------------------------------------- TriggerWhenKind = Literal["interval", "cron", "idle_for", "turn_completed", "waiting_on_approval"] diff --git a/src/outfitter/dispatch/core/ops.py b/src/outfitter/dispatch/core/ops.py index 694bc25..0437edd 100644 --- a/src/outfitter/dispatch/core/ops.py +++ b/src/outfitter/dispatch/core/ops.py @@ -29,6 +29,8 @@ LaneSyncResult, LogInput, LogOutput, + ModelCatalogOutput, + ModelsInput, NewInput, NewLane, OpenInput, @@ -106,6 +108,17 @@ "error": None, "error_at": None, }, + "model": { + "provider": "openai", + "model": "gpt-5.5", + "reasoning_effort": "xhigh", + "service_tier": { + "requested": None, + "resolved": None, + "name": None, + "source": "unknown", + }, + }, }, ) ], @@ -273,6 +286,33 @@ ], ) +MODELS = define_op( + id="models", + summary="List available Codex models and service tiers from the App Server catalog.", + input=ModelsInput, + output=ModelCatalogOutput, + intent="read", + idempotent=True, + handler=handlers.models, + examples=[ + Example( + "empty", + input={"refresh": False}, + output={ + "refreshed_at": None, + "source": "registry", + "configured_default": { + "model": "gpt-5.5", + "model_provider": "openai", + "service_tier": None, + "model_reasoning_effort": "xhigh", + }, + "models": [], + }, + ) + ], +) + ARCHIVE = define_op( id="archive", summary="Archive a managed or unmanaged Codex thread.", @@ -494,6 +534,7 @@ ROSTER, DISCOVER, SEARCH, + MODELS, ARCHIVE, RESTORE, GOAL_GET, diff --git a/src/outfitter/dispatch/doctor.py b/src/outfitter/dispatch/doctor.py index c5b7b8a..19db6e2 100644 --- a/src/outfitter/dispatch/doctor.py +++ b/src/outfitter/dispatch/doctor.py @@ -302,6 +302,8 @@ def _registry_check() -> DoctorCheck: "queued_messages", "lane_sync_sources", "lane_snapshots", + "model_catalog", + "lane_model_settings", } data.update( { @@ -343,18 +345,6 @@ def _registry_check() -> DoctorCheck: ), data=data, ) - if missing: - return DoctorCheck( - name="registry", - status="fail", - summary="registry is missing required tables", - detail=", ".join(missing), - recovery=( - "Run `dispatch down`, `dispatch registry migrate`, then `dispatch up`. " - "If migration fails, inspect the backup path from `registry migrate`." - ), - data=data, - ) if version < SCHEMA_VERSION: return DoctorCheck( name="registry", @@ -367,6 +357,18 @@ def _registry_check() -> DoctorCheck: ), data=data, ) + if missing: + return DoctorCheck( + name="registry", + status="fail", + summary="registry is missing required tables", + detail=", ".join(missing), + recovery=( + "Run `dispatch down`, `dispatch registry migrate`, then `dispatch up`. " + "If migration fails, inspect the backup path from `registry migrate`." + ), + data=data, + ) return DoctorCheck(name="registry", status="ok", summary="registry is readable", data=data) diff --git a/src/outfitter/dispatch/registry/models.py b/src/outfitter/dispatch/registry/models.py index ebde865..79e7eb2 100644 --- a/src/outfitter/dispatch/registry/models.py +++ b/src/outfitter/dispatch/registry/models.py @@ -12,6 +12,42 @@ TurnRuntimeStatus = Literal["started", "completed", "failed"] SyncState = Literal["unknown", "metadata", "partial", "complete", "error"] QueuedMessageStatus = Literal["pending", "sending", "sent", "error"] +ServiceTierSource = Literal["dispatch", "configured_default", "observed", "unknown"] + + +class ServiceTierEntry(BaseModel): + id: str + name: str + description: str + + +class ModelCatalogEntry(BaseModel): + id: str + provider: str = "openai" + display_name: str | None = None + description: str | None = None + is_default: bool | None = None + hidden: bool | None = None + default_reasoning_effort: str | None = None + supported_reasoning_efforts: list[str] = Field(default_factory=list) + default_service_tier: str | None = None + service_tiers: list[ServiceTierEntry] = Field(default_factory=list) + additional_speed_tiers: list[str] = Field(default_factory=list) + first_seen_at: str + last_seen_at: str + source: str = "app-server" + + +class LaneModelSettings(BaseModel): + lane: str + model_provider: str | None = None + model: str | None = None + reasoning_effort: str | None = None + requested_service_tier: str | None = None + resolved_service_tier: str | None = None + service_tier_name: str | None = None + service_tier_source: ServiceTierSource = "unknown" + updated_at: str class Lane(BaseModel): diff --git a/src/outfitter/dispatch/registry/store.py b/src/outfitter/dispatch/registry/store.py index a678193..39ff919 100644 --- a/src/outfitter/dispatch/registry/store.py +++ b/src/outfitter/dispatch/registry/store.py @@ -7,6 +7,7 @@ from __future__ import annotations +import json from collections.abc import Callable from datetime import UTC, datetime from pathlib import Path @@ -20,17 +21,20 @@ ActionRecord, Guard, Lane, + LaneModelSettings, LaneSource, LaneStatus, LaneSync, + ModelCatalogEntry, QueuedMessage, + ServiceTierEntry, Trigger, WhenAdapter, ) from .refs import BASE58BTC_ALPHABET, CODEX_REF_SOURCE, codex_ref_payload, make_ref Clock = Callable[[], datetime] -SCHEMA_VERSION = 4 +SCHEMA_VERSION = 5 def _utcnow() -> datetime: @@ -119,6 +123,35 @@ def _utcnow() -> datetime: transcript_partial INTEGER NOT NULL DEFAULT 1, FOREIGN KEY(lane) REFERENCES lanes(id) ON DELETE CASCADE ); +CREATE TABLE IF NOT EXISTS model_catalog ( + id TEXT NOT NULL, + provider TEXT NOT NULL DEFAULT 'openai', + display_name TEXT, + description TEXT, + is_default INTEGER, + hidden INTEGER, + default_reasoning_effort TEXT, + supported_reasoning_efforts TEXT NOT NULL DEFAULT '[]', + default_service_tier TEXT, + service_tiers TEXT NOT NULL DEFAULT '[]', + additional_speed_tiers TEXT NOT NULL DEFAULT '[]', + first_seen_at TEXT NOT NULL, + last_seen_at TEXT NOT NULL, + source TEXT NOT NULL DEFAULT 'app-server', + PRIMARY KEY(provider, id) +); +CREATE TABLE IF NOT EXISTS lane_model_settings ( + lane TEXT PRIMARY KEY, + model_provider TEXT, + model TEXT, + reasoning_effort TEXT, + requested_service_tier TEXT, + resolved_service_tier TEXT, + service_tier_name TEXT, + service_tier_source TEXT NOT NULL DEFAULT 'unknown', + updated_at TEXT NOT NULL, + FOREIGN KEY(lane) REFERENCES lanes(id) ON DELETE CASCADE +); """ @@ -153,6 +186,9 @@ async def open(cls, path: str | Path = ":memory:", now: Clock = _utcnow) -> Regi async def close(self) -> None: await self._conn.close() + def now_iso(self) -> str: + return self._now().isoformat() + async def _migrate(self, user_version: int) -> None: if user_version < 3: await self._ensure_ref_columns() @@ -173,6 +209,8 @@ async def _migrate(self, user_version: int) -> None: ) if user_version < 4: await self._ensure_lane_runtime_columns() + if user_version < 5: + await self._ensure_model_registry_tables() async def _ensure_ref_columns(self) -> None: async with self._conn.execute("PRAGMA table_info(lanes)") as cur: @@ -196,6 +234,41 @@ async def _ensure_lane_runtime_columns(self) -> None: if name not in columns: await self._conn.execute(f"ALTER TABLE lanes ADD COLUMN {name} {definition}") + async def _ensure_model_registry_tables(self) -> None: + await self._conn.executescript( + """ + CREATE TABLE IF NOT EXISTS model_catalog ( + id TEXT NOT NULL, + provider TEXT NOT NULL DEFAULT 'openai', + display_name TEXT, + description TEXT, + is_default INTEGER, + hidden INTEGER, + default_reasoning_effort TEXT, + supported_reasoning_efforts TEXT NOT NULL DEFAULT '[]', + default_service_tier TEXT, + service_tiers TEXT NOT NULL DEFAULT '[]', + additional_speed_tiers TEXT NOT NULL DEFAULT '[]', + first_seen_at TEXT NOT NULL, + last_seen_at TEXT NOT NULL, + source TEXT NOT NULL DEFAULT 'app-server', + PRIMARY KEY(provider, id) + ); + CREATE TABLE IF NOT EXISTS lane_model_settings ( + lane TEXT PRIMARY KEY, + model_provider TEXT, + model TEXT, + reasoning_effort TEXT, + requested_service_tier TEXT, + resolved_service_tier TEXT, + service_tier_name TEXT, + service_tier_source TEXT NOT NULL DEFAULT 'unknown', + updated_at TEXT NOT NULL, + FOREIGN KEY(lane) REFERENCES lanes(id) ON DELETE CASCADE + ); + """ + ) + # --- lanes ---------------------------------------------------------------- async def add_lane( @@ -619,6 +692,111 @@ async def get_lane_sync_many(self, lane_ids: list[str]) -> dict[str, LaneSync]: rows = await cur.fetchall() return {sync.lane: sync for sync in (_row_to_lane_sync(row) for row in rows)} + # --- model catalog / lane model provenance --------------------------------- + + async def upsert_model_catalog(self, models: list[ModelCatalogEntry]) -> None: + for model in models: + existing = await self.get_model_catalog_entry(model.id, provider=model.provider) + first_seen_at = existing.first_seen_at if existing is not None else model.first_seen_at + await self._conn.execute( + "INSERT INTO model_catalog (id, provider, display_name, description, " + "is_default, hidden, default_reasoning_effort, supported_reasoning_efforts, " + "default_service_tier, service_tiers, additional_speed_tiers, first_seen_at, " + "last_seen_at, source) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) " + "ON CONFLICT(provider, id) DO UPDATE SET display_name = excluded.display_name, " + "description = excluded.description, is_default = excluded.is_default, " + "hidden = excluded.hidden, " + "default_reasoning_effort = excluded.default_reasoning_effort, " + "supported_reasoning_efforts = excluded.supported_reasoning_efforts, " + "default_service_tier = excluded.default_service_tier, " + "service_tiers = excluded.service_tiers, " + "additional_speed_tiers = excluded.additional_speed_tiers, " + "last_seen_at = excluded.last_seen_at, source = excluded.source", + ( + model.id, + model.provider, + model.display_name, + model.description, + _bool_or_none(model.is_default), + _bool_or_none(model.hidden), + model.default_reasoning_effort, + json.dumps(model.supported_reasoning_efforts), + model.default_service_tier, + json.dumps([tier.model_dump(mode="python") for tier in model.service_tiers]), + json.dumps(model.additional_speed_tiers), + first_seen_at, + model.last_seen_at, + model.source, + ), + ) + await self._conn.commit() + + async def list_model_catalog(self, provider: str | None = None) -> list[ModelCatalogEntry]: + query = "SELECT * FROM model_catalog" + params: tuple[str, ...] = () + if provider is not None: + query += " WHERE provider = ?" + params = (provider,) + query += " ORDER BY provider, hidden, id" + async with self._conn.execute(query, params) as cur: + rows = await cur.fetchall() + return [_row_to_model_catalog_entry(row) for row in rows] + + async def get_model_catalog_entry( + self, model_id: str, *, provider: str = "openai" + ) -> ModelCatalogEntry | None: + async with self._conn.execute( + "SELECT * FROM model_catalog WHERE provider = ? AND id = ?", + (provider, model_id), + ) as cur: + row = await cur.fetchone() + return _row_to_model_catalog_entry(row) if row is not None else None + + async def upsert_lane_model_settings(self, settings: LaneModelSettings) -> None: + await self._conn.execute( + "INSERT INTO lane_model_settings (lane, model_provider, model, reasoning_effort, " + "requested_service_tier, resolved_service_tier, service_tier_name, " + "service_tier_source, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) " + "ON CONFLICT(lane) DO UPDATE SET model_provider = excluded.model_provider, " + "model = excluded.model, reasoning_effort = excluded.reasoning_effort, " + "requested_service_tier = excluded.requested_service_tier, " + "resolved_service_tier = excluded.resolved_service_tier, " + "service_tier_name = excluded.service_tier_name, " + "service_tier_source = excluded.service_tier_source, updated_at = excluded.updated_at", + ( + settings.lane, + settings.model_provider, + settings.model, + settings.reasoning_effort, + settings.requested_service_tier, + settings.resolved_service_tier, + settings.service_tier_name, + settings.service_tier_source, + settings.updated_at, + ), + ) + await self._conn.commit() + + async def get_lane_model_settings(self, lane_id: str) -> LaneModelSettings | None: + async with self._conn.execute( + "SELECT * FROM lane_model_settings WHERE lane = ?", (lane_id,) + ) as cur: + row = await cur.fetchone() + return _row_to_lane_model_settings(row) if row is not None else None + + async def get_lane_model_settings_many( + self, lane_ids: list[str] + ) -> dict[str, LaneModelSettings]: + if not lane_ids: + return {} + placeholders = ", ".join("?" for _ in lane_ids) + async with self._conn.execute( + f"SELECT * FROM lane_model_settings WHERE lane IN ({placeholders})", tuple(lane_ids) + ) as cur: + rows = await cur.fetchall() + settings = (_row_to_lane_model_settings(row) for row in rows) + return {item.lane: item for item in settings} + # --- triggers ------------------------------------------------------------- async def add_trigger(self, trigger: Trigger) -> Trigger: @@ -718,6 +896,28 @@ def _row_dict(row: aiosqlite.Row) -> dict[str, object]: return dict(zip(row.keys(), tuple(row), strict=True)) +def _bool_or_none(value: bool | None) -> int | None: + return None if value is None else int(value) + + +def _json_str_list(value: object) -> list[str]: + if not value: + return [] + raw = json.loads(str(value)) + if not isinstance(raw, list): + return [] + return [str(item) for item in raw if isinstance(item, str)] + + +def _json_service_tiers(value: object) -> list[ServiceTierEntry]: + if not value: + return [] + raw = json.loads(str(value)) + if not isinstance(raw, list): + return [] + return [ServiceTierEntry.model_validate(item) for item in raw if isinstance(item, dict)] + + _LANE_SYNC_SELECT = """ SELECT src.lane AS lane, @@ -759,6 +959,20 @@ def _row_to_lane_sync(row: aiosqlite.Row) -> LaneSync: return LaneSync.model_validate(data) +def _row_to_model_catalog_entry(row: aiosqlite.Row) -> ModelCatalogEntry: + data = _row_dict(row) + data["is_default"] = None if data["is_default"] is None else bool(data["is_default"]) + data["hidden"] = None if data["hidden"] is None else bool(data["hidden"]) + data["supported_reasoning_efforts"] = _json_str_list(data["supported_reasoning_efforts"]) + data["service_tiers"] = _json_service_tiers(data["service_tiers"]) + data["additional_speed_tiers"] = _json_str_list(data["additional_speed_tiers"]) + return ModelCatalogEntry.model_validate(data) + + +def _row_to_lane_model_settings(row: aiosqlite.Row) -> LaneModelSettings: + return LaneModelSettings.model_validate(_row_dict(row)) + + def _row_to_trigger(row: aiosqlite.Row) -> Trigger: data = _row_dict(row) last_fired = data["last_fired_at"] diff --git a/tests/client/test_client.py b/tests/client/test_client.py index ccca6b3..588acd1 100644 --- a/tests/client/test_client.py +++ b/tests/client/test_client.py @@ -216,6 +216,54 @@ async def test_thread_list_reads_data_key( assert [t.id for t in threads] == ["a", "b"] +async def test_config_read_and_model_list_parse_current_catalog_shape( + client: tuple[AppServerClient, FakeTransport], +) -> None: + c, fake = client + fake.auto = _result_for( + "config/read", + { + "config": { + "model": "gpt-5.5", + "modelProvider": "openai", + "serviceTier": "priority", + "modelReasoningEffort": "xhigh", + } + }, + ) + config = await c.config_read() + assert config.model == "gpt-5.5" + assert config.model_provider == "openai" + assert config.service_tier == "priority" + assert config.model_reasoning_effort == "xhigh" + assert fake.sent[-1] == {"id": 1, "method": "config/read", "params": {}} + + fake.auto = _result_for( + "model/list", + { + "data": [ + { + "id": "gpt-5.5", + "defaultReasoningEffort": "xhigh", + "supportedReasoningEfforts": ["low", "xhigh"], + "serviceTiers": [ + { + "id": "priority", + "name": "Fast", + "description": "1.5x speed, increased usage", + } + ], + } + ] + }, + ) + models = await c.model_list() + assert models[0].id == "gpt-5.5" + assert models[0].service_tiers[0].id == "priority" + assert models[0].service_tiers[0].name == "Fast" + assert fake.sent[-1] == {"id": 2, "method": "model/list", "params": {}} + + async def test_thread_list_sends_current_native_filters( client: tuple[AppServerClient, FakeTransport], ) -> None: diff --git a/tests/client/test_models.py b/tests/client/test_models.py index ef336e7..b1190eb 100644 --- a/tests/client/test_models.py +++ b/tests/client/test_models.py @@ -3,7 +3,11 @@ from __future__ import annotations from outfitter.dispatch.client.models import ( + AppModel, + ConfigInfo, InitializeResult, + ModelListResult, + ModelServiceTier, SandboxPolicy, TextInput, ThreadCompactStartParams, @@ -163,6 +167,75 @@ def test_thread_info_keeps_sync_metadata_fields() -> None: assert thread.updated_at == 123 +def test_thread_info_keeps_observed_model_service_tier() -> None: + thread = ThreadInfo.model_validate( + { + "id": "t1", + "modelProvider": "openai", + "model": "gpt-5.5", + "reasoningEffort": "xhigh", + "serviceTier": "priority", + } + ) + + assert thread.model_provider == "openai" + assert thread.model == "gpt-5.5" + assert thread.reasoning_effort == "xhigh" + assert thread.service_tier == "priority" + + +def test_config_and_model_catalog_wire_models_accept_camel_case() -> None: + config = ConfigInfo.model_validate( + { + "model": "gpt-5.5", + "modelProvider": "openai", + "serviceTier": "priority", + "modelReasoningEffort": "xhigh", + } + ) + catalog = ModelListResult.model_validate( + { + "data": [ + { + "id": "gpt-5.5", + "displayName": "GPT-5.5", + "defaultReasoningEffort": "xhigh", + "supportedReasoningEfforts": [ + {"reasoningEffort": "low", "description": "faster"}, + {"reasoningEffort": "xhigh", "description": "deeper"}, + ], + "serviceTiers": [ + { + "id": "priority", + "name": "Fast", + "description": "1.5x speed, increased usage", + } + ], + "additionalSpeedTiers": ["fast"], + } + ] + } + ) + + assert config.model_provider == "openai" + assert catalog.data == [ + AppModel( + id="gpt-5.5", + display_name="GPT-5.5", + default_reasoning_effort="xhigh", + supported_reasoning_efforts=["low", "xhigh"], + service_tiers=[ + ModelServiceTier( + id="priority", + name="Fast", + description="1.5x speed, increased usage", + ) + ], + additional_speed_tiers=["fast"], + ) + ] + + def test_thread_read_include_turns_alias() -> None: params = ThreadReadParams(thread_id="t1", include_turns=True) assert params.model_dump(by_alias=True, exclude_none=True) == { diff --git a/tests/core/test_examples.py b/tests/core/test_examples.py index 0dd7f94..5c3abbc 100644 --- a/tests/core/test_examples.py +++ b/tests/core/test_examples.py @@ -24,6 +24,7 @@ async def test_registry_has_the_v1_ops() -> None: "roster", "discover", "search", + "models", "archive", "restore", "goal-get", diff --git a/tests/core/test_handlers.py b/tests/core/test_handlers.py index f030fc2..8f21773 100644 --- a/tests/core/test_handlers.py +++ b/tests/core/test_handlers.py @@ -39,6 +39,7 @@ LaneSyncInput, LaneTextInput, LogInput, + ModelsInput, NewInput, OpenInput, RollbackInput, @@ -92,7 +93,7 @@ async def test_new_lane_sets_name_and_sends_initial_turn(store: Registry, tmp_pa sandbox="workspace-write", approval_policy="on-request", effort="low", - model="test-model", + model="gpt-5.5", service_tier="priority", developer_instructions="stay focused", ), @@ -107,7 +108,7 @@ async def test_new_lane_sets_name_and_sends_initial_turn(store: Registry, tmp_pa name == "thread_start" and kw["sandbox"] == "workspace-write" and kw["approval_policy"] == "on-request" - and kw["model"] == "test-model" + and kw["model"] == "gpt-5.5" and kw["developer_instructions"] == "stay focused" for name, kw in client.calls ) @@ -125,6 +126,96 @@ async def test_new_lane_sets_name_and_sends_initial_turn(store: Registry, tmp_pa ) +async def test_new_lane_resolves_fast_service_tier_alias_and_records_provenance( + store: Registry, tmp_path: Path +) -> None: + client = FakeLaneClient() + ctx = make_ctx(store, client) + + out = await handlers.new_lane( + NewInput( + name="fast-worker", + cwd=str(tmp_path), + text="start", + model="gpt-5.5", + service_tier="fast", + ), + ctx, + ) + + assert out.model.model == "gpt-5.5" + assert out.model.service_tier.requested == "fast" + assert out.model.service_tier.resolved == "priority" + assert out.model.service_tier.name == "Fast" + assert out.model.service_tier.source == "dispatch" + assert any( + name == "thread_start" and kw["service_tier"] == "priority" for name, kw in client.calls + ) + assert any( + name == "turn_start" and kw["service_tier"] == "priority" for name, kw in client.calls + ) + stored = await store.get_lane_model_settings(out.id) + assert stored is not None + assert stored.requested_service_tier == "fast" + assert stored.resolved_service_tier == "priority" + assert stored.service_tier_source == "dispatch" + + +async def test_new_lane_without_model_override_preserves_codex_default_call_shape( + store: Registry, tmp_path: Path +) -> None: + client = FakeLaneClient() + ctx = make_ctx(store, client) + + out = await handlers.new_lane( + NewInput(name="default-worker", cwd=str(tmp_path), send=False), ctx + ) + + assert out.model.model == "gpt-5.5" + assert out.model.service_tier.source == "unknown" + call = next(kw for name, kw in client.calls if name == "thread_start") + assert call["model"] is None + assert call["model_provider"] is None + assert call["service_tier"] is None + + +async def test_new_lane_rejects_unadvertised_service_tier_with_catalog_guidance( + store: Registry, tmp_path: Path +) -> None: + client = FakeLaneClient() + ctx = make_ctx(store, client) + + with pytest.raises(ValidationError, match="available service tiers: auto, default"): + await handlers.new_lane( + NewInput( + name="spark", + cwd=str(tmp_path), + model="gpt-5.3-codex-spark", + service_tier="fast", + send=False, + ), + ctx, + ) + + +async def test_models_refreshes_catalog_and_reports_fast_alias(store: Registry) -> None: + client = FakeLaneClient() + ctx = make_ctx(store, client) + + refreshed = await handlers.models(ModelsInput(), ctx) + cached = await handlers.models(ModelsInput(refresh=False), ctx) + + assert refreshed.source == "app-server" + assert refreshed.configured_default.model == "gpt-5.5" + assert refreshed.models[0].id == "gpt-5.5" + assert refreshed.models[0].aliases == {"fast": "priority"} + assert refreshed.models[0].service_tiers[0].id == "priority" + assert cached.source == "registry" + cached_by_id = {model.id: model for model in cached.models} + assert cached_by_id["gpt-5.5"].aliases == {"fast": "priority"} + assert [name for name, _ in client.calls].count("model_list") == 1 + + async def test_new_lane_no_send_registers_without_turn(store: Registry, tmp_path: Path) -> None: client = FakeLaneClient() ctx = make_ctx(store, client) @@ -914,6 +1005,7 @@ async def test_attach_with_sync_indexes_jsonl_and_roster_reports_state( "path": str(path), "sessionId": "T9", "modelProvider": "openai", + "serviceTier": "priority", } } ctx = make_ctx(store, client) @@ -926,8 +1018,19 @@ async def test_attach_with_sync_indexes_jsonl_and_roster_reports_state( assert detail.sync.state == "partial" assert detail.sync.latest_turn_id == "turn-1" assert detail.sync.source_size == path.stat().st_size + assert detail.model.model == "test-model" + assert detail.model.service_tier.resolved == "priority" + assert detail.model.service_tier.source == "observed" assert roster.lanes[0].sync.state == "partial" assert roster.lanes[0].sync.latest_event_at == "2026-06-05T10:00:02.000Z" + assert roster.lanes[0].model.model == "test-model" + stored = await store.get_lane_model_settings("T9") + assert stored is not None + assert stored.model_provider == "openai" + assert stored.model == "test-model" + assert stored.reasoning_effort == "low" + assert stored.resolved_service_tier == "priority" + assert stored.service_tier_source == "observed" assert sum(1 for name, _ in client.calls if name == "thread_read") == 1 assert not any(name == "thread_resume" for name, _ in client.calls) @@ -938,7 +1041,16 @@ async def test_lane_sync_can_full_scan_existing_lane(store: Registry, tmp_path: '{"type":"session_meta","timestamp":"2026-06-05T10:00:00.000Z","payload":{"id":"T9"}}\n' ) client = FakeLaneClient() - client.read_result = {"thread": {"id": "T9", "path": str(path)}} + client.read_result = { + "thread": { + "id": "T9", + "path": str(path), + "modelProvider": "openai", + "model": "gpt-5.5", + "reasoningEffort": "xhigh", + "serviceTier": "priority", + } + } ctx = make_ctx(store, client) await store.add_lane(id="T9", handle="@desktop", source="attached") @@ -947,6 +1059,9 @@ async def test_lane_sync_can_full_scan_existing_lane(store: Registry, tmp_path: assert out.lane == "T9" assert out.sync.state == "complete" assert out.sync.transcript_partial is False + assert out.model.model == "gpt-5.5" + assert out.model.service_tier.resolved == "priority" + assert out.model.service_tier.source == "observed" assert any(name == "thread_read" for name, _ in client.calls) @@ -961,6 +1076,10 @@ async def test_discover_lists_persisted_sessions_from_client(store: Registry) -> source="cli", ephemeral=False, status=ThreadStatus(type="idle"), + model_provider="openai", + model="gpt-5.5", + reasoning_effort="xhigh", + service_tier="priority", ), ThreadInfo(id="t2"), # sparse row: only an id ] @@ -975,6 +1094,9 @@ async def test_discover_lists_persisted_sessions_from_client(store: Registry) -> assert first.cwd == "/work" assert first.source == "cli" assert first.ephemeral is False + assert first.model.model == "gpt-5.5" + assert first.model.service_tier.resolved == "priority" + assert first.model.service_tier.source == "observed" # Discovery reads through to the client's thread_list with the requested limit # AND state-db only — the latter is what keeps it read-only (no live resume). assert any( diff --git a/tests/fakes.py b/tests/fakes.py index 59a46f3..6125ca5 100644 --- a/tests/fakes.py +++ b/tests/fakes.py @@ -14,10 +14,13 @@ from outfitter.dispatch.client.events import LaneEvent from outfitter.dispatch.client.models import ( + AppModel, ApprovalPolicy, ApprovalsReviewer, + ConfigInfo, Decision, Effort, + ModelServiceTier, Personality, ReasoningSummary, SandboxPolicy, @@ -51,6 +54,28 @@ def __init__(self) -> None: self.list_result: list[ThreadInfo] = [] self.read_result: dict[str, object] = {} self.search_result = ThreadSearchResult() + self.config_result = ConfigInfo( + model="gpt-5.5", + model_provider="openai", + service_tier=None, + model_reasoning_effort="xhigh", + ) + self.models_result: list[AppModel] = [ + AppModel( + id="gpt-5.5", + default_reasoning_effort="xhigh", + supported_reasoning_efforts=["low", "medium", "high", "xhigh"], + service_tiers=[ + ModelServiceTier( + id="priority", + name="Fast", + description="1.5x speed, increased usage", + ) + ], + additional_speed_tiers=["fast"], + ), + AppModel(id="gpt-5.3-codex-spark"), + ] self.goal_result: ThreadGoal | None = None self.event_log: list[LaneEvent] = [] self.raw_log: list[dict[str, object]] = [] @@ -58,6 +83,14 @@ def __init__(self) -> None: def _record(self, name: str, **kwargs: object) -> None: self.calls.append((name, kwargs)) + async def config_read(self) -> ConfigInfo: + self._record("config_read") + return self.config_result + + async def model_list(self) -> list[AppModel]: + self._record("model_list") + return self.models_result + async def thread_start( self, cwd: str | None, diff --git a/tests/registry/test_store.py b/tests/registry/test_store.py index 2dfaaac..4167818 100644 --- a/tests/registry/test_store.py +++ b/tests/registry/test_store.py @@ -11,7 +11,12 @@ import pytest_asyncio from outfitter.dispatch.contracts.errors import NotFoundError -from outfitter.dispatch.registry.models import LaneSync +from outfitter.dispatch.registry.models import ( + LaneModelSettings, + LaneSync, + ModelCatalogEntry, + ServiceTierEntry, +) from outfitter.dispatch.registry.refs import BASE58BTC_ALPHABET, codex_ref_payload from outfitter.dispatch.registry.store import SCHEMA_VERSION, Registry @@ -216,6 +221,9 @@ async def test_migrates_v3_registry_with_runtime_columns(tmp_path: Path) -> None lane = await migrated.get_lane("A") assert lane.latest_turn_id is None assert lane.latest_turn_status is None + catalog = await migrated.list_model_catalog() + assert catalog == [] + assert await migrated.get_lane_model_settings("A") is None await migrated.record_turn_failed("A", "turn-1", "unsupported model") failed = await migrated.get_lane("A") assert failed.status == "error" @@ -226,6 +234,54 @@ async def test_migrates_v3_registry_with_runtime_columns(tmp_path: Path) -> None await migrated.close() +async def test_model_catalog_and_lane_model_settings_roundtrip(store: Registry) -> None: + now = store.now_iso() + entry = ModelCatalogEntry( + id="gpt-5.5", + provider="openai", + display_name="GPT-5.5", + is_default=True, + hidden=False, + default_reasoning_effort="xhigh", + supported_reasoning_efforts=["low", "xhigh"], + default_service_tier="priority", + service_tiers=[ + ServiceTierEntry( + id="priority", + name="Fast", + description="1.5x speed, increased usage", + ) + ], + additional_speed_tiers=["fast"], + first_seen_at=now, + last_seen_at=now, + ) + await store.upsert_model_catalog([entry]) + refreshed = entry.model_copy(update={"last_seen_at": "2026-06-03T12:05:00+00:00"}) + await store.upsert_model_catalog([refreshed]) + + got = await store.get_model_catalog_entry("gpt-5.5") + assert got == refreshed.model_copy(update={"first_seen_at": now}) + assert await store.list_model_catalog() == [got] + + lane = await store.add_lane(id="L1", handle="@alpha", source="own") + settings = LaneModelSettings( + lane=lane.id, + model_provider="openai", + model="gpt-5.5", + reasoning_effort="xhigh", + requested_service_tier="fast", + resolved_service_tier="priority", + service_tier_name="Fast", + service_tier_source="dispatch", + updated_at=now, + ) + await store.upsert_lane_model_settings(settings) + + assert await store.get_lane_model_settings(lane.id) == settings + assert await store.get_lane_model_settings_many([lane.id, "missing"]) == {lane.id: settings} + + async def test_get_missing_lane_raises_not_found(store: Registry) -> None: assert await store.find_lane("nope") is None with pytest.raises(NotFoundError): diff --git a/tests/surfaces/test_derive_mcp.py b/tests/surfaces/test_derive_mcp.py index 4cf0b02..38aee40 100644 --- a/tests/surfaces/test_derive_mcp.py +++ b/tests/surfaces/test_derive_mcp.py @@ -67,3 +67,12 @@ def test_action_schema_and_annotations_from_op() -> None: "rollback", "goal_clear", } + + daemon_read = tools["dispatch_daemon_read"] + assert daemon_read.annotations is not None + assert daemon_read.annotations.readOnlyHint is True + assert {s["properties"]["op"]["const"] for s in daemon_read.inputSchema["oneOf"]} >= { + "status", + "log", + "models", + } diff --git a/tests/surfaces/test_parity.py b/tests/surfaces/test_parity.py index 8368b19..000f06a 100644 --- a/tests/surfaces/test_parity.py +++ b/tests/surfaces/test_parity.py @@ -56,6 +56,7 @@ def _stub_invoke(op_id: str, params: dict[str, object]) -> dict[str, object]: "rename": "lane-rename", "archive": "archive", "restore": "restore", + "models": "models", "goal status": "goal-get", "goal set": "goal-set", "goal clear": "goal-clear", diff --git a/tests/test_doctor.py b/tests/test_doctor.py index e07927b..3ee3dee 100644 --- a/tests/test_doctor.py +++ b/tests/test_doctor.py @@ -163,7 +163,10 @@ def test_doctor_warns_for_unversioned_registry_migration( registry = next(check for check in report.checks if check.name == "registry") assert registry.status == "warn" assert registry.summary == "registry schema is unversioned" - assert registry.detail == "missing tables: lane_snapshots, lane_sync_sources, queued_messages" + assert registry.detail == ( + "missing tables: lane_model_settings, lane_snapshots, lane_sync_sources, " + "model_catalog, queued_messages" + ) assert registry.recovery is not None assert "dispatch down" in registry.recovery diff --git a/uv.lock b/uv.lock index 9ad1a02..23011f6 100644 --- a/uv.lock +++ b/uv.lock @@ -466,7 +466,7 @@ wheels = [ [[package]] name = "outfitter-dispatch" -version = "0.4.1" +version = "0.5.0" source = { editable = "." } dependencies = [ { name = "aiosqlite" },