hud-evals · ryantzr1 · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026
diff --git a/docs/guides/integrations.mdx b/docs/guides/integrations.mdx
@@ -73,6 +73,70 @@ async with hud.eval(eval) as ctx:
     await ctx.submit(msg.content or "")
 ```
 
+### Chat Completions (Single-Call Runner)
+
+If you want HUD to handle the chat tool loop for a scenario task, use
+`hud.run_scenario_chat(...)`:
+
+```python
+import os
+from openai import AsyncOpenAI
+import hud
+
+env = hud.Environment("trivia")
+task = env("initials", company="HUD")
+
+client = AsyncOpenAI(
+    base_url="https://inference.hud.ai",
+    api_key=os.environ["HUD_API_KEY"]
+)
+
+result = await hud.run_scenario_chat(
+    client=client,
+    model="gpt-4o",
+    task=task,
+    api="chat_completions",  # or "responses" / "auto"
+)
+
+print(result.answer)
+print(result.reward)
+print(result.trace_id)
+```
+
+### Interactive Scenario Chat (Turn-by-Turn)
+
+Use `hud.run_scenario_chat_interactive(...)` when you want to send multiple
+user turns before final evaluation:
+
+```python
+import os
+from openai import AsyncOpenAI
+import hud
+
+env = hud.Environment("trivia")
+
+client = AsyncOpenAI(
+    base_url="https://inference.hud.ai",
+    api_key=os.environ["HUD_API_KEY"]
+)
+
+async with hud.run_scenario_chat_interactive(
+    client=client,
+    model="gpt-4o",
+    env=env,
+    scenario="initials",
+    args={"company": "HUD"},
+) as chat:
+    first = await chat.send("Start with your initial investigation.")
+    follow_up = await chat.send("Now provide a concise final answer.")
+    result = await chat.finish()  # submits + evaluates
+
+print(first.answer)
+print(follow_up.answer)
+print(result.reward)
+print(result.trace_id)
+```
+
 ### Responses API
 
 ```python
@@ -111,6 +175,112 @@ Requires: `pip install openai-agents`
 
 ---
 
+## Serve Scenarios as an HTTP Endpoint
+
+If you want external agents to run your scenarios without the HUD SDK, use
+`env.serve_as_agent()`. It starts a local OpenAI-compatible server — any
+OpenAI client in any language can connect.
+
+### Server (`04_scenario_server.py`)
+
+```python
+import os
+import hud
+from openai import AsyncOpenAI
+
+env = hud.Environment(os.environ["HUD_ENV_NAME"])
+env.connect_hub(os.environ["HUD_ENV_NAME"])
+
+env.serve_as_agent(
+    client=AsyncOpenAI(
+        base_url="https://inference.hud.ai",
+        api_key=os.environ["HUD_API_KEY"],
+    ),
+    model="gpt-4o",
+    port=8321,
+)
+```
+
+The server exposes:
+
+| Endpoint | Purpose |
+|---|---|
+| `GET /scenarios` | List available scenarios and their required args |
+| `GET /v1/lifecycle-tools` | List scenario lifecycle tool schemas |
+| `POST /v1/lifecycle-tools/call` | Call lifecycle tools (`scenario_list/start/send/finish`) |
+| `POST /v1/chat/completions` | Start or continue a session |
+| `POST /v1/sessions/{id}/finish` | Submit and evaluate |
+| `GET /v1/sessions` | List active sessions |
+| `GET /mcp/tools` | MCP-native lifecycle tool list |
+| `POST /mcp/tools/call` | MCP-native lifecycle tool execution |
+
+### Client (`05_scenario_client.py`)
+
+No HUD SDK needed. Use any standard OpenAI client:
+
+```python
+import httpx
+from openai import OpenAI
+
+client = OpenAI(base_url="http://localhost:8321/v1", api_key="not-needed")
+
+# 1. Discover scenarios
+scenarios = httpx.get("http://localhost:8321/scenarios").json()["scenarios"]
+selected = scenarios[0]
+
+# 2. First turn — pass scenario name and args in the request body
+#    (both fields are required for session bootstrap)
+first = client.chat.completions.create(
+    model="gpt-4o",
+    messages=[{"role": "user", "content": "Begin."}],
+    extra_body={
+        "scenario": selected["short_name"],
+        "scenario_args": {"arg": "value"},
+    },
+)
+session_id = first.hud["session_id"]  # returned in every response
+
+# 3. Follow-up turns — pass session ID in the header
+follow_up = client.chat.completions.create(
+    model="gpt-4o",
+    messages=[{"role": "user", "content": "What are the root causes?"}],
+    extra_headers={"X-HUD-Session-Id": session_id},
+)
+
+#    You can also pass `thread_id` / `conversation_id` in `extra_body`.
+follow_up_alt = client.chat.completions.create(
+    model="gpt-4o",
+    messages=[{"role": "user", "content": "Any remaining risks?"}],
+    extra_body={"thread_id": session_id},
+)
+
+# 4. Finish — submits the answer and returns reward + trace URL
+result = httpx.post(f"http://localhost:8321/v1/sessions/{session_id}/finish").json()
+print(result["reward"], result["trace_url"])
+```
+
+Streaming works the same way — just pass `stream=True`. The server sends
+standard SSE chunks, with a final chunk carrying `hud.session_id` and
+`hud.trace_url`.
+
+### Lifecycle Tools (Agent-native Helpers)
+
+If your orchestrator prefers explicit lifecycle calls, use:
+
+- `GET /v1/lifecycle-tools` + `POST /v1/lifecycle-tools/call`
+- or the MCP-native aliases: `GET /mcp/tools` + `POST /mcp/tools/call`
+
+Available tool names:
+
+- `scenario_list`
+- `scenario_start` (requires `scenario` + `scenario_args`)
+- `scenario_send`
+- `scenario_finish`
+
+Requires: `pip install "hud-python[server]"` (installs `fastapi` and `uvicorn`)
+
+---
+
 ## Anthropic
 
 Claude's Messages API with tool use.

diff --git a/examples/03_scenario_chat.py b/examples/03_scenario_chat.py
@@ -0,0 +1,117 @@
+"""Interactive REPL for scenario chat with optional streaming.
+
+Usage:
+    HUD_API_KEY=... HUD_ENV_NAME=... python examples/03_scenario_chat.py
+    HUD_API_KEY=... HUD_ENV_NAME=... python examples/03_scenario_chat.py --stream
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import os
+
+import hud
+from openai import AsyncOpenAI
+
+TURN_TIMEOUT_SECONDS = 60
+
+
+def _parser() -> argparse.ArgumentParser:
+    """Build the command-line parser for the REPL.
+
+    Defines --stream, --env and --model; the latter two default to environment variables.
+    """
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument("--stream", action="store_true", help="Enable SSE token streaming.")
+    string_options = (
+        ("--env", os.getenv("HUD_ENV_NAME"), "HUD environment name (or set HUD_ENV_NAME)."),
+        ("--model", os.getenv("HUD_MODEL", "gpt-4o"), "Model name for chat calls."),
+    )
+    for flag, fallback, description in string_options:
+        cli.add_argument(flag, default=fallback, help=description)
+    return cli
+
+
+async def main() -> None:
+    """Pick a scenario, chat with it turn by turn, then finish and print the reward."""
+    args = _parser().parse_args()
+    if not args.env:
+        raise ValueError("Provide --env or set HUD_ENV_NAME")
+
+    client = AsyncOpenAI(base_url="https://inference.hud.ai", api_key=os.environ["HUD_API_KEY"])
+    env = hud.Environment(args.env)
+    env.connect_hub(args.env)
+
+    async with env:
+        scenarios = await env.list_scenarios()
+        if not scenarios:
+            print("No scenarios found.")
+            return
+
+        print("Available scenarios:")
+        for i, scenario in enumerate(scenarios, 1):
+            req = ", ".join(scenario.required_args) or "(none)"
+            print(f"  [{i}] {scenario.short_name} - {scenario.description or 'no description'}")
+            print(f"      required args: {req}")
+        print()
+
+        choice = input("Pick a scenario (number, default 1): ").strip()
+        # isdecimal(), unlike isdigit(), rejects input such as "²" that int() cannot parse.
+        idx = int(choice) - 1 if choice.isdecimal() else 0
+        chosen = scenarios[idx] if 0 <= idx < len(scenarios) else scenarios[0]
+
+        scenario_args: dict[str, str] = {}
+        for arg in chosen.arguments:
+            label = arg.name if arg.required else f"{arg.name} (optional)"
+            value = input(f"  {label}: ").strip()
+            if value:  # blank answers are omitted rather than sent as empty strings
+                scenario_args[arg.name] = value
+
+        print(f"\nRunning: {chosen.short_name}")
+        print(f"Streaming: {'on' if args.stream else 'off'}")
+        print("Type /done when finished.\n")
+
+        async with hud.run_scenario_chat_interactive(
+            client=client,
+            model=args.model,
+            env=env,
+            scenario=chosen.short_name,
+            args=scenario_args,
+            api="chat_completions",
+        ) as chat:
+            print(f"Trace: https://hud.ai/trace/{chat.trace_id}\n")
+
+            async def send_message(msg: str) -> None:
+                """Send one user turn and print the reply, streamed or as a whole."""
+                if args.stream:
+                    print("Assistant: ", end="", flush=True)
+                    async for event in chat.send_stream(msg):
+                        if event.type == "text_delta":
+                            print(event.content, end="", flush=True)
+                    print("\n")
+                    return
+                turn = await asyncio.wait_for(chat.send(msg), timeout=TURN_TIMEOUT_SECONDS)
+                print(f"Assistant: {turn.answer}\n")
+
+            await send_message("Begin.")
+            while True:
+                try:
+                    user_input = input("You: ").strip()
+                except (EOFError, KeyboardInterrupt):  # Ctrl-D / Ctrl-C also end the chat
+                    print()
+                    break
+                if not user_input:
+                    continue
+                if user_input.lower() in {"/done", "/quit", "/exit"}:
+                    break
+                await send_message(user_input)
+
+            result = await chat.finish()
+            print("---")
+            print(f"Reward: {result.reward}")
+            print(f"Trace:  https://hud.ai/trace/{result.trace_id}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/04_scenario_server.py b/examples/04_scenario_server.py
@@ -0,0 +1,48 @@
+"""Serve HUD scenarios as an OpenAI-compatible agent endpoint.
+
+Usage:
+    HUD_API_KEY=... HUD_ENV_NAME=... python examples/04_scenario_server.py
+"""
+
+from __future__ import annotations
+
+import os
+
+import hud
+from openai import AsyncOpenAI
+
+
+def main() -> None:
+    """Start the OpenAI-compatible agent server for the environment named by HUD_ENV_NAME."""
+    target = os.getenv("HUD_ENV_NAME")
+    if not target:
+        raise ValueError("Set HUD_ENV_NAME to the target HUD environment")
+
+    # Optional overrides: HUD_MODEL for the model, HUD_AGENT_PORT for the listening port.
+    model_name = os.getenv("HUD_MODEL", "gpt-4o")
+    listen_port = int(os.getenv("HUD_AGENT_PORT", "8321"))
+
+    api_key = os.environ["HUD_API_KEY"]
+    inference = AsyncOpenAI(base_url="https://inference.hud.ai", api_key=api_key)
+    env = hud.Environment(target)
+    env.connect_hub(target)
+
+    # Print a route cheat sheet before calling serve_as_agent.
+    routes = (
+        "GET  /scenarios",
+        "GET  /v1/lifecycle-tools",
+        "POST /v1/lifecycle-tools/call",
+        "POST /v1/chat/completions (use X-HUD-Session-Id for follow-up turns)",
+        "POST /v1/sessions/<id>/finish",
+        "GET  /v1/sessions",
+        "GET  /mcp/tools",
+        "POST /mcp/tools/call",
+    )
+    print(f"Serving {target} on http://localhost:{listen_port}")
+    print("\n".join(routes), end="\n\n")
+
+    env.serve_as_agent(client=inference, model=model_name, port=listen_port)
+
+
+if __name__ == "__main__":
+    main()