diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f0a3f10..a9b32c7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -46,6 +46,38 @@ jobs:
       - name: Build
         run: make build
 
+  eval-harness:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: PromptFoo
+        working-directory: eval-harness/promptfoo
+        run: npx --yes promptfoo@latest eval -c config.yaml
+
+      - name: DeepEval
+        working-directory: eval-harness/deepeval
+        run: |
+          python3 -m venv .venv
+          .venv/bin/pip install -r requirements.txt
+          .venv/bin/pytest test_agent.py -v
+
   # examples:local — off until if: true and EXAMPLES_* repo secrets (examples/.env.defaults).
   examples:
     if: false
diff --git a/.gitignore b/.gitignore
index aef69b8..f69f2b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,9 @@ pnpm-debug.log*
 .env*.local
 
 .DS_Store
+
+# Python / DeepEval (eval-harness/deepeval)
+eval-harness/deepeval/.venv/
+eval-harness/deepeval/.pytest_cache/
+eval-harness/deepeval/.deepeval/
+eval-harness/deepeval/__pycache__/
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b7cb3b8..32fdeb4 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -62,7 +62,7 @@ Keep your branch short and descriptive. Sync with `main` before opening a PR: `g
 make check
 ```
 
-Runs `fmt-check`, spell check, `make lint`, `make test`, `make build`, and `make secrets-scan` — same core gates as CI (coverage is CI-only; use `make test-coverage` locally if you want a report).
+Runs `fmt-check`, spell check, `make lint`, `make test`, `make build`, and `make secrets-scan` — same core gates as the main CI job (coverage is CI-only; use `make test-coverage` locally if you want a report). `make test` includes eval-harness Go tests; the full Promptfoo/DeepEval suite runs in CI and via `make eval-harness` (see below).
 
 Also run the full example suite on any code change to catch regressions unit tests may miss:
 
@@ -72,6 +72,14 @@ task examples:all
 
 Requires Task, Docker, and LLM credentials — see [examples/README.md](examples/README.md).
 
+If you change **agent behavior** (e.g. `pkg/agent`, telemetry, tools, runtime) or **`eval-harness/`**, run:
+
+```bash
+make eval-harness
+```
+
+Behavioral regression tests use mock LLM/tools and assert on run output — SDK changes can break them even when eval-harness files are untouched. Requires Node.js and Python 3.10+ — see [eval-harness/README.md](eval-harness/README.md). CI runs this automatically on PRs (`eval-harness` job).
+
 **CI runs automatically** on pull requests to `main` (open a PR or push updates to an existing PR to re-run checks). Pushes or merges to `main` do not trigger CI; use **workflow_dispatch** in GitHub Actions for an on-demand run. Run `make check` locally before opening a PR; CI must pass on the PR before merge.
 
 To run only tests (e.g. while iterating):
@@ -172,6 +180,7 @@ Using the SDK and ran into issues, unclear docs, or confusing behavior? **Raise
 2. **Tests**
    - Add tests for new features and bug fixes.
    - Unit tests go in `*_test.go` files alongside the code.
+   - Agent behavior changes (`pkg/agent`, telemetry, tools, runtime) or **`eval-harness/`** edits — run `make eval-harness` before submitting a PR.
 
 3. **Commits**
    - Use [conventional commits](https://www.conventionalcommits.org) — these drive the release changelog:
diff --git a/Makefile b/Makefile
index 460b18e..ec2b529 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: build install test lint tidy clean fmt fmt-check spell secrets-scan check
+.PHONY: build install test lint tidy clean fmt fmt-check spell secrets-scan check eval-harness
 
 BIN_DIR := cmd/bin
 BINARY := $(BIN_DIR)/agentctl
@@ -26,13 +26,24 @@ install: build
 	cp $(BINARY) $(GOPATH_BIN)/agentctl
 	@echo "Installed to $(GOPATH_BIN)/agentctl"
 
-# Run tests under pkg
+# Run Go tests (pkg, internal, eval-harness runner)
 test:
 	@echo "==> Running tests..."
 	go test ./pkg/... -count=1
 	go test ./internal/... -count=1
+	go test ./eval-harness/... -count=1
 	@echo "==> Tests complete"
 
+# Promptfoo + DeepEval (same as CI eval-harness job). Requires Node.js and Python 3.10+.
+eval-harness:
+	@echo "==> Running eval-harness (Promptfoo + DeepEval)..."
+	cd eval-harness/promptfoo && npx --yes promptfoo@latest eval -c config.yaml
+	cd eval-harness/deepeval && \
+		(test -d .venv || python3 -m venv .venv) && \
+		.venv/bin/pip install -q -r requirements.txt && \
+		.venv/bin/pytest test_agent.py -v
+	@echo "==> Eval-harness complete"
+
 # Run before push: lint, test, build, and secrets scan (same core gates as CI; no auto-format).
 # Coverage is CI-only (`make test-coverage` when you want the report). If fmt-check fails, run `make fmt`.
 check: lint test build secrets-scan
diff --git a/README.md b/README.md
index a2f4063..4b5e219 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@
   - [Agent and worker in separate processes](#agent-and-worker-in-separate-processes)
   - [Conversation](#conversation-message-history)
   - [AG-UI Protocol](#ag-ui-protocol)
+- [Telemetry](#telemetry)
 - [Observability](#observability)
   - [Wire OTLP](#wire-otlp-traces--metrics--logs-in-one-block)
   - [Bring your own tracer / metrics](#bring-your-own-tracer--metrics)
@@ -54,6 +55,7 @@
   - [Code Coverage](#code-coverage)
 - [Setup and run examples](#setup-and-run-examples)
 - [Benchmarks](#benchmarks)
+- [Eval Harness](#eval-harness)
 - [Production Readiness Checklist](#production-readiness-checklist)
 - [Disclaimer](#disclaimer)
 
@@ -328,8 +330,8 @@ Streaming text deltas (`TEXT_MESSAGE_*`) versus the `**RUN_FINISHED**` body ofte
 
 Each LLM completion can report token counts via `[interfaces.LLMUsage](pkg/interfaces/llm.go)` on `[interfaces.LLMResponse.Usage](pkg/interfaces/llm.go)`. OpenAI, Anthropic, and Gemini clients populate `**PromptTokens**`, `**CompletionTokens**`, `**TotalTokens**`, and optional `**CachedPromptTokens**` / `**ReasoningTokens**` when the provider returns them.
 
-- `**Agent.Run` / `RunAsync`:** `**Usage`** on [*AgentRunResult](pkg/agent/agent.go) is the **sum** across all LLM calls in that run (including tool rounds). Use it for cost estimates, quotas, and logging.
-- `**Stream`:** the same aggregate appears as `**Usage*`* on `**RUN_FINISHED**`: assert `**[*AgentRunFinishedEvent](pkg/agent/agent.go)**`, then `**Result**` as `**[*AgentRunResult](pkg/agent/agent.go)**`. OpenAI streaming `**include_usage**` surfaces totals there. Helpers: [examples/shared/utils.go](examples/shared/utils.go) (`UsageFooter`, `RunResultFromFinishedEvent`).
+- **`Agent.Run` / `RunAsync`:** `LLMUsage` on [`*AgentRunResult`](pkg/agent/agent.go) is the **sum** across all LLM calls in that run (including tool rounds). Use it for cost estimates, quotas, and logging.
+- **`Stream`:** the same aggregate is on `LLMUsage` in the `RUN_FINISHED` event's `Result` (the event is a [`*AgentRunFinishedEvent`](pkg/agent/agent.go); its `Result` is a [`*AgentRunResult`](pkg/agent/agent.go)). OpenAI streaming `include_usage` surfaces totals there. Helpers: [examples/shared/utils.go](examples/shared/utils.go) (`LLMUsageFooter`, `RunResultFromFinishedEvent`).
 
 Examples: [examples/simple_agent](examples/simple_agent) (prints usage after `Run`), [examples/agent_with_stream](examples/agent_with_stream) (prints usage on `**RUN_FINISHED**`).
 
@@ -348,7 +350,6 @@ Custom tools may also implement:
 
 - `interfaces.ToolApproval` — tool-level hint for **interactive human approval**. Use this when a person should decide whether the tool runs, and no agent-level approval policy is set.
 - `interfaces.ToolAuthorizer` — tool-level **programmatic authorization**. Use this when code should decide whether the tool runs before approval/execute (for example: scopes, tenancy, environment flags, or feature access). Return `Allow=false` to deny the tool call without executing it.
-- `interfaces.ToolKindProvider` — optional interface that reports the tool's origin category. The built-in tool wrappers already implement it (`"mcp"`, `"a2a"`, `"sub-agent"`, `"retriever"`). Implement it on custom tools when you want to distinguish origin in logs or metrics. Use `interfaces.KindOf(tool)` to read the kind from any tool; returns `"native"` when the interface is not implemented.
 
 ```go
 reg := agent.NewToolRegistry()
@@ -1116,6 +1117,41 @@ for ev := range ch {
 
 ---
 
+## Telemetry
+
+Every run populates `AgentTelemetry` inside `AgentRunResult` with behavioral metrics across three areas:
+
+> Telemetry fields are designed to support eval harness assertions — see [eval-harness/](eval-harness/) for examples with PromptFoo and DeepEval.
+
+- **Run** — start/end time, total LLM calls, and finish reason (`complete` or `max_iterations`)
+- **Tools** — total calls, failed calls, and per-tool breakdown for registered tools and MCP tools
+- **Storage** — RAG retriever search counts split by mode (`prefetch_searches`, `agentic_searches`) and failure count; all fields are zero when no retriever is configured
+
+```go
+result, _ := ag.Run(ctx, "prompt")
+t := result.Telemetry
+fmt.Printf("llm_calls=%d  finish=%s\n", t.Run.TotalLLMCalls, t.Run.FinishReason)
+fmt.Printf("tool_calls=%d  failed=%d\n", t.Tools.TotalCalls, t.Tools.FailedCalls)
+fmt.Printf("retriever_searches=%d  prefetch=%d  agentic=%d\n",
+    t.Storage.TotalRetrieverSearches,
+    t.Storage.PrefetchSearches,
+    t.Storage.AgenticSearches)
+```
+
+**Stream** — telemetry is on `Result.Telemetry` inside the `RUN_FINISHED` event:
+
+```go
+for ev := range ch {
+    if result := shared.RunResultFromFinishedEvent(ev); result != nil {
+        fmt.Println(result.Telemetry.Run.TotalLLMCalls)
+    }
+}
+```
+
+Examples can print a formatted telemetry footer — see [examples/README.md](examples/README.md#run-output).
+
+---
+
 ## Observability
 
 The SDK emits **traces**, **metrics**, and **logs** via OpenTelemetry. All signals are **no-op by default** — if you set nothing, the agent runs without any overhead. Wire them only when you need them.
@@ -1248,7 +1284,7 @@ A Temporal connection (`WithTemporalConfig` or `WithTemporalClient`) is **option
 - **WithMaxSubAgentDepth**: Maximum delegation hops from this agent (default 2). See [Sub-agents](#sub-agents).
 - **WithMaxIterations**: Max LLM rounds (default 5).
 - **WithStream**: Enable `Stream` partial content streaming.
-- **Token usage:** Not a separate option. On `**Run`**, read `**Usage**` on `**[*AgentRunResult](pkg/agent/agent.go)**` when set. On `**Stream**`, assert `**[*AgentRunFinishedEvent](pkg/agent/agent.go)**` with `**[*AgentRunResult](pkg/agent/agent.go)**` in `**Result**` (aggregate across LLM/tool rounds when the provider reports it). See [Token usage](#token-usage-llmusage).
+- **Token usage:** Not a separate option. On `Run`, read `LLMUsage` on [`*AgentRunResult`](pkg/agent/agent.go) when set. On `Stream`, type-assert the `RUN_FINISHED` event to [`*AgentRunFinishedEvent`](pkg/agent/agent.go) and read `Result.LLMUsage` (aggregate across LLM/tool rounds when the provider reports it). See [Token usage](#token-usage-llmusage).
 - **WithLLMSampling**: Pass `&agent.LLMSampling{...}`; nil or zero fields leave that knob to the provider default. Which fields apply where:
   - `**Temperature`** — OpenAI, Anthropic, Gemini.
   - `**MaxTokens**` — OpenAI, Anthropic, Gemini (max output / completion tokens).
@@ -1272,7 +1308,7 @@ A Temporal connection (`WithTemporalConfig` or `WithTemporalClient`) is **option
 Contributors: see **[CONTRIBUTING.md](CONTRIBUTING.md)** for prerequisites (Go, Temporal setup, workflow, and guidelines).
 Project policies: **[SECURITY.md](SECURITY.md)** for vulnerability reporting and **[CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md)** for community standards.
 
-Quick commands: `make test` | `make lint` | `make fmt` | `make spell` | `make tidy` | `make test-coverage` (`make lint` runs `gofmt -s`, `misspell`, then `go vet` + `golangci-lint`)
+Quick commands: `make test` | `make check` | `make eval-harness` | `make lint` | `make fmt` | `make spell` | `make tidy` | `make test-coverage` (`make lint` runs `gofmt -s`, `misspell`, then `go vet` + `golangci-lint`)
 
 ## Code Coverage
 
@@ -1319,6 +1355,12 @@ Config-driven benchmark suite to measure agent performance in your environment.
 
 See [benchmarks/README.md](benchmarks/README.md).
 
+## Eval Harness
+
+Behavioral regression suite for agent runs — verify tools, completion, and telemetry without a live LLM. Use it to catch breaking changes in CI and as a reference for wiring your own agents into eval tools.
+
+See [eval-harness/README.md](eval-harness/README.md).
+
 ## Production Readiness Checklist
 
 - **Run and approval limits** — Use `WithTimeout` and/or a context deadline on `Run` / `Stream`; use `WithApprovalTimeout` when tools require approval (activity retry counts inside workflows are fixed in the SDK, not user-tunable).
diff --git a/cmd/main.go b/cmd/main.go
index d635812..507456b 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -234,8 +234,7 @@ func runResultFromFinishedEvent(ev agent.AgentEvent) *agent.AgentRunResult {
 	if !ok || fin == nil {
 		return nil
 	}
-	res, _ := fin.Result.(*agent.AgentRunResult)
-	return res
+	return fin.Result
 }
 
 func printEvent(ev agent.AgentEvent, streamedContent bool) {
diff --git a/eval-harness/README.md b/eval-harness/README.md
new file mode 100644
index 0000000..0cc9450
--- /dev/null
+++ b/eval-harness/README.md
@@ -0,0 +1,161 @@
+# Eval harness
+
+Runs a single agent execution with mock LLM and mock tools. Prints JSON to stdout with `content`, `llm_usage`, and `telemetry` for evaluation assertions.
+
+## Runner
+
+From the repo root:
+
+```bash
+go run ./eval-harness/runner
+go run ./eval-harness/runner -prompt "custom prompt"
+go run ./eval-harness/runner -runtime temporal
+go run ./eval-harness/runner -tools 2
+go run ./eval-harness/runner -config eval-harness/runner/config.yaml
+```
+
+### Arguments
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `-config` | `eval-harness/runner/config.yaml` | Path to config file |
+| `-prompt` | from config | Override `user_prompt` |
+| `-runtime` | from config | Override `runtime` (`local` or `temporal`) |
+| `-tools` | from config | Override `agent.tool_count` |
+
+### config.yaml
+
+Default path: `eval-harness/runner/config.yaml`
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `runtime` | `local` | `local` or `temporal` |
+| `user_prompt` | — | User message (required) |
+| `agent.name` | `eval-agent` | Agent name |
+| `agent.system_prompt` | built-in eval prompt | System instructions |
+| `agent.tool_count` | `3` | Number of mock tools |
+| `temporal.host` | `localhost` | Temporal host when `runtime: temporal` |
+| `temporal.port` | `7233` | Temporal port |
+| `temporal.namespace` | `default` | Temporal namespace |
+| `temporal.task_queue` | `eval-harness` | Task queue |
+
+Temporal mode uses an embedded local worker. Start Temporal before running (e.g. `task infra:temporal:up` from `examples/`).
+
+### Output
+
+Stdout is always JSON:
+
+```json
+{
+  "content": "eval complete",
+  "llm_usage": { "prompt_tokens": 600, "completion_tokens": 400, "total_tokens": 1000 },
+  "telemetry": { "run": { ... }, "tools": { ... }, "storage": { ... } }
+}
+```
+
+## PromptFoo
+
+Config: `eval-harness/promptfoo/config.yaml`
+
+PromptFoo runs the eval harness as an [exec provider](https://www.promptfoo.dev/docs/providers/custom-script/). Each test invokes the runner once, parses the JSON stdout, and asserts on `content`, `llm_usage`, and `telemetry`.
+
+### Run
+
+```bash
+cd eval-harness/promptfoo
+npx promptfoo eval -c config.yaml
+```
+
+View results in the web UI:
+
+```bash
+npx promptfoo view
+```
+
+Requires Node.js and Go (the provider launches the runner with `go run`). PromptFoo is installed on demand via `npx`; no local install is required.
+
+### How it works
+
+| Piece | Role |
+|-------|------|
+| **Provider** | `exec:../run_agent.sh` — shared wrapper in `eval-harness/` |
+| **Prompt** | `"run eval check"` — passed as the first arg to `run_agent.sh` (overrides `user_prompt`) |
+| **Output** | Runner JSON on stdout; assertions use `JSON.parse(output)` |
+| **Paths** | `eval-harness/run_agent.sh` resolves repo root and runner config |
+
+`run_agent.sh` forwards PromptFoo’s prompt to the runner as `-prompt`; the runner itself also accepts the prompt as a positional argument when `-prompt` is not set. Agent settings (`tool_count`, `runtime`, etc.) still come from `eval-harness/runner/config.yaml`.
+
+### Tests
+
+Four test cases in `promptfoo/config.yaml`, each with a JavaScript assertion on the runner's JSON output:
+
+| Test | Checks |
+|------|--------|
+| all mock tools were called | `telemetry.tools.breakdown` — `eval_tool_1`, `eval_tool_2`, `eval_tool_3`, each called once |
+| agent completed successfully | `telemetry.run.finish_reason === "complete"` and `content === "eval complete"` |
+| no failed tool calls | `telemetry.tools.failed_calls === 0` |
+| llm usage reported | `llm_usage.total_tokens > 0` |
+
+### Customizing
+
+- **Change the prompt** — edit `prompts` in `promptfoo/config.yaml`, or add `vars` and use `{{var}}` in the prompt string.
+- **Change agent behavior** — edit `eval-harness/runner/config.yaml` (tool count, runtime, system prompt), or adjust `eval-harness/run_agent.sh`.
+- **Add tests** — append cases under `tests:` with `type: javascript` and `value:` returning a boolean.
+- **Filter providers** — use `label: eval-agent` in test `options.providers` if you add more providers later.
+
+## DeepEval
+
+Python tests in `eval-harness/deepeval/`. The suite runs the Go eval harness, parses the JSON stdout, and asserts on `content`, `llm_usage`, and `telemetry` — the same output contract as the runner and PromptFoo.
+
+### Run
+
+```bash
+cd eval-harness/deepeval
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+pytest test_agent.py -v
+```
+
+Requires Python 3.10+ and Go. No API key is required for the default tests.
+
+### How it works
+
+1. `harness.run_agent()` calls `eval-harness/run_agent.sh` and parses JSON.
+2. Tests read telemetry from the agent SDK run output.
+3. `assert_test()` runs DeepEval metrics where useful; plain pytest asserts cover the rest.
+
+| Source field | Used for |
+|--------------|----------|
+| `content` | Agent response text |
+| `llm_usage.total_tokens` | Token usage reported |
+| `telemetry.run.finish_reason` | Run completed (`"complete"`) |
+| `telemetry.tools.failed_calls` | No tool failures |
+| `telemetry.tools.total_calls` | Expected call count |
+| `telemetry.tools.breakdown` | Per-tool call counts; fed into `tools_called` for `ToolCorrectnessMetric` |
+
+Example — extract tools from telemetry:
+
+```python
+agent_res = run_agent()
+tools = list(agent_res["telemetry"]["tools"]["breakdown"].keys())
+finish_reason = agent_res["telemetry"]["run"]["finish_reason"]
+```
+
+### Tests
+
+Two pytest tests in `test_agent.py`:
+
+| Test | Checks |
+|------|--------|
+| `test_agent_completes_with_telemetry` | `content`, `llm_usage`, `finish_reason`, `failed_calls`, `total_calls`, `breakdown` keys |
+| `test_agent_tool_correctness` | `ToolCorrectnessMetric` — `tools_called` from telemetry vs expected tools |
+
+### Customizing
+
+- **Change the prompt** — pass a different string to `run_agent(prompt=...)`.
+- **Change agent behavior** — edit `eval-harness/runner/config.yaml` or `eval-harness/run_agent.sh`.
+- **Add tests** — extend `test_agent.py` with more telemetry asserts or DeepEval `LLMTestCase` fields.
+
+> **Note:** CI runs both PromptFoo and DeepEval on PRs — see `.github/workflows/ci.yml` (`eval-harness` job). Locally: `make eval-harness` from the repo root.
+
diff --git a/eval-harness/deepeval/harness.py b/eval-harness/deepeval/harness.py
new file mode 100644
index 0000000..a5a16b2
--- /dev/null
+++ b/eval-harness/deepeval/harness.py
@@ -0,0 +1,41 @@
+"""Helpers for running the Go eval harness from DeepEval tests."""
+
+from __future__ import annotations
+
+import json
+import subprocess
+from pathlib import Path
+
+from deepeval.models import DeepEvalBaseLLM
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+DEFAULT_PROMPT = "run eval check"
+
+
+class StubJudge(DeepEvalBaseLLM):
+    """Placeholder judge for metrics that score deterministically; generating text raises."""
+
+    def load_model(self):
+        return self
+
+    def generate(self, *args, **kwargs) -> str:
+        raise RuntimeError("stub judge should not be invoked for deterministic metrics")
+
+    async def a_generate(self, *args, **kwargs) -> str:
+        raise RuntimeError("stub judge should not be invoked for deterministic metrics")
+
+    def get_model_name(self, *args, **kwargs) -> str:
+        return "stub-judge"
+
+
+def run_agent(prompt: str = DEFAULT_PROMPT) -> dict:
+    """Run eval-harness/run_agent.sh with ``prompt`` (cwd: repo root) and return its stdout parsed as JSON."""
+    script = REPO_ROOT / "eval-harness" / "run_agent.sh"
+    raw = subprocess.check_output([str(script), prompt], cwd=REPO_ROOT, text=True)  # non-zero exit raises CalledProcessError
+    return json.loads(raw)
+
+
+def tools_called(agent_res: dict) -> list[str]:
+    """Return the tool names recorded as keys of ``telemetry.tools.breakdown``."""
+    breakdown = agent_res["telemetry"]["tools"]["breakdown"]
+    return list(breakdown.keys())
diff --git a/eval-harness/deepeval/requirements.txt b/eval-harness/deepeval/requirements.txt
new file mode 100644
index 0000000..f84d9a6
--- /dev/null
+++ b/eval-harness/deepeval/requirements.txt
@@ -0,0 +1,2 @@
+deepeval>=2.0.0
+pytest>=8.0.0
diff --git a/eval-harness/deepeval/test_agent.py b/eval-harness/deepeval/test_agent.py
new file mode 100644
index 0000000..a89ee35
--- /dev/null
+++ b/eval-harness/deepeval/test_agent.py
@@ -0,0 +1,56 @@
+"""DeepEval pytest suite for the Go eval harness."""
+
+from deepeval import assert_test
+from deepeval.metrics import ToolCorrectnessMetric
+from deepeval.test_case import LLMTestCase, ToolCall
+
+from harness import DEFAULT_PROMPT, StubJudge, run_agent, tools_called
+
+EXPECTED_TOOLS = [
+    ToolCall(name="eval_tool_1"),
+    ToolCall(name="eval_tool_2"),
+    ToolCall(name="eval_tool_3"),
+]
+
+
+def test_agent_completes_with_telemetry():
+    """Assert on agent SDK run output: content, llm_usage, and telemetry."""
+    agent_res = run_agent()
+
+    assert agent_res["content"] == "eval complete"
+    assert agent_res["llm_usage"]["total_tokens"] > 0
+
+    run_telemetry = agent_res["telemetry"]["run"]
+    tools_telemetry = agent_res["telemetry"]["tools"]
+
+    assert run_telemetry["finish_reason"] == "complete"
+    assert tools_telemetry["failed_calls"] == 0
+    assert tools_telemetry["total_calls"] == 3
+    assert set(tools_called(agent_res)) == {
+        "eval_tool_1",
+        "eval_tool_2",
+        "eval_tool_3",
+    }
+
+
+def test_agent_tool_correctness():
+    """ToolCorrectnessMetric using tools_called from telemetry.breakdown."""
+    agent_res = run_agent()
+    called = [ToolCall(name=name) for name in tools_called(agent_res)]
+
+    test_case = LLMTestCase(
+        input=DEFAULT_PROMPT,
+        actual_output=agent_res["content"],
+        tools_called=called,
+        expected_tools=EXPECTED_TOOLS,
+    )
+
+    metric = ToolCorrectnessMetric(
+        model=StubJudge(),
+        threshold=1.0,
+        strict_mode=True,
+        should_exact_match=True,
+        include_reason=True,
+        async_mode=False,
+    )
+    assert_test(test_case, [metric])
diff --git a/eval-harness/promptfoo/config.yaml b/eval-harness/promptfoo/config.yaml
new file mode 100644
index 0000000..106c260
--- /dev/null
+++ b/eval-harness/promptfoo/config.yaml
@@ -0,0 +1,46 @@
+# Run from this directory:
+#   npx promptfoo eval -c config.yaml
+
+description: eval-harness agent run assertions
+
+prompts:
+  - "run eval check"
+
+providers:
+  - id: exec:../run_agent.sh
+    label: eval-agent
+
+tests:
+  - description: all mock tools were called
+    assert:
+      - type: javascript
+        value: |
+          const res = JSON.parse(output);
+          const breakdown = res.telemetry.tools.breakdown;
+          const tools = Object.keys(breakdown);
+          return tools.length === 3
+            && breakdown.eval_tool_1 === 1
+            && breakdown.eval_tool_2 === 1
+            && breakdown.eval_tool_3 === 1;
+
+  - description: agent completed successfully
+    assert:
+      - type: javascript
+        value: |
+          const res = JSON.parse(output);
+          return res.telemetry.run.finish_reason === "complete"
+            && res.content === "eval complete";
+
+  - description: no failed tool calls
+    assert:
+      - type: javascript
+        value: |
+          const res = JSON.parse(output);
+          return res.telemetry.tools.failed_calls === 0;
+
+  - description: llm usage reported
+    assert:
+      - type: javascript
+        value: |
+          const res = JSON.parse(output);
+          return res.llm_usage.total_tokens > 0;
diff --git a/eval-harness/run_agent.sh b/eval-harness/run_agent.sh
new file mode 100755
index 0000000..522838b
--- /dev/null
+++ b/eval-harness/run_agent.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+# Eval harness wrapper: runs the Go runner from the repo root and prints its JSON to stdout.
+# Args: $1 prompt (optional; when empty the runner is started without -prompt). Promptfoo also passes $2/$3 (JSON); ignored.
+
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+CONFIG="${ROOT}/eval-harness/runner/config.yaml"
+
+cd "$ROOT"
+if [[ -n "${1:-}" ]]; then
+  exec go run ./eval-harness/runner -config "$CONFIG" -prompt "$1"
+else
+  exec go run ./eval-harness/runner -config "$CONFIG"
+fi
diff --git a/eval-harness/runner/config.yaml b/eval-harness/runner/config.yaml
new file mode 100644
index 0000000..0c8b260
--- /dev/null
+++ b/eval-harness/runner/config.yaml
@@ -0,0 +1,14 @@
+runtime: local          # local or temporal
+
+user_prompt: "run eval check"
+
+agent:
+  name: eval-agent
+  system_prompt: "You are an evaluation agent. Use available tools when helpful, then answer concisely."
+  tool_count: 3
+
+temporal:
+  host: localhost
+  port: 7233
+  namespace: default
+  task_queue: eval-harness
diff --git a/eval-harness/runner/main.go b/eval-harness/runner/main.go
new file mode 100644
index 0000000..1b8ad64
--- /dev/null
+++ b/eval-harness/runner/main.go
@@ -0,0 +1,49 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"flag"
+	"log"
+	"os"
+
+	"github.com/agenticenv/agent-sdk-go/eval-harness/runner/setup"
+)
+
+func main() {
+	configPath := flag.String("config", "", "path to config.yaml (default: runner/config.yaml)")
+	prompt := flag.String("prompt", "", "override user_prompt from config")
+	runtimeFlag := flag.String("runtime", "", "override runtime: local or temporal")
+	toolCount := flag.Int("tools", 0, "override agent.tool_count (0 = use config)")
+	flag.Parse()
+
+	fileCfg, err := setup.LoadConfig(*configPath)
+	if err != nil {
+		log.Fatalf("load config: %v", err)
+	}
+
+	runCfg := fileCfg.Config()
+	if *prompt != "" { // an explicit -prompt wins over a positional prompt
+		runCfg.UserPrompt = *prompt
+	} else if args := flag.Args(); len(args) > 0 && args[0] != "" {
+		// Promptfoo exec provider passes the rendered prompt as the first positional arg; used only when -prompt is empty.
+		runCfg.UserPrompt = args[0]
+	}
+	if *runtimeFlag != "" {
+		runCfg.Runtime = setup.Runtime(*runtimeFlag)
+	}
+	if *toolCount > 0 { // zero or negative keeps the tool count loaded from config
+		runCfg.ToolCount = *toolCount
+	}
+
+	result, err := Run(context.Background(), runCfg)
+	if err != nil {
+		log.Fatalf("eval run failed: %v", err) // the log package writes to stderr by default, so stdout carries only the JSON result
+	}
+
+	enc := json.NewEncoder(os.Stdout)
+	enc.SetIndent("", "  ")
+	if err := enc.Encode(OutputFromResult(result)); err != nil {
+		log.Fatalf("encode result: %v", err)
+	}
+}
diff --git a/eval-harness/runner/output.go b/eval-harness/runner/output.go
new file mode 100644
index 0000000..fa94cf8
--- /dev/null
+++ b/eval-harness/runner/output.go
@@ -0,0 +1,22 @@
+package main
+
+import "github.com/agenticenv/agent-sdk-go/pkg/agent"
+
+// Output is a JSON-friendly view of an agent run for eval harness tools; llm_usage and telemetry are omitted from the JSON when nil.
+type Output struct {
+	Content   string                `json:"content"`
+	LLMUsage  *agent.LLMUsage       `json:"llm_usage,omitempty"`
+	Telemetry *agent.AgentTelemetry `json:"telemetry,omitempty"`
+}
+
+// OutputFromResult copies Content, LLMUsage, and Telemetry from result into a new Output; a nil result yields nil.
+func OutputFromResult(result *agent.AgentRunResult) *Output {
+	if result == nil {
+		return nil
+	}
+	return &Output{
+		Content:   result.Content,
+		LLMUsage:  result.LLMUsage,
+		Telemetry: result.Telemetry,
+	}
+}
diff --git a/eval-harness/runner/runner.go b/eval-harness/runner/runner.go
new file mode 100644
index 0000000..40f0d74
--- /dev/null
+++ b/eval-harness/runner/runner.go
@@ -0,0 +1,29 @@
+package main
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/agenticenv/agent-sdk-go/eval-harness/runner/setup"
+	"github.com/agenticenv/agent-sdk-go/pkg/agent"
+)
+
+// Run applies defaults to cfg, validates it, builds the agent, executes a single run with cfg.UserPrompt, and closes the agent before returning.
+func Run(ctx context.Context, cfg setup.Config) (*agent.AgentRunResult, error) {
+	cfg.ApplyDefaults()
+	if err := cfg.Validate(); err != nil {
+		return nil, err
+	}
+
+	a, err := setup.BuildAgent(cfg)
+	if err != nil {
+		return nil, err
+	}
+	defer a.Close()
+
+	result, err := a.Run(ctx, cfg.UserPrompt, nil)
+	if err != nil {
+		return nil, fmt.Errorf("agent run: %w", err)
+	}
+	return result, nil
+}
diff --git a/eval-harness/runner/runner_test.go b/eval-harness/runner/runner_test.go
new file mode 100644
index 0000000..19fb5f5
--- /dev/null
+++ b/eval-harness/runner/runner_test.go
@@ -0,0 +1,69 @@
+package main
+
+import (
+	"context"
+	"os"
+	"testing"
+
+	"github.com/agenticenv/agent-sdk-go/eval-harness/runner/setup"
+	"github.com/stretchr/testify/require"
+)
+
+func TestLoadConfig_Defaults(t *testing.T) {
+	cfg, err := setup.LoadConfig("config.yaml")
+	require.NoError(t, err)
+	require.Equal(t, "run eval check", cfg.UserPrompt)
+	require.Equal(t, "local", cfg.Runtime)
+	require.Equal(t, 3, cfg.Agent.ToolCount)
+}
+
+func TestRun_FromFileConfig(t *testing.T) {
+	fileCfg, err := setup.LoadConfig("config.yaml")
+	require.NoError(t, err)
+
+	result, err := Run(context.Background(), fileCfg.Config())
+	require.NoError(t, err)
+	require.NotEmpty(t, result.Content)
+	require.Equal(t, int64(3), result.Telemetry.Tools.TotalCalls)
+}
+
+func TestRun_LocalRuntime(t *testing.T) {
+	result, err := Run(context.Background(), setup.Config{
+		UserPrompt: "run eval check",
+		Runtime:    setup.RuntimeLocal,
+		ToolCount:  2,
+	})
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	require.NotEmpty(t, result.Content)
+	require.NotNil(t, result.Telemetry)
+	require.Equal(t, int64(2), result.Telemetry.Tools.TotalCalls)
+}
+
+func TestRun_TemporalRuntime(t *testing.T) {
+	if os.Getenv("EVAL_HARNESS_TEMPORAL") != "true" {
+		t.Skip("set EVAL_HARNESS_TEMPORAL=true with Temporal running on localhost:7233")
+	}
+
+	result, err := Run(context.Background(), setup.Config{
+		UserPrompt: "run eval check",
+		Runtime:    setup.RuntimeTemporal,
+		ToolCount:  2,
+	})
+	require.NoError(t, err)
+	require.NotEmpty(t, result.Content)
+	require.Equal(t, int64(2), result.Telemetry.Tools.TotalCalls)
+}
+
+func TestRun_RequiresUserPrompt(t *testing.T) {
+	_, err := Run(context.Background(), setup.Config{})
+	require.Error(t, err)
+}
+
+func TestRun_InvalidRuntime(t *testing.T) {
+	_, err := Run(context.Background(), setup.Config{
+		UserPrompt: "hello",
+		Runtime:    "invalid",
+	})
+	require.Error(t, err)
+}
diff --git a/eval-harness/runner/setup/agent.go b/eval-harness/runner/setup/agent.go
new file mode 100644
index 0000000..863fb64
--- /dev/null
+++ b/eval-harness/runner/setup/agent.go
@@ -0,0 +1,53 @@
+package setup
+
+import (
+	"fmt"
+	"math/rand"
+	"time"
+
+	"github.com/agenticenv/agent-sdk-go/pkg/agent"
+)
+
+const evalRNGSeed int64 = 42 // fixed seed: every BuildAgent call starts its mock RNG from the same state
+
+// BuildAgent assembles the eval agent described by cfg. A mock LLM client and a
+// registry of cfg.ToolCount mock tools are substituted when cfg.LLMClient or
+// cfg.ToolRegistry is nil; both mocks receive the same RNG seeded with evalRNGSeed.
+func BuildAgent(cfg Config) (*agent.Agent, error) {
+	seeded := rand.New(rand.NewSource(evalRNGSeed))
+	temporal := cfg.UseTemporal()
+
+	llm := cfg.LLMClient
+	if llm == nil {
+		llm = NewMockLLMClient(cfg.LLM, seeded)
+	}
+	registry := cfg.ToolRegistry
+	if registry == nil {
+		registry = RegisterMockTools(cfg.ToolCount, cfg.Tool, seeded)
+	}
+
+	options := []agent.Option{
+		agent.WithName(cfg.AgentName),
+		agent.WithDescription("Eval harness agent for single-run testing."),
+		agent.WithSystemPrompt(cfg.SystemPrompt),
+		agent.WithLLMClient(llm),
+		agent.WithToolRegistry(registry),
+		agent.WithToolApprovalPolicy(agent.AutoToolApprovalPolicy()),
+		agent.WithLogger(cfg.Logger),
+	}
+	if temporal {
+		tc := cfg.Temporal
+		options = append(options, agent.WithTemporalConfig(&agent.TemporalConfig{
+			Host: tc.Host, Port: tc.Port, Namespace: tc.Namespace, TaskQueue: tc.TaskQueue,
+		}))
+	}
+
+	built, err := agent.NewAgent(options...)
+	switch {
+	case err != nil:
+		return nil, fmt.Errorf("new agent: %w", err)
+	case temporal:
+		time.Sleep(300 * time.Millisecond) // NOTE(review): fixed pause, presumably to let Temporal workers start — confirm
+	}
+	return built, nil
+}
diff --git a/eval-harness/runner/setup/config.go b/eval-harness/runner/setup/config.go
new file mode 100644
index 0000000..de4f53f
--- /dev/null
+++ b/eval-harness/runner/setup/config.go
@@ -0,0 +1,115 @@
+package setup
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/agenticenv/agent-sdk-go/pkg/agent"
+	"github.com/agenticenv/agent-sdk-go/pkg/interfaces"
+	"github.com/agenticenv/agent-sdk-go/pkg/logger"
+)
+
+const (
+	DefaultAgentName    = "eval-agent" // applied when the agent name is empty
+	DefaultToolCount    = 3            // applied when the tool count is <= 0
+	DefaultMockTokens   = 500          // applied when LLM.MockTokens is <= 0
+	DefaultSystemPrompt = "You are an evaluation agent. Use available tools when helpful, then answer concisely."
+	DefaultRuntime      = RuntimeLocal // applied when Runtime is blank
+)
+
+// Runtime selects the agent execution backend; Validate accepts only the constants below.
+type Runtime string
+
+const (
+	RuntimeLocal    Runtime = "local"    // the default (see DefaultRuntime)
+	RuntimeTemporal Runtime = "temporal" // makes Config.UseTemporal report true
+)
+
+// LLMConfig configures the built-in mock LLM (internal defaults, not in YAML).
+type LLMConfig struct {
+	MockTokens int // total tokens reported per mock response; ApplyDefaults sets DefaultMockTokens when <= 0
+}
+
+// ToolConfig configures mock tools (internal defaults, not in YAML). It currently has no fields.
+type ToolConfig struct{}
+
+// TemporalConfig configures Temporal when Runtime is temporal.
+type TemporalConfig struct {
+	Host      string `mapstructure:"host"`       // defaults to "localhost" when empty
+	Port      int    `mapstructure:"port"`       // defaults to 7233 when 0
+	Namespace string `mapstructure:"namespace"`  // defaults to "default" when empty
+	TaskQueue string `mapstructure:"task_queue"` // defaults to "eval-harness" when empty
+}
+
+// Config holds settings for a single eval agent run.
+type Config struct {
+	UserPrompt   string               // required; Validate rejects an empty value
+	Runtime      Runtime              // RuntimeLocal or RuntimeTemporal; blank defaults to DefaultRuntime
+	Temporal     TemporalConfig       // passed to the agent only when UseTemporal is true
+	AgentName    string               // empty defaults to DefaultAgentName
+	SystemPrompt string               // empty defaults to DefaultSystemPrompt
+	LLM          LLMConfig            // settings for the built-in mock LLM
+	Tool         ToolConfig           // settings for the built-in mock tools
+	ToolCount    int                  // number of mock tools; <= 0 defaults to DefaultToolCount
+	LLMClient    interfaces.LLMClient // optional override; nil selects the mock LLM
+	ToolRegistry agent.ToolRegistry   // optional override; nil selects a registry of mock tools
+	Logger       logger.Logger        // nil defaults to logger.NoopLogger()
+}
+
+// UseTemporal reports whether c selects the Temporal runtime (ignoring case and surrounding whitespace); false for a nil receiver.
+func (c *Config) UseTemporal() bool {
+	return c != nil && strings.ToLower(strings.TrimSpace(string(c.Runtime))) == string(RuntimeTemporal)
+}
+
+// ApplyDefaults fills every unset field of c with its package default.
+// Already-populated fields are kept as they are; a nil receiver is a no-op.
+func (c *Config) ApplyDefaults() {
+	if c == nil {
+		return
+	}
+
+	// setIfEmpty assigns fallback to *dst only when *dst is the empty string.
+	setIfEmpty := func(dst *string, fallback string) {
+		if *dst == "" {
+			*dst = fallback
+		}
+	}
+	setIfEmpty(&c.AgentName, DefaultAgentName)
+	setIfEmpty(&c.SystemPrompt, DefaultSystemPrompt)
+	setIfEmpty(&c.Temporal.Host, "localhost")
+	setIfEmpty(&c.Temporal.Namespace, "default")
+	setIfEmpty(&c.Temporal.TaskQueue, "eval-harness")
+
+	// Non-string fields each have their own notion of "unset".
+	if strings.TrimSpace(string(c.Runtime)) == "" {
+		c.Runtime = DefaultRuntime
+	}
+	if c.ToolCount <= 0 {
+		c.ToolCount = DefaultToolCount
+	}
+	if c.LLM.MockTokens <= 0 {
+		c.LLM.MockTokens = DefaultMockTokens
+	}
+	if c.Temporal.Port == 0 {
+		c.Temporal.Port = 7233
+	}
+	if c.Logger == nil {
+		c.Logger = logger.NoopLogger()
+	}
+}
+
+// Validate returns an error when c is nil, when UserPrompt is empty or only whitespace (the same rule
+// FileConfig.validate applies), or when Runtime is not "local"/"temporal" after trimming and lower-casing.
+func (c *Config) Validate() error {
+	if c == nil {
+		return fmt.Errorf("config is required")
+	}
+	if strings.TrimSpace(c.UserPrompt) == "" {
+		return fmt.Errorf("user prompt is required")
+	}
+	switch strings.ToLower(strings.TrimSpace(string(c.Runtime))) {
+	case string(RuntimeLocal), string(RuntimeTemporal):
+		return nil
+	}
+	return fmt.Errorf("runtime must be %q or %q", RuntimeLocal, RuntimeTemporal)
+}
diff --git a/eval-harness/runner/setup/load.go b/eval-harness/runner/setup/load.go
new file mode 100644
index 0000000..4ad0f76
--- /dev/null
+++ b/eval-harness/runner/setup/load.go
@@ -0,0 +1,116 @@
+package setup
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/spf13/viper"
+)
+
+// FileConfig is the YAML configuration for eval-harness runs.
+type FileConfig struct {
+	Runtime    string          `mapstructure:"runtime"`     // "local" or "temporal"; empty is treated as "local"
+	UserPrompt string          `mapstructure:"user_prompt"` // required; must not be blank
+	Agent      FileAgentConfig `mapstructure:"agent"`       // agent settings (name, system prompt, tool count)
+	Temporal   TemporalConfig  `mapstructure:"temporal"`    // Temporal connection settings
+}
+
+// FileAgentConfig holds agent fields from YAML.
+type FileAgentConfig struct {
+	Name         string `mapstructure:"name"`          // empty defaults to DefaultAgentName
+	SystemPrompt string `mapstructure:"system_prompt"` // empty defaults to DefaultSystemPrompt
+	ToolCount    int    `mapstructure:"tool_count"`    // <= 0 defaults to DefaultToolCount
+}
+
+// Config converts the YAML-facing FileConfig into a runner Config. Fields with no YAML
+// counterpart stay at their zero values; a nil receiver yields an empty Config.
+func (f *FileConfig) Config() Config {
+	var out Config
+	if f != nil {
+		out.UserPrompt = f.UserPrompt
+		out.Runtime = Runtime(f.Runtime)
+		out.Temporal = f.Temporal
+		out.AgentName = f.Agent.Name
+		out.SystemPrompt = f.Agent.SystemPrompt
+		out.ToolCount = f.Agent.ToolCount
+	}
+	return out
+}
+
+// LoadConfig reads the YAML file at path (the default config path when path is empty), then validates it and fills defaults.
+func LoadConfig(path string) (*FileConfig, error) {
+	if path == "" {
+		path = defaultConfigPath()
+	}
+	reader := viper.New()
+	reader.SetConfigFile(path)
+	reader.SetConfigType("yaml")
+	if err := reader.ReadInConfig(); err != nil {
+		return nil, fmt.Errorf("read config %q: %w", path, err)
+	}
+	parsed := new(FileConfig)
+	if err := reader.Unmarshal(parsed); err != nil {
+		return nil, fmt.Errorf("parse config: %w", err)
+	}
+	if err := parsed.validate(); err != nil {
+		return nil, err
+	}
+	return parsed, nil
+}
+
+// DefaultConfigPath returns the first candidate config path that exists relative
+// to the working directory, or "config.yaml" when none of them exists.
+func DefaultConfigPath() string { return defaultConfigPath() }
+
+// defaultConfigPath probes the candidates in order and returns the first that os.Stat accepts.
+func defaultConfigPath() string {
+	const fallback = "config.yaml"
+	candidates := [...]string{"eval-harness/runner/config.yaml", "runner/config.yaml", fallback}
+	for i := range candidates {
+		if _, statErr := os.Stat(candidates[i]); statErr == nil {
+			return candidates[i]
+		}
+	}
+	return fallback
+}
+
+// validate checks required fields and fills defaults in place. It rejects a blank
+// user_prompt and any runtime other than "local"/"temporal" (case and surrounding
+// whitespace ignored); an accepted runtime is stored in canonical form, blank as "local".
+func (f *FileConfig) validate() error {
+	if f == nil {
+		return fmt.Errorf("config is required")
+	}
+	if strings.TrimSpace(f.UserPrompt) == "" {
+		return fmt.Errorf("user_prompt is required")
+	}
+	switch rt := strings.ToLower(strings.TrimSpace(f.Runtime)); rt {
+	case "":
+		f.Runtime = string(RuntimeLocal)
+	case string(RuntimeLocal), string(RuntimeTemporal):
+		// Store the canonical spelling so plain == comparisons on Runtime keep working.
+		f.Runtime = rt
+	default:
+		return fmt.Errorf("runtime must be %q or %q", RuntimeLocal, RuntimeTemporal)
+	}
+
+	// setIfEmpty assigns fallback to *dst only when *dst is the empty string.
+	setIfEmpty := func(dst *string, fallback string) {
+		if *dst == "" {
+			*dst = fallback
+		}
+	}
+	setIfEmpty(&f.Agent.Name, DefaultAgentName)
+	setIfEmpty(&f.Agent.SystemPrompt, DefaultSystemPrompt)
+	setIfEmpty(&f.Temporal.TaskQueue, "eval-harness")
+	setIfEmpty(&f.Temporal.Host, "localhost")
+	setIfEmpty(&f.Temporal.Namespace, "default")
+	if f.Agent.ToolCount <= 0 {
+		f.Agent.ToolCount = DefaultToolCount
+	}
+	if f.Temporal.Port == 0 {
+		f.Temporal.Port = 7233
+	}
+	return nil
+}
diff --git a/eval-harness/runner/setup/mock_llm.go b/eval-harness/runner/setup/mock_llm.go
new file mode 100644
index 0000000..8ef9064
--- /dev/null
+++ b/eval-harness/runner/setup/mock_llm.go
@@ -0,0 +1,119 @@
+package setup
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+	"time"
+
+	"github.com/agenticenv/agent-sdk-go/pkg/interfaces"
+)
+
+const mockLLMModel = "eval-mock" // model name reported by MockLLMClient.GetModel
+
+// MockLLMClient is a deterministic mock LLM for eval harness runs.
+type MockLLMClient struct {
+	cfg LLMConfig  // supplies MockTokens for the usage that Generate reports
+	rng *rand.Rand // stored by NewMockLLMClient; not read by any method in this file
+}
+
+// NewMockLLMClient returns a MockLLMClient holding cfg and rng; a nil rng is replaced by one seeded from the current time.
+func NewMockLLMClient(cfg LLMConfig, rng *rand.Rand) *MockLLMClient {
+	if rng != nil {
+		return &MockLLMClient{cfg: cfg, rng: rng}
+	}
+	return &MockLLMClient{cfg: cfg, rng: rand.New(rand.NewSource(time.Now().UnixNano()))}
+}
+
+// Generate returns a canned response whose token usage is derived from cfg.MockTokens.
+// If the request holds no tool-result message it asks for one call to every tool in
+// request.Tools; once a tool result is present it answers "eval complete".
+// A nil request yields an error rather than a nil-pointer panic.
+func (m *MockLLMClient) Generate(ctx context.Context, request *interfaces.LLMRequest) (*interfaces.LLMResponse, error) {
+	if request == nil {
+		return nil, fmt.Errorf("mock llm: request is required")
+	}
+
+	promptTokens, completionTokens := splitMockTokens(m.cfg.MockTokens)
+	usage := &interfaces.LLMUsage{
+		PromptTokens:     int64(promptTokens),
+		CompletionTokens: int64(completionTokens),
+		TotalTokens:      int64(promptTokens + completionTokens),
+	}
+
+	if hasToolResultMessages(request) {
+		return &interfaces.LLMResponse{Content: "eval complete", Usage: usage}, nil
+	}
+
+	toolCalls := make([]*interfaces.ToolCall, 0, len(request.Tools))
+	for i, spec := range request.Tools {
+		toolCalls = append(toolCalls, &interfaces.ToolCall{
+			ToolCallID: fmt.Sprintf("tc-%d", i+1),
+			ToolName:   spec.Name,
+			Args:       map[string]any{"input": "eval"},
+		})
+	}
+	return &interfaces.LLMResponse{Content: "executing tools", ToolCalls: toolCalls, Usage: usage}, nil
+}
+
+// GenerateStream runs Generate and wraps its response in a mockLLMStream.
+func (m *MockLLMClient) GenerateStream(ctx context.Context, request *interfaces.LLMRequest) (interfaces.LLMStream, error) {
+	full, genErr := m.Generate(ctx, request)
+	if genErr == nil {
+		return &mockLLMStream{resp: full}, nil
+	}
+	return nil, genErr
+}
+
+// GetModel returns the fixed mock model name.
+func (m *MockLLMClient) GetModel() string { return mockLLMModel }
+
+// GetProvider reports the OpenAI provider; IsStreamSupported reports false although GenerateStream is implemented.
+func (m *MockLLMClient) GetProvider() interfaces.LLMProvider { return interfaces.LLMProviderOpenAI }
+func (m *MockLLMClient) IsStreamSupported() bool             { return false }
+
+// mockLLMStream is a one-shot stream over an already-complete response.
+type mockLLMStream struct {
+	resp *interfaces.LLMResponse // response served as the only chunk
+	done bool                    // set by the first call to Next
+	err  error                   // returned by Err; never assigned in this file
+}
+
+// Next reports true exactly once, on its first call.
+func (s *mockLLMStream) Next() bool {
+	first := !s.done
+	s.done = true
+	return first
+}
+
+// Current returns the whole response as a single chunk, or nil when there is no response.
+func (s *mockLLMStream) Current() *interfaces.LLMStreamChunk {
+	if r := s.resp; r != nil {
+		return &interfaces.LLMStreamChunk{ContentDelta: r.Content, ToolCalls: r.ToolCalls}
+	}
+	return nil
+}
+
+func (s *mockLLMStream) Err() error                         { return s.err }
+func (s *mockLLMStream) GetResult() *interfaces.LLMResponse { return s.resp }
+
+// hasToolResultMessages reports whether request holds a message with the tool role; a nil request has none.
+func hasToolResultMessages(request *interfaces.LLMRequest) bool {
+	if request != nil {
+		for i := range request.Messages {
+			if request.Messages[i].Role == interfaces.MessageRoleTool {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// splitMockTokens gives 3/5 of total (rounded down) to prompt and the rest to completion; total <= 0 yields (0, 0).
+func splitMockTokens(total int) (prompt, completion int) {
+	if total > 0 {
+		prompt = total * 3 / 5
+		completion = total - prompt
+	}
+	return prompt, completion
+}
diff --git a/eval-harness/runner/setup/mock_tool.go b/eval-harness/runner/setup/mock_tool.go
new file mode 100644
index 0000000..e63bbf8
--- /dev/null
+++ b/eval-harness/runner/setup/mock_tool.go
@@ -0,0 +1,61 @@
+package setup
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+
+	"github.com/agenticenv/agent-sdk-go/pkg/agent"
+	"github.com/agenticenv/agent-sdk-go/pkg/interfaces"
+	"github.com/agenticenv/agent-sdk-go/pkg/tools"
+)
+
+const mockToolPrefix = "eval_tool_" // name prefix; newMockTool appends the tool's index
+
+// MockTool is a mock tool for eval harness runs.
+type MockTool struct {
+	name string     // registry name: mockToolPrefix followed by the tool's index
+	cfg  ToolConfig // tool settings as passed to newMockTool
+	rng  *rand.Rand // RNG as passed to newMockTool; not read by Execute
+}
+
+// newMockTool builds the mock tool for the given index. Its name is
+// mockToolPrefix followed by index, e.g. "eval_tool_1".
+func newMockTool(index int, cfg ToolConfig, rng *rand.Rand) *MockTool {
+	mock := &MockTool{cfg: cfg, rng: rng}
+	mock.name = fmt.Sprintf("%s%d", mockToolPrefix, index)
+	return mock
+}
+
+// Name and DisplayName both return the tool's registry name.
+func (t *MockTool) Name() string        { return t.name }
+func (t *MockTool) DisplayName() string { return t.name }
+
+// Description returns a fixed description shared by every mock tool.
+func (t *MockTool) Description() string { return "Eval harness mock tool." }
+
+// Parameters declares one string parameter, "input" (the trailing "input" argument presumably marks it required — confirm in tools.Params).
+func (t *MockTool) Parameters() interfaces.JSONSchema {
+	props := map[string]interfaces.JSONSchema{"input": tools.ParamString("Input payload for the eval tool.")}
+	return tools.Params(props, "input")
+}
+
+// Execute echoes the "input" argument ("eval" when it is missing, empty, or not a string) with status "ok"; it never returns an error.
+func (t *MockTool) Execute(ctx context.Context, args map[string]any) (any, error) {
+	payload := "eval"
+	if s, ok := args["input"].(string); ok && s != "" {
+		payload = s
+	}
+	return map[string]any{"tool": t.name, "input": payload, "status": "ok"}, nil
+}
+
+// RegisterMockTools returns a new registry holding count mock tools numbered from 1; it panics if a registration fails.
+func RegisterMockTools(count int, cfg ToolConfig, rng *rand.Rand) agent.ToolRegistry {
+	registry := agent.NewToolRegistry()
+	for n := 0; n < count; n++ {
+		if err := registry.Register(newMockTool(n+1, cfg, rng)); err != nil {
+			panic(err)
+		}
+	}
+	return registry
+}
diff --git a/examples/.env.defaults b/examples/.env.defaults
index faf076b..a722111 100644
--- a/examples/.env.defaults
+++ b/examples/.env.defaults
@@ -7,6 +7,11 @@
 # error | warn | info | debug
 LOG_LEVEL=error
 
+# --- Example run output (examples/shared PrintRunFooters) ---
+# Set to true to print token usage and/or run telemetry after each agent run.
+SHOW_LLM_USAGE=false
+SHOW_TELEMETRY=false
+
 # --- Agent runtime ---
 # local = in-process (default). temporal = requires Temporal server (see temporal-setup.md).
 AGENT_RUNTIME=local
diff --git a/examples/README.md b/examples/README.md
index e8c654c..3476cb2 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -270,6 +270,23 @@ Examples send conversation (user prompt, assistant response) to **stdout** and i
   go run ./simple_agent "Hello" 2>/dev/null
   ```
 
+## Run output
+
+All examples call [`shared.PrintRunFooters`](shared/utils.go) after each run. Set these in `examples/.env` (defaults in [`.env.defaults`](.env.defaults)) to print formatted footers:
+
+| Env var | Default | When `true` |
+|---------|---------|-------------|
+| `SHOW_LLM_USAGE` | `false` | Prints token usage (`prompt_tokens`, `completion_tokens`, etc.) |
+| `SHOW_TELEMETRY` | `false` | Prints run telemetry (`total_llm_calls`, tool counts, retriever searches, etc.) |
+
+```bash
+SHOW_LLM_USAGE=true go run ./simple_agent "Hello, what can you do?"
+SHOW_TELEMETRY=true go run ./simple_agent "Hello, what can you do?"
+SHOW_LLM_USAGE=true SHOW_TELEMETRY=true go run ./agent_with_stream "What's 17 * 23?"
+```
+
+For retriever examples, `SHOW_TELEMETRY=true` also prints prefetch/agentic search breakdowns — see [agent_with_retriever/README.md](agent_with_retriever/README.md).
+
 ## Env vars
 
 | Env var | Description |
@@ -283,6 +300,8 @@ Examples send conversation (user prompt, assistant response) to **stdout** and i
 | `LLM_MODEL` | e.g. `gpt-4o`, `claude-3-5-sonnet-20241022` |
 | `LLM_BASEURL` | Optional (custom/proxy endpoints) |
 | `LOG_LEVEL` | `error` (default), `warn`, `info`, `debug` — logs go to stderr |
+| `SHOW_LLM_USAGE` | Set to `true` to print token usage footer after each run (default: `false`) |
+| `SHOW_TELEMETRY` | Set to `true` to print run telemetry footer after each run (default: `false`) |
 | `SERPER_API_KEY` | For search tool |
 | `MCP_TRANSPORT` | **Required** for MCP examples: `stdio` or `streamable_http` (aliases: `local`, `http`, `remote`, …) |
 | `MCP_SERVER_NAME` | Optional server id for wiring (defaults: `local` for stdio, `remote` for HTTP) |
diff --git a/examples/agent_with_a2a_client/main.go b/examples/agent_with_a2a_client/main.go
index 3a57ab8..20863f3 100644
--- a/examples/agent_with_a2a_client/main.go
+++ b/examples/agent_with_a2a_client/main.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	a2aclient "github.com/agenticenv/agent-sdk-go/pkg/a2a/client"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 )
@@ -79,4 +80,5 @@ func main() {
 		return
 	}
 	fmt.Println("assistant:", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_a2a_config/main.go b/examples/agent_with_a2a_config/main.go
index 409093f..70a3ebe 100644
--- a/examples/agent_with_a2a_config/main.go
+++ b/examples/agent_with_a2a_config/main.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 )
 
@@ -54,4 +55,5 @@ func main() {
 		return
 	}
 	fmt.Println("assistant:", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_conversation/main.go b/examples/agent_with_conversation/main.go
index ff503f3..8623c09 100644
--- a/examples/agent_with_conversation/main.go
+++ b/examples/agent_with_conversation/main.go
@@ -9,6 +9,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	"github.com/agenticenv/agent-sdk-go/pkg/conversation/redis"
 	"github.com/agenticenv/agent-sdk-go/pkg/tools/calculator"
@@ -96,6 +97,7 @@ func runSingleTurn(ctx context.Context, a *agent.Agent, prompt, convID string) {
 		return
 	}
 	fmt.Println("assistant:", result.Content)
+	shared.PrintRunFooters(result)
 }
 
 func runInteractive(ctx context.Context, a *agent.Agent, convID string) {
@@ -124,5 +126,6 @@ func runInteractive(ctx context.Context, a *agent.Agent, convID string) {
 			continue
 		}
 		fmt.Println("assistant:", result.Content)
+		shared.PrintRunFooters(result)
 	}
 }
diff --git a/examples/agent_with_json_response/main.go b/examples/agent_with_json_response/main.go
index 6226026..8f946f9 100644
--- a/examples/agent_with_json_response/main.go
+++ b/examples/agent_with_json_response/main.go
@@ -9,6 +9,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	"github.com/agenticenv/agent-sdk-go/pkg/interfaces"
 )
@@ -81,4 +82,5 @@ func main() {
 		return
 	}
 	fmt.Printf("assistant (JSON):\n%s\n", string(pretty))
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_mcp_client/main.go b/examples/agent_with_mcp_client/main.go
index 8087c02..2f6ea3a 100644
--- a/examples/agent_with_mcp_client/main.go
+++ b/examples/agent_with_mcp_client/main.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	mcpclient "github.com/agenticenv/agent-sdk-go/pkg/mcp/client"
 )
@@ -74,4 +75,5 @@ func main() {
 		return
 	}
 	fmt.Println("assistant:", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_mcp_config/main.go b/examples/agent_with_mcp_config/main.go
index fa53816..910742a 100644
--- a/examples/agent_with_mcp_config/main.go
+++ b/examples/agent_with_mcp_config/main.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 )
 
@@ -70,4 +71,5 @@ func main() {
 		return
 	}
 	fmt.Println("assistant:", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_observability/config/main.go b/examples/agent_with_observability/config/main.go
index e88a6f2..60e7762 100644
--- a/examples/agent_with_observability/config/main.go
+++ b/examples/agent_with_observability/config/main.go
@@ -15,6 +15,7 @@ import (
 
 	excfg "github.com/agenticenv/agent-sdk-go/examples"
 	"github.com/agenticenv/agent-sdk-go/examples/agent_with_observability/setup"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 )
 
@@ -50,4 +51,5 @@ func main() {
 		return
 	}
 	fmt.Printf("assistant: %s\n", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_observability/objects/main.go b/examples/agent_with_observability/objects/main.go
index 70f0d64..1dd8f6a 100644
--- a/examples/agent_with_observability/objects/main.go
+++ b/examples/agent_with_observability/objects/main.go
@@ -18,6 +18,7 @@ import (
 
 	excfg "github.com/agenticenv/agent-sdk-go/examples"
 	"github.com/agenticenv/agent-sdk-go/examples/agent_with_observability/setup"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	"github.com/agenticenv/agent-sdk-go/pkg/observability"
 )
@@ -92,4 +93,5 @@ func main() {
 		return
 	}
 	fmt.Printf("assistant: %s\n", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_reasoning/main.go b/examples/agent_with_reasoning/main.go
index 1a8da25..cce8386 100644
--- a/examples/agent_with_reasoning/main.go
+++ b/examples/agent_with_reasoning/main.go
@@ -114,6 +114,7 @@ func printEvent(ev agent.AgentEvent, streamedSoFar bool) {
 		if res != nil && res.Content != "" && !streamedSoFar {
 			fmt.Printf("\n[complete] %s\n", res.Content)
 		}
+		shared.PrintRunFooters(res)
 	default:
 		// Ignore tool events (none registered).
 	}
diff --git a/examples/agent_with_retriever/README.md b/examples/agent_with_retriever/README.md
index 0eaaafa..dbf0878 100644
--- a/examples/agent_with_retriever/README.md
+++ b/examples/agent_with_retriever/README.md
@@ -69,6 +69,12 @@ go run ./agent_with_retriever/weaviate "What is the return policy?"
 RETRIEVER_MODE=prefetch go run ./agent_with_retriever/weaviate "What is the return policy?"
 ```
 
+Add `SHOW_TELEMETRY=true` to see retriever search counts (total, failed, prefetch/agentic breakdown) printed after the run:
+
+```bash
+SHOW_TELEMETRY=true go run ./agent_with_retriever/weaviate "What is the return policy?"
+```
+
 ### Weaviate troubleshooting
 
 | Symptom | What to do |
@@ -122,6 +128,12 @@ go run ./agent_with_retriever/pgvector "What is the return policy?"
 RETRIEVER_MODE=prefetch go run ./agent_with_retriever/pgvector "What is the return policy?"
 ```
 
+Add `SHOW_TELEMETRY=true` to see retriever search counts (total, failed, prefetch/agentic breakdown) printed after the run:
+
+```bash
+SHOW_TELEMETRY=true go run ./agent_with_retriever/pgvector "What is the return policy?"
+```
+
 ### pgvector troubleshooting
 
 | Symptom | What to do |
diff --git a/examples/agent_with_retriever/pgvector/main.go b/examples/agent_with_retriever/pgvector/main.go
index 107dc9c..efdeae9 100644
--- a/examples/agent_with_retriever/pgvector/main.go
+++ b/examples/agent_with_retriever/pgvector/main.go
@@ -16,6 +16,7 @@ import (
 
 	examplecfg "github.com/agenticenv/agent-sdk-go/examples"
 	"github.com/agenticenv/agent-sdk-go/examples/agent_with_retriever/common"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	pgretriever "github.com/agenticenv/agent-sdk-go/pkg/retriever/pgvector"
 )
@@ -87,4 +88,5 @@ func main() {
 		return
 	}
 	fmt.Println("assistant:", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_retriever/weaviate/main.go b/examples/agent_with_retriever/weaviate/main.go
index fb89da3..c6dc3ba 100644
--- a/examples/agent_with_retriever/weaviate/main.go
+++ b/examples/agent_with_retriever/weaviate/main.go
@@ -16,6 +16,7 @@ import (
 
 	examplecfg "github.com/agenticenv/agent-sdk-go/examples"
 	"github.com/agenticenv/agent-sdk-go/examples/agent_with_retriever/common"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	weaviate "github.com/agenticenv/agent-sdk-go/pkg/retriever/weaviate"
 )
@@ -77,4 +78,5 @@ func main() {
 		return
 	}
 	fmt.Println("assistant:", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_run_async/main.go b/examples/agent_with_run_async/main.go
index df47395..efb840f 100644
--- a/examples/agent_with_run_async/main.go
+++ b/examples/agent_with_run_async/main.go
@@ -12,6 +12,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	"github.com/agenticenv/agent-sdk-go/pkg/tools/calculator"
 	"github.com/agenticenv/agent-sdk-go/pkg/tools/echo"
@@ -82,6 +83,7 @@ func main() {
 		return
 	}
 	fmt.Println("agent:", res.Result.Content)
+	shared.PrintRunFooters(res.Result)
 }
 
 func makeApprovalHandler(lineCh <-chan string) agent.ApprovalHandler {
diff --git a/examples/agent_with_stream/main.go b/examples/agent_with_stream/main.go
index fa6a22f..ad3987f 100644
--- a/examples/agent_with_stream/main.go
+++ b/examples/agent_with_stream/main.go
@@ -141,10 +141,7 @@ func printEvent(ev agent.AgentEvent, streamedSoFar bool) {
 		} else if res != nil && res.Content != "" {
 			fmt.Printf("[%s]", eventType)
 		}
-		if u := shared.UsageFooter(res); u != "" {
-			fmt.Println()
-			fmt.Println(u)
-		}
+		shared.PrintRunFooters(res)
 	default:
 		//fmt.Printf("[%s] %+v\n", ev.Type(), ev)
 		return
diff --git a/examples/agent_with_stream_conversation/main.go b/examples/agent_with_stream_conversation/main.go
index 1ca885d..4a60fe9 100644
--- a/examples/agent_with_stream_conversation/main.go
+++ b/examples/agent_with_stream_conversation/main.go
@@ -172,13 +172,15 @@ func printEvent(ev agent.AgentEvent, streamedContent bool) {
 		}
 	case agent.AgentEventTypeRunFinished:
 		res := shared.RunResultFromFinishedEvent(ev)
-		if res != nil && res.Content != "" && !streamedContent {
-			fmt.Printf("\n[%s] %s\n", eventType, res.Content)
+		if res == nil {
+			return
 		}
-		if u := shared.UsageFooter(res); u != "" {
-			fmt.Println()
-			fmt.Println(u)
+		if res.Content != "" && !streamedContent {
+			fmt.Printf("\n[%s] %s\n", eventType, res.Content)
+		} else {
+			fmt.Printf("\n[%s]\n", eventType)
 		}
+		shared.PrintRunFooters(res)
 	default:
 		//fmt.Printf("[%s] %+v\n", eventType, ev)
 		return
diff --git a/examples/agent_with_subagents/main.go b/examples/agent_with_subagents/main.go
index 6d1b0e9..466cb47 100644
--- a/examples/agent_with_subagents/main.go
+++ b/examples/agent_with_subagents/main.go
@@ -182,6 +182,7 @@ func main() {
 				who = "agent"
 			}
 			fmt.Printf("\n[%s] [%s complete] %s\n", eventType, who, res.Content)
+			shared.PrintRunFooters(res)
 		}
 	}
 }
diff --git a/examples/agent_with_temporal_client/main.go b/examples/agent_with_temporal_client/main.go
index b5c9fb5..1035889 100644
--- a/examples/agent_with_temporal_client/main.go
+++ b/examples/agent_with_temporal_client/main.go
@@ -13,6 +13,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/internal/runtime/temporal"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	"go.temporal.io/sdk/client"
@@ -65,4 +66,5 @@ func main() {
 		return
 	}
 	fmt.Println("assistant:", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_tools/approval/main.go b/examples/agent_with_tools/approval/main.go
index 22f1e97..785afc2 100644
--- a/examples/agent_with_tools/approval/main.go
+++ b/examples/agent_with_tools/approval/main.go
@@ -10,6 +10,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	"github.com/agenticenv/agent-sdk-go/pkg/tools/calculator"
 	"github.com/agenticenv/agent-sdk-go/pkg/tools/echo"
@@ -69,6 +70,7 @@ func main() {
 		return
 	}
 	fmt.Println("agent:", result.Content)
+	shared.PrintRunFooters(result)
 }
 
 func makeApprovalHandler(lineCh <-chan string) agent.ApprovalHandler {
diff --git a/examples/agent_with_tools/authorizer/main.go b/examples/agent_with_tools/authorizer/main.go
index 4cc27c8..c67908a 100644
--- a/examples/agent_with_tools/authorizer/main.go
+++ b/examples/agent_with_tools/authorizer/main.go
@@ -114,6 +114,7 @@ func printEvent(ev agent.AgentEvent, streamedContent bool) {
 			}
 			fmt.Printf("\n[%s complete] %s\n", who, res.Content)
 		}
+		shared.PrintRunFooters(res)
 	default:
 		//fmt.Printf("[%s] %+v\n", ev.Type(), ev)
 		return
diff --git a/examples/agent_with_tools/basic/main.go b/examples/agent_with_tools/basic/main.go
index 2e7958a..488377a 100644
--- a/examples/agent_with_tools/basic/main.go
+++ b/examples/agent_with_tools/basic/main.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	"github.com/agenticenv/agent-sdk-go/pkg/tools/calculator"
 	"github.com/agenticenv/agent-sdk-go/pkg/tools/currenttime"
@@ -67,4 +68,5 @@ func main() {
 		return
 	}
 	fmt.Println("agent:", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_tools/custom/main.go b/examples/agent_with_tools/custom/main.go
index 9a14dd1..03080f0 100644
--- a/examples/agent_with_tools/custom/main.go
+++ b/examples/agent_with_tools/custom/main.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 )
 
@@ -50,4 +51,5 @@ func main() {
 		return
 	}
 	fmt.Println("agent:", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_tools/dynamic_registry/main.go b/examples/agent_with_tools/dynamic_registry/main.go
index 3e36e63..edd0ad2 100644
--- a/examples/agent_with_tools/dynamic_registry/main.go
+++ b/examples/agent_with_tools/dynamic_registry/main.go
@@ -6,6 +6,7 @@ import (
 	"log"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 	"github.com/agenticenv/agent-sdk-go/pkg/tools/calculator"
 	"github.com/agenticenv/agent-sdk-go/pkg/tools/echo"
@@ -51,6 +52,7 @@ func main() {
 		log.Printf("run 1 failed: %v", err)
 	} else {
 		fmt.Println("agent:", result.Content)
+		shared.PrintRunFooters(result)
 	}
 
 	if err := a.ToolRegistry().Register(calculator.New()); err != nil {
@@ -66,4 +68,5 @@ func main() {
 		return
 	}
 	fmt.Println("agent:", result.Content)
+	shared.PrintRunFooters(result)
 }
diff --git a/examples/agent_with_worker/agent/main.go b/examples/agent_with_worker/agent/main.go
index 884ba35..d3c490e 100644
--- a/examples/agent_with_worker/agent/main.go
+++ b/examples/agent_with_worker/agent/main.go
@@ -153,9 +153,7 @@ func runStream(ctx context.Context, a *agent.Agent, scanner *bufio.Scanner, prom
 			} else {
 				fmt.Println("[complete]")
 			}
-			if u := shared.UsageFooter(res); u != "" {
-				fmt.Println(u)
-			}
+			shared.PrintRunFooters(res)
 
 		default:
 			//fmt.Printf("[%s] %+v\n", ev.Type(), ev)
diff --git a/examples/config.go b/examples/config.go
index 71907da..ff47a9d 100644
--- a/examples/config.go
+++ b/examples/config.go
@@ -125,15 +125,13 @@ func getEnvInt(key string, def int) int {
 	return def
 }
 
-// ToolApprovalOptions applies AutoToolApprovalPolicy when EXAMPLES_AUTO_APPROVE
-// is set (task batch runs). Manual go run leaves it unset (default require-all + prompts).
+// ToolApprovalOptions applies AutoToolApprovalPolicy when EXAMPLES_AUTO_APPROVE=true
+// (task batch runs). Manual go run leaves it unset or false (default require-all + prompts).
 func ToolApprovalOptions() []agent.Option {
-	switch strings.ToLower(strings.TrimSpace(os.Getenv("EXAMPLES_AUTO_APPROVE"))) {
-	case "1", "true", "yes", "y":
+	if strings.EqualFold(strings.TrimSpace(os.Getenv("EXAMPLES_AUTO_APPROVE")), "true") {
 		return []agent.Option{agent.WithToolApprovalPolicy(agent.AutoToolApprovalPolicy())}
-	default:
-		return nil
 	}
+	return nil
 }
 
 func init() {
diff --git a/examples/durable_agent/agent/main.go b/examples/durable_agent/agent/main.go
index a4cf4ee..e50ab76 100644
--- a/examples/durable_agent/agent/main.go
+++ b/examples/durable_agent/agent/main.go
@@ -163,9 +163,7 @@ func runStream(ctx context.Context, a *agent.Agent, scanner *bufio.Scanner, prom
 			} else {
 				fmt.Println("[complete]")
 			}
-			if u := shared.UsageFooter(res); u != "" {
-				fmt.Println(u)
-			}
+			shared.PrintRunFooters(res)
 
 		default:
 			//fmt.Printf("[%s] %+v\n", ev.Type(), ev)
diff --git a/examples/multiple_agents/main.go b/examples/multiple_agents/main.go
index 452369f..9233fb5 100644
--- a/examples/multiple_agents/main.go
+++ b/examples/multiple_agents/main.go
@@ -9,6 +9,7 @@ import (
 	"sync"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 )
 
@@ -65,6 +66,7 @@ func main() {
 			return
 		}
 		fmt.Printf("%s: %s\n", name, result.Content)
+		shared.PrintRunFooters(result)
 	}
 
 	if mode == "concurrent" {
diff --git a/examples/shared/utils.go b/examples/shared/utils.go
index 7ff8d0e..0085db2 100644
--- a/examples/shared/utils.go
+++ b/examples/shared/utils.go
@@ -2,7 +2,10 @@ package shared
 
 import (
 	"fmt"
+	"os"
+	"sort"
 	"strings"
+	"time"
 
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 )
@@ -16,8 +19,7 @@ func RunResultFromFinishedEvent(ev agent.AgentEvent) *agent.AgentRunResult {
 	if !ok || fin == nil {
 		return nil
 	}
-	res, _ := fin.Result.(*agent.AgentRunResult)
-	return res
+	return fin.Result
 }
 
 // ToolApprovalValueFromEvent returns the CUSTOM tool-approval payload when ev is that stream event.
@@ -59,19 +61,111 @@ func MarksStreamDelta(ev agent.AgentEvent) bool {
 	}
 }
 
-// UsageFooter returns a non-empty line describing token usage from a finished run, or "".
-func UsageFooter(res *agent.AgentRunResult) string {
-	if res == nil || res.Usage == nil {
+// ShowLLMUsage reports whether examples should print token usage (SHOW_LLM_USAGE; default false).
+func ShowLLMUsage() bool {
+	return envBool("SHOW_LLM_USAGE")
+}
+
+// ShowTelemetry reports whether examples should print run telemetry (SHOW_TELEMETRY; default false).
+func ShowTelemetry() bool {
+	return envBool("SHOW_TELEMETRY")
+}
+
+// PrintRunFooters prints usage and telemetry when SHOW_LLM_USAGE / SHOW_TELEMETRY are enabled.
+func PrintRunFooters(result *agent.AgentRunResult) {
+	if result == nil {
+		return
+	}
+	if ShowLLMUsage() {
+		if footer := LLMUsageFooter(result.LLMUsage); footer != "" {
+			fmt.Println(footer)
+		}
+	}
+	if ShowTelemetry() {
+		if footer := TelemetryFooter(result.Telemetry); footer != "" {
+			fmt.Println(footer)
+		}
+	}
+}
+
+func envBool(key string) bool {
+	return strings.EqualFold(strings.TrimSpace(os.Getenv(key)), "true")
+}
+
+// LLMUsageFooter returns a multi-line block describing token usage, or "".
+func LLMUsageFooter(llmUsage *agent.LLMUsage) string {
+	if llmUsage == nil {
 		return ""
 	}
-	u := res.Usage
-	b := strings.Builder{}
-	fmt.Fprintf(&b, "[USAGE] prompt=%d completion=%d total=%d", u.PromptTokens, u.CompletionTokens, u.TotalTokens)
-	if u.CachedPromptTokens > 0 {
-		fmt.Fprintf(&b, " cached_prompt=%d", u.CachedPromptTokens)
+	lines := []string{
+		"\n[USAGE]",
+		fmt.Sprintf("  prompt_tokens:     %d", llmUsage.PromptTokens),
+		fmt.Sprintf("  completion_tokens: %d", llmUsage.CompletionTokens),
+		fmt.Sprintf("  total_tokens:      %d", llmUsage.TotalTokens),
 	}
-	if u.ReasoningTokens > 0 {
-		fmt.Fprintf(&b, " reasoning=%d", u.ReasoningTokens)
+	if llmUsage.CachedPromptTokens > 0 {
+		lines = append(lines, fmt.Sprintf("  cached_prompt:     %d", llmUsage.CachedPromptTokens))
+	}
+	if llmUsage.ReasoningTokens > 0 {
+		lines = append(lines, fmt.Sprintf("  reasoning_tokens:  %d", llmUsage.ReasoningTokens))
+	}
+	return strings.Join(lines, "\n")
+}
+
+// TelemetryFooter returns a multi-line block describing run telemetry, or "".
+func TelemetryFooter(telemetry *agent.AgentTelemetry) string {
+	if telemetry == nil {
+		return ""
+	}
+	lines := []string{
+		"[TELEMETRY RUN]",
+		fmt.Sprintf("  total_llm_calls: %d", telemetry.Run.TotalLLMCalls),
+		fmt.Sprintf("  started_at:      %s", formatTelemetryTime(telemetry.Run.StartedAt)),
+		fmt.Sprintf("  completed_at:    %s", formatTelemetryTime(telemetry.Run.CompletedAt)),
+	}
+	if telemetry.Run.FinishReason != "" {
+		lines = append(lines, fmt.Sprintf("  finish_reason:   %s", telemetry.Run.FinishReason))
+	}
+
+	lines = append(lines,
+		"[TELEMETRY TOOLS]",
+		fmt.Sprintf("  total_calls:  %d", telemetry.Tools.TotalCalls),
+		fmt.Sprintf("  failed_calls: %d", telemetry.Tools.FailedCalls),
+	)
+	if len(telemetry.Tools.Breakdown) > 0 {
+		lines = append(lines, "  breakdown:")
+		for _, name := range sortedKeys(telemetry.Tools.Breakdown) {
+			lines = append(lines, fmt.Sprintf("    %s: %d", name, telemetry.Tools.Breakdown[name]))
+		}
+	}
+	if len(telemetry.Tools.FailedBreakdown) > 0 {
+		lines = append(lines, "  failed_breakdown:")
+		for _, name := range sortedKeys(telemetry.Tools.FailedBreakdown) {
+			lines = append(lines, fmt.Sprintf("    %s: %d", name, telemetry.Tools.FailedBreakdown[name]))
+		}
+	}
+	lines = append(lines,
+		"[TELEMETRY STORAGE]",
+		fmt.Sprintf("  total_retriever_searches: %d", telemetry.Storage.TotalRetrieverSearches),
+		fmt.Sprintf("  failed_retriever_searches: %d", telemetry.Storage.FailedRetrieverSearches),
+		fmt.Sprintf("  prefetch_searches: %d", telemetry.Storage.PrefetchSearches),
+		fmt.Sprintf("  agentic_searches: %d", telemetry.Storage.AgenticSearches),
+	)
+	return strings.Join(lines, "\n")
+}
+
+func formatTelemetryTime(t time.Time) string {
+	if t.IsZero() {
+		return "-"
+	}
+	return t.UTC().Format(time.RFC3339)
+}
+
+func sortedKeys(m map[string]int64) []string {
+	keys := make([]string, 0, len(m))
+	for k := range m {
+		keys = append(keys, k)
 	}
-	return b.String()
+	sort.Strings(keys)
+	return keys
 }
diff --git a/examples/simple_agent/main.go b/examples/simple_agent/main.go
index 2268276..7d8e932 100644
--- a/examples/simple_agent/main.go
+++ b/examples/simple_agent/main.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 
 	config "github.com/agenticenv/agent-sdk-go/examples"
+	"github.com/agenticenv/agent-sdk-go/examples/shared"
 	"github.com/agenticenv/agent-sdk-go/pkg/agent"
 )
 
@@ -45,14 +46,5 @@ func main() {
 		return
 	}
 	fmt.Printf("assistant: %s\n", result.Content)
-	if result.Usage != nil {
-		fmt.Printf("\nusage: prompt=%d completion=%d total=%d", result.Usage.PromptTokens, result.Usage.CompletionTokens, result.Usage.TotalTokens)
-		if result.Usage.CachedPromptTokens > 0 {
-			fmt.Printf(" cached_prompt=%d", result.Usage.CachedPromptTokens)
-		}
-		if result.Usage.ReasoningTokens > 0 {
-			fmt.Printf(" reasoning=%d", result.Usage.ReasoningTokens)
-		}
-		fmt.Println()
-	}
+	shared.PrintRunFooters(result)
 }
diff --git a/internal/events/events_test.go b/internal/events/events_test.go
index 81ec8b8..ce3b4cf 100644
--- a/internal/events/events_test.go
+++ b/internal/events/events_test.go
@@ -4,6 +4,8 @@ import (
 	"encoding/json"
 	"strings"
 	"testing"
+
+	"github.com/agenticenv/agent-sdk-go/internal/types"
 )
 
 func TestConstructorsAndRoundTripForAllEventTypes(t *testing.T) {
@@ -26,7 +28,7 @@ func TestConstructorsAndRoundTripForAllEventTypes(t *testing.T) {
 		},
 		{
 			name:     "run_finished_with_result",
-			event:    NewAgentRunFinishedEvent("thread-2", "run-2", map[string]any{"ok": true}),
+			event:    NewAgentRunFinishedEvent("thread-2", "run-2", &types.AgentRunResult{Content: "ok"}),
 			wantType: AgentEventTypeRunFinished,
 			assertFields: func(t *testing.T, raw map[string]any) {
 				t.Helper()
@@ -263,6 +265,37 @@ func TestConstructorsAndRoundTripForAllEventTypes(t *testing.T) {
 	}
 }
 
+func TestRunFinishedResultRoundTrip(t *testing.T) {
+	orig := NewAgentRunFinishedEvent("thread-1", "run-1", &types.AgentRunResult{
+		Content:   "hello",
+		AgentName: "agent-a",
+		Model:     "gpt-test",
+		LLMUsage:  &types.LLMUsage{TotalTokens: 42},
+	})
+	data, err := orig.ToJSON()
+	if err != nil {
+		t.Fatalf("ToJSON error: %v", err)
+	}
+
+	decoded, err := EventFromJSON(data)
+	if err != nil {
+		t.Fatalf("EventFromJSON error: %v", err)
+	}
+	fin, ok := decoded.(*AgentRunFinishedEvent)
+	if !ok || fin == nil {
+		t.Fatalf("decoded type: %T", decoded)
+	}
+	if fin.Result == nil {
+		t.Fatal("decoded Result is nil")
+	}
+	if fin.Result.Content != "hello" || fin.Result.AgentName != "agent-a" || fin.Result.Model != "gpt-test" {
+		t.Fatalf("unexpected result: %#v", fin.Result)
+	}
+	if fin.Result.LLMUsage == nil || fin.Result.LLMUsage.TotalTokens != 42 {
+		t.Fatalf("unexpected llm usage: %#v", fin.Result.LLMUsage)
+	}
+}
+
 func TestEventTypeFromJSON(t *testing.T) {
 	data := []byte(`{"type":"TOOL_CALL_END","toolCallId":"x"}`)
 	typ, err := EventTypeFromJSON(data)
diff --git a/internal/events/lifecycle_events.go b/internal/events/lifecycle_events.go
index cb04aaa..7a9a70d 100644
--- a/internal/events/lifecycle_events.go
+++ b/internal/events/lifecycle_events.go
@@ -1,6 +1,10 @@
 package events
 
-import "encoding/json"
+import (
+	"encoding/json"
+
+	"github.com/agenticenv/agent-sdk-go/internal/types"
+)
 
 // RUN_STARTED
 type AgentRunStartedEvent struct {
@@ -27,12 +31,12 @@ func (e *AgentRunStartedEvent) ToJSON() ([]byte, error) { return json.Marshal(e)
 // RUN_FINISHED
 type AgentRunFinishedEvent struct {
 	*BaseEvent
-	ThreadID string `json:"threadId"`
-	RunID    string `json:"runId"`
-	Result   any    `json:"result,omitempty"` // ← any not json.RawMessage
+	ThreadID string                `json:"threadId"`
+	RunID    string                `json:"runId"`
+	Result   *types.AgentRunResult `json:"result,omitempty"`
 }
 
-func NewAgentRunFinishedEvent(threadID, runID string, result any) *AgentRunFinishedEvent {
+func NewAgentRunFinishedEvent(threadID, runID string, result *types.AgentRunResult) *AgentRunFinishedEvent {
 	return &AgentRunFinishedEvent{
 		BaseEvent: NewBaseEvent(AgentEventTypeRunFinished),
 		ThreadID:  threadID,
diff --git a/internal/runtime/base/runtime.go b/internal/runtime/base/runtime.go
index 1dc0792..7a9cfa1 100644
--- a/internal/runtime/base/runtime.go
+++ b/internal/runtime/base/runtime.go
@@ -31,6 +31,17 @@ type Runtime struct {
 	ToolExecutionMode types.AgentToolExecutionMode
 }
 
+type ExecuteLLMInput struct {
+	Logger           logger.Logger
+	AgentName        string
+	MessageID        string
+	Messages         []interfaces.Message
+	SkipTools        bool
+	RetrieverContext string
+	Tools            []interfaces.Tool
+	Emit             func(events.AgentEvent)
+}
+
 // BuildLLMRequest constructs an LLMRequest from the given messages and options.
 // When retrieverContext is non-empty it is appended to the system prompt (prefetch/hybrid mode).
 // tools is the per-run resolved tool list from [runtime.ExecuteRequest] or activity resolve.
@@ -116,6 +127,7 @@ func (rt *Runtime) llmResponseToResult(resp *interfaces.LLMResponse, tools []int
 			ToolCallID:      tc.ToolCallID,
 			ToolName:        tc.ToolName,
 			ToolDisplayName: displayName,
+			ToolKind:        types.KindOf(tool),
 			Args:            tc.Args,
 			NeedsApproval:   rt.RequiresApproval(tool),
 		})
@@ -133,17 +145,8 @@ func emitEvent(fn func(events.AgentEvent), ev events.AgentEvent) {
 // ExecuteLLM calls the LLM in non-streaming mode, records metrics and traces, emits
 // TEXT_MESSAGE_START / TEXT_MESSAGE_CONTENT / TEXT_MESSAGE_END events, and returns LLMResult.
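-// messageID and agentName are used only for event construction; emit may be nil.
+// input.MessageID is used only for event construction and input.AgentName only for the trace span; input.Emit may be nil.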
-func (rt *Runtime) ExecuteLLM(
-	ctx context.Context,
-	log logger.Logger,
-	agentName, messageID string,
-	messages []interfaces.Message,
-	skipTools bool,
-	retrieverContext string,
-	tools []interfaces.Tool,
-	emit func(events.AgentEvent),
-) (*LLMResult, error) {
-	req := rt.BuildLLMRequest(messages, skipTools, retrieverContext, tools)
+func (rt *Runtime) ExecuteLLM(ctx context.Context, input ExecuteLLMInput) (*LLMResult, error) {
+	req := rt.BuildLLMRequest(input.Messages, input.SkipTools, input.RetrieverContext, input.Tools)
 
 	llmClient := rt.AgentConfig.LLM.Client
 	model := llmClient.GetModel()
@@ -151,14 +154,14 @@ func (rt *Runtime) ExecuteLLM(
 	modelAttr := interfaces.Attribute{Key: types.MetricAttrModel, Value: model}
 	providerAttr := interfaces.Attribute{Key: types.MetricAttrProvider, Value: provider}
 
-	log.Debug(ctx, "runtime: LLM generate started", slog.String("scope", "runtime"), slog.Int("messageCount", len(messages)))
+	input.Logger.Debug(ctx, "runtime: LLM generate started", slog.String("scope", "runtime"), slog.Int("messageCount", len(input.Messages)))
 
 	rt.Metrics.IncrementCounter(ctx, types.MetricLLMCallStarted, modelAttr, providerAttr)
 	llmStart := time.Now()
 
 	ctx, sp := rt.Tracer.StartSpan(ctx, "llm.generate",
-		interfaces.Attribute{Key: "agent.name", Value: strings.TrimSpace(agentName)},
-		interfaces.Attribute{Key: "message.count", Value: len(messages)},
+		interfaces.Attribute{Key: "agent.name", Value: strings.TrimSpace(input.AgentName)},
+		interfaces.Attribute{Key: "message.count", Value: len(input.Messages)},
 		modelAttr,
 		providerAttr,
 	)
@@ -180,16 +183,16 @@ func (rt *Runtime) ExecuteLLM(
 		rt.Metrics.RecordHistogram(ctx, types.MetricLLMTokensOutput, float64(resp.Usage.CompletionTokens), modelAttr, providerAttr)
 	}
 
-	log.Debug(ctx, "runtime: LLM generate completed", slog.String("scope", "runtime"), slog.Int("messageCount", len(messages)))
+	input.Logger.Debug(ctx, "runtime: LLM generate completed", slog.String("scope", "runtime"), slog.Int("messageCount", len(input.Messages)))
 
-	result, err := rt.llmResponseToResult(resp, tools)
+	result, err := rt.llmResponseToResult(resp, input.Tools)
 	if err != nil {
 		return nil, err
 	}
 
-	emitEvent(emit, events.NewAgentTextMessageStartEvent(messageID, string(interfaces.MessageRoleAssistant)))
-	emitEvent(emit, events.NewAgentTextMessageContentEvent(messageID, result.Content))
-	emitEvent(emit, events.NewAgentTextMessageEndEvent(messageID))
+	emitEvent(input.Emit, events.NewAgentTextMessageStartEvent(input.MessageID, string(interfaces.MessageRoleAssistant)))
+	emitEvent(input.Emit, events.NewAgentTextMessageContentEvent(input.MessageID, result.Content))
+	emitEvent(input.Emit, events.NewAgentTextMessageEndEvent(input.MessageID))
 	return result, nil
 }
 
@@ -197,17 +200,8 @@ func (rt *Runtime) ExecuteLLM(
 // it falls back to Generate automatically. Delta events (text content, reasoning) are emitted via
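-// emit as chunks arrive; a final TEXT_MESSAGE_START/CONTENT/END triple is emitted for non-streaming
-// fallback. emit may be nil.
+// input.Emit as chunks arrive; a final TEXT_MESSAGE_START/CONTENT/END triple is emitted for non-streaming
+// fallback. input.Emit may be nil.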
-func (rt *Runtime) ExecuteLLMStream(
-	ctx context.Context,
-	log logger.Logger,
-	agentName, messageID string,
-	messages []interfaces.Message,
-	skipTools bool,
-	retrieverContext string,
-	tools []interfaces.Tool,
-	emit func(events.AgentEvent),
-) (*LLMResult, error) {
-	req := rt.BuildLLMRequest(messages, skipTools, retrieverContext, tools)
+func (rt *Runtime) ExecuteLLMStream(ctx context.Context, input ExecuteLLMInput) (*LLMResult, error) {
+	req := rt.BuildLLMRequest(input.Messages, input.SkipTools, input.RetrieverContext, input.Tools)
 
 	llmClient := rt.AgentConfig.LLM.Client
 	model := llmClient.GetModel()
@@ -220,8 +214,8 @@ func (rt *Runtime) ExecuteLLMStream(
 	llmStart := time.Now()
 
 	ctx, sp := rt.Tracer.StartSpan(ctx, "llm.stream",
-		interfaces.Attribute{Key: "agent.name", Value: strings.TrimSpace(agentName)},
-		interfaces.Attribute{Key: "message.count", Value: len(messages)},
+		interfaces.Attribute{Key: "agent.name", Value: strings.TrimSpace(input.AgentName)},
+		interfaces.Attribute{Key: "message.count", Value: len(input.Messages)},
 		interfaces.Attribute{Key: "streaming", Value: isStreamSupported},
 		modelAttr,
 		providerAttr,
@@ -234,14 +228,14 @@ func (rt *Runtime) ExecuteLLMStream(
 		if textMsgOpen {
 			return
 		}
-		emitEvent(emit, events.NewAgentTextMessageStartEvent(messageID, string(interfaces.MessageRoleAssistant)))
+		emitEvent(input.Emit, events.NewAgentTextMessageStartEvent(input.MessageID, string(interfaces.MessageRoleAssistant)))
 		textMsgOpen = true
 	}
 	closeTextMsg := func() {
 		if !textMsgOpen {
 			return
 		}
-		emitEvent(emit, events.NewAgentTextMessageEndEvent(messageID))
+		emitEvent(input.Emit, events.NewAgentTextMessageEndEvent(input.MessageID))
 		textMsgOpen = false
 	}
 	// If the model never sent text chunks still emit one assistant turn (empty for tool-only).
@@ -251,13 +245,13 @@ func (rt *Runtime) ExecuteLLMStream(
 			return
 		}
 		openTextMsg()
-		emitEvent(emit, events.NewAgentTextMessageContentEvent(messageID, result.Content))
+		emitEvent(input.Emit, events.NewAgentTextMessageContentEvent(input.MessageID, result.Content))
 		closeTextMsg()
 	}
 
 	// Non-streaming fallback: use Generate and emit a complete text message.
 	if !isStreamSupported {
-		log.Debug(ctx, "runtime: LLM stream unsupported, using generate", slog.String("scope", "runtime"))
+		input.Logger.Debug(ctx, "runtime: LLM stream unsupported, using generate", slog.String("scope", "runtime"))
 		resp, err := llmClient.Generate(ctx, req)
 		llmLatency := float64(time.Since(llmStart).Milliseconds())
 		if err != nil {
@@ -266,7 +260,7 @@ func (rt *Runtime) ExecuteLLMStream(
 			rt.Metrics.RecordHistogram(ctx, types.MetricLLMLatencyMs, llmLatency, modelAttr, providerAttr)
 			return nil, err
 		}
-		result, err := rt.llmResponseToResult(resp, tools)
+		result, err := rt.llmResponseToResult(resp, input.Tools)
 		if err != nil {
 			sp.RecordError(err)
 			rt.Metrics.IncrementCounter(ctx, types.MetricLLMCallFailed, modelAttr, providerAttr)
@@ -298,11 +292,11 @@ func (rt *Runtime) ExecuteLLMStream(
 	reasoningMsgOpen := false
 	flushReasoning := func() {
 		if reasoningMsgOpen {
-			emitEvent(emit, events.NewAgentReasoningMessageEndEvent(reasoningMID))
+			emitEvent(input.Emit, events.NewAgentReasoningMessageEndEvent(reasoningMID))
 			reasoningMsgOpen = false
 		}
 		if reasoningPhaseOpen {
-			emitEvent(emit, events.NewAgentReasoningEndEvent(reasoningMID))
+			emitEvent(input.Emit, events.NewAgentReasoningEndEvent(reasoningMID))
 			reasoningPhaseOpen = false
 		}
 	}
@@ -311,9 +305,9 @@ func (rt *Runtime) ExecuteLLMStream(
 			return
 		}
 		reasoningMID = uuid.New().String()
-		emitEvent(emit, events.NewAgentReasoningStartEvent(reasoningMID))
+		emitEvent(input.Emit, events.NewAgentReasoningStartEvent(reasoningMID))
 		reasoningPhaseOpen = true
-		emitEvent(emit, events.NewAgentReasoningMessageStartEvent(reasoningMID, string(interfaces.MessageRoleReasoning)))
+		emitEvent(input.Emit, events.NewAgentReasoningMessageStartEvent(reasoningMID, string(interfaces.MessageRoleReasoning)))
 		reasoningMsgOpen = true
 	}
 
@@ -325,11 +319,11 @@ func (rt *Runtime) ExecuteLLMStream(
 		if chunk.ContentDelta != "" {
 			flushReasoning()
 			openTextMsg()
-			emitEvent(emit, events.NewAgentTextMessageContentEvent(messageID, chunk.ContentDelta))
+			emitEvent(input.Emit, events.NewAgentTextMessageContentEvent(input.MessageID, chunk.ContentDelta))
 		}
 		if chunk.ThinkingDelta != "" {
 			openReasoning()
-			emitEvent(emit, events.NewAgentReasoningMessageContentEvent(reasoningMID, chunk.ThinkingDelta))
+			emitEvent(input.Emit, events.NewAgentReasoningMessageContentEvent(reasoningMID, chunk.ThinkingDelta))
 		}
 	}
 	flushReasoning()
@@ -351,7 +345,7 @@ func (rt *Runtime) ExecuteLLMStream(
 		return nil, err
 	}
 
-	result, err := rt.llmResponseToResult(resp, tools)
+	result, err := rt.llmResponseToResult(resp, input.Tools)
 	if err != nil {
 		sp.RecordError(err)
 		rt.Metrics.IncrementCounter(ctx, types.MetricLLMCallFailed, modelAttr, providerAttr)
@@ -366,7 +360,7 @@ func (rt *Runtime) ExecuteLLMStream(
 		rt.Metrics.RecordHistogram(ctx, types.MetricLLMTokensOutput, float64(resp.Usage.CompletionTokens), modelAttr, providerAttr)
 	}
 
-	log.Debug(ctx, "runtime: LLM stream completed", slog.String("scope", "runtime"))
+	input.Logger.Debug(ctx, "runtime: LLM stream completed", slog.String("scope", "runtime"))
 	finalizeAssistantText(result)
 	return result, nil
 }
@@ -453,10 +447,10 @@ func (rt *Runtime) AuthorizeTool(ctx context.Context, log logger.Logger, tools [
 // ExecuteRetrievers runs all configured retrievers in parallel for the given query and
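-// returns a combined document context string for injection into the LLM system prompt.
-// Partial failures are logged and skipped; all retrievers failing returns an error.
+// returns a RetrieverResult: the combined document context for injection into the LLM system prompt
+// plus TotalSearches/FailedSearches counts. Failures are logged and skipped; when every retriever fails the result has an empty Context and FailedSearches equals TotalSearches.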
-func (rt *Runtime) ExecuteRetrievers(ctx context.Context, log logger.Logger, query string) (string, error) {
+func (rt *Runtime) ExecuteRetrievers(ctx context.Context, log logger.Logger, query string) (*RetrieverResult, error) {
 	retrievers := rt.AgentConfig.Retrievers.Retrievers
 	if len(retrievers) == 0 {
-		return "", nil
+		return &RetrieverResult{}, nil
 	}
 
 	log.Debug(ctx, "runtime: retriever prefetch started", slog.String("scope", "runtime"), slog.Int("retrieverCount", len(retrievers)), slog.String("query", query))
@@ -517,14 +511,15 @@ func (rt *Runtime) ExecuteRetrievers(ctx context.Context, log logger.Logger, que
 		sb.WriteString(FormatRetrieverDocs(res.docs))
 	}
 
-	if failedCount == len(retrievers) {
-		return "", fmt.Errorf("retriever prefetch: all %d retriever(s) failed", len(retrievers))
-	}
 	if failedCount > 0 {
 		log.Warn(ctx, "runtime: some retrievers failed, continuing with partial context", slog.String("scope", "runtime"), slog.Int("failed", failedCount), slog.Int("total", len(retrievers)))
 	}
 
 	retrieverContext := strings.TrimSpace(sb.String())
 	log.Debug(ctx, "runtime: retriever prefetch completed", slog.String("scope", "runtime"), slog.Int("retrieverCount", len(retrievers)), slog.Bool("hasContext", retrieverContext != ""))
-	return retrieverContext, nil
+	return &RetrieverResult{
+		Context:        retrieverContext,
+		TotalSearches:  int64(len(retrievers)),
+		FailedSearches: int64(failedCount),
+	}, nil
 }
diff --git a/internal/runtime/base/runtime_test.go b/internal/runtime/base/runtime_test.go
index db3a882..d01ed4b 100644
--- a/internal/runtime/base/runtime_test.go
+++ b/internal/runtime/base/runtime_test.go
@@ -244,7 +244,8 @@ func TestExecuteRetrievers_NoRetrievers(t *testing.T) {
 	rt := newTestRuntime(sdkruntime.AgentConfig{})
 	got, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "query")
 	require.NoError(t, err)
-	require.Equal(t, "", got)
+	require.Equal(t, "", got.Context)
+	require.Equal(t, int64(0), got.TotalSearches)
 }
 
 func TestExecuteRetrievers_AllFail(t *testing.T) {
@@ -256,9 +257,11 @@ func TestExecuteRetrievers_AllFail(t *testing.T) {
 	rt := newTestRuntime(sdkruntime.AgentConfig{
 		Retrievers: sdkruntime.AgentRetrievers{Retrievers: []interfaces.Retriever{r}},
 	})
-	_, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "q")
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "all")
+	got, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "q")
+	require.NoError(t, err)
+	require.Equal(t, "", got.Context)
+	require.Equal(t, int64(1), got.TotalSearches)
+	require.Equal(t, int64(1), got.FailedSearches)
 }
 
 func TestExecuteRetrievers_Success(t *testing.T) {
@@ -274,7 +277,9 @@ func TestExecuteRetrievers_Success(t *testing.T) {
 	})
 	got, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "my query")
 	require.NoError(t, err)
-	require.Contains(t, got, "doc content")
+	require.Contains(t, got.Context, "doc content")
+	require.Equal(t, int64(1), got.TotalSearches)
+	require.Equal(t, int64(0), got.FailedSearches)
 }
 
 // --- ExecuteLLM ---
@@ -283,7 +288,17 @@ func TestExecuteLLM_LLMError(t *testing.T) {
 	rt := newTestRuntime(sdkruntime.AgentConfig{
 		LLM: sdkruntime.AgentLLM{Client: stubLLMClient{err: errors.New("llm unavailable")}},
 	})
-	_, err := rt.ExecuteLLM(context.Background(), noopLog(), "agent", "msg-1", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg-1",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	_, err := rt.ExecuteLLM(context.Background(), input)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "llm unavailable")
 }
@@ -294,7 +309,17 @@ func TestExecuteLLM_Success_NoTools(t *testing.T) {
 			resp: &interfaces.LLMResponse{Content: "hello world"},
 		}},
 	})
-	result, err := rt.ExecuteLLM(context.Background(), noopLog(), "agent", "msg-1", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg-1",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	result, err := rt.ExecuteLLM(context.Background(), input)
 	require.NoError(t, err)
 	require.Equal(t, "hello world", result.Content)
 	require.Empty(t, result.ToolCalls)
@@ -312,7 +337,17 @@ func TestExecuteLLM_EmitsTextMessageEvents(t *testing.T) {
 		emitted = append(emitted, ev.Type())
 	}
 
-	_, err := rt.ExecuteLLM(context.Background(), noopLog(), "agent", "msg-1", nil, false, "", nil, emit)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg-1",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             emit,
+	}
+	_, err := rt.ExecuteLLM(context.Background(), input)
 	require.NoError(t, err)
 	require.Equal(t, []events.AgentEventType{
 		events.AgentEventTypeTextMessageStart,
@@ -328,7 +363,17 @@ func TestExecuteLLM_NilEmitDoesNotPanic(t *testing.T) {
 		}},
 	})
 	require.NotPanics(t, func() {
-		_, _ = rt.ExecuteLLM(context.Background(), noopLog(), "a", "m", nil, false, "", nil, nil)
+		input := ExecuteLLMInput{
+			Logger:           noopLog(),
+			AgentName:        "a",
+			MessageID:        "m",
+			Messages:         nil,
+			SkipTools:        false,
+			RetrieverContext: "",
+			Tools:            nil,
+			Emit:             nil,
+		}
+		_, _ = rt.ExecuteLLM(context.Background(), input)
 	})
 }
 
@@ -343,7 +388,17 @@ func TestExecuteLLM_UnknownToolCallReturnsError(t *testing.T) {
 			},
 		}},
 	})
-	_, err := rt.ExecuteLLM(context.Background(), noopLog(), "a", "m", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "a",
+		MessageID:        "m",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	_, err := rt.ExecuteLLM(context.Background(), input)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "unknown tool")
 }
@@ -357,7 +412,17 @@ func TestExecuteLLM_WithUsageMetrics(t *testing.T) {
 			},
 		}},
 	})
-	result, err := rt.ExecuteLLM(context.Background(), noopLog(), "a", "m", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "a",
+		MessageID:        "m",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	result, err := rt.ExecuteLLM(context.Background(), input)
 	require.NoError(t, err)
 	require.NotNil(t, result.Usage)
 	require.EqualValues(t, 10, result.Usage.PromptTokens)
@@ -380,7 +445,17 @@ func TestExecuteLLM_ToolCallWithEmptyDisplayName(t *testing.T) {
 			},
 		}},
 	})
-	result, err := rt.ExecuteLLM(context.Background(), noopLog(), "a", "m", nil, false, "", []interfaces.Tool{tool}, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "a",
+		MessageID:        "m",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            []interfaces.Tool{tool},
+		Emit:             nil,
+	}
+	result, err := rt.ExecuteLLM(context.Background(), input)
 	require.NoError(t, err)
 	require.Len(t, result.ToolCalls, 1)
 	require.Equal(t, "my-tool", result.ToolCalls[0].ToolDisplayName)
@@ -395,7 +470,17 @@ func TestExecuteLLM_NilToolCallInResponse(t *testing.T) {
 			},
 		}},
 	})
-	result, err := rt.ExecuteLLM(context.Background(), noopLog(), "a", "m", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "a",
+		MessageID:        "m",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	result, err := rt.ExecuteLLM(context.Background(), input)
 	require.NoError(t, err)
 	require.Empty(t, result.ToolCalls)
 }
@@ -443,7 +528,9 @@ func TestExecuteRetrievers_PartialFailure(t *testing.T) {
 	})
 	got, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "q")
 	require.NoError(t, err) // partial is ok
-	require.Contains(t, got, "useful")
+	require.Contains(t, got.Context, "useful")
+	require.Equal(t, int64(2), got.TotalSearches)
+	require.Equal(t, int64(1), got.FailedSearches)
 }
 
 // --- ExecuteLLMStream ---
@@ -491,7 +578,17 @@ func TestExecuteLLMStream_FallbackGenerate_Success(t *testing.T) {
 			resp: &interfaces.LLMResponse{Content: "fallback answer"},
 		}},
 	})
-	result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	result, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.NoError(t, err)
 	require.Equal(t, "fallback answer", result.Content)
 }
@@ -500,7 +597,17 @@ func TestExecuteLLMStream_FallbackGenerate_LLMError(t *testing.T) {
 	rt := newTestRuntime(sdkruntime.AgentConfig{
 		LLM: sdkruntime.AgentLLM{Client: stubLLMClient{err: errors.New("llm down")}},
 	})
-	_, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	_, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "llm down")
 }
@@ -514,7 +621,17 @@ func TestExecuteLLMStream_FallbackGenerate_EmitsEvents(t *testing.T) {
 	var emitted []events.AgentEventType
 	emit := func(ev events.AgentEvent) { emitted = append(emitted, ev.Type()) }
 
-	_, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, emit)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             emit,
+	}
+	_, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.NoError(t, err)
 	require.Equal(t, []events.AgentEventType{
 		events.AgentEventTypeTextMessageStart,
@@ -529,7 +646,17 @@ func TestExecuteLLMStream_GenerateStreamError(t *testing.T) {
 			streamErr: errors.New("stream init failed"),
 		}},
 	})
-	_, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	_, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "stream init failed")
 }
@@ -543,7 +670,17 @@ func TestExecuteLLMStream_StreamError_AfterChunks(t *testing.T) {
 	rt := newTestRuntime(sdkruntime.AgentConfig{
 		LLM: sdkruntime.AgentLLM{Client: streamCapableLLMClient{stream: s}},
 	})
-	_, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	_, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "connection reset")
 }
@@ -554,7 +691,17 @@ func TestExecuteLLMStream_StreamNilResult(t *testing.T) {
 	rt := newTestRuntime(sdkruntime.AgentConfig{
 		LLM: sdkruntime.AgentLLM{Client: streamCapableLLMClient{stream: s}},
 	})
-	_, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	_, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "stream completed without result")
 }
@@ -572,7 +719,17 @@ func TestExecuteLLMStream_TextChunks_EmitsCorrectEvents(t *testing.T) {
 	var emitted []events.AgentEventType
 	emit := func(ev events.AgentEvent) { emitted = append(emitted, ev.Type()) }
 
-	result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, emit)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             emit,
+	}
+	result, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.NoError(t, err)
 	require.Equal(t, "hello world", result.Content)
 	require.Equal(t, events.AgentEventTypeTextMessageStart, emitted[0])
@@ -594,7 +751,17 @@ func TestExecuteLLMStream_ReasoningChunks_EmitsReasoningEvents(t *testing.T) {
 	var emitted []events.AgentEventType
 	emit := func(ev events.AgentEvent) { emitted = append(emitted, ev.Type()) }
 
-	_, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, emit)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             emit,
+	}
+	_, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.NoError(t, err)
 
 	// Reasoning events must appear before text events
@@ -626,7 +793,17 @@ func TestExecuteLLMStream_ToolOnlyResponse_EmitsEmptyAssistantTurn(t *testing.T)
 	var emitted []events.AgentEventType
 	emit := func(ev events.AgentEvent) { emitted = append(emitted, ev.Type()) }
 
-	result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", []interfaces.Tool{tool}, emit)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            []interfaces.Tool{tool},
+		Emit:             emit,
+	}
+	result, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.NoError(t, err)
 	require.Len(t, result.ToolCalls, 1)
 	// finalizeAssistantText emits a start/content/end even when no text chunks arrived
@@ -642,7 +819,17 @@ func TestExecuteLLMStream_WithUsageMetrics(t *testing.T) {
 	rt := newTestRuntime(sdkruntime.AgentConfig{
 		LLM: sdkruntime.AgentLLM{Client: streamCapableLLMClient{stream: s}},
 	})
-	result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	result, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.NoError(t, err)
 	require.NotNil(t, result.Usage)
 	require.EqualValues(t, 8, result.Usage.PromptTokens)
@@ -657,7 +844,17 @@ func TestExecuteLLMStream_NilChunkSkipped(t *testing.T) {
 	rt := newTestRuntime(sdkruntime.AgentConfig{
 		LLM: sdkruntime.AgentLLM{Client: streamCapableLLMClient{stream: s}},
 	})
-	result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	result, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.NoError(t, err)
 	require.Equal(t, "text", result.Content)
 }
@@ -671,7 +868,17 @@ func TestExecuteLLMStream_FallbackGenerate_WithUsage(t *testing.T) {
 			},
 		}},
 	})
-	result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	result, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.NoError(t, err)
 	require.NotNil(t, result.Usage)
 	require.EqualValues(t, 5, result.Usage.PromptTokens)
@@ -685,7 +892,17 @@ func TestExecuteLLMStream_FallbackGenerate_UnknownToolCallError(t *testing.T) {
 			},
 		}},
 	})
-	_, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	_, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "unknown tool")
 }
@@ -697,7 +914,17 @@ func TestExecuteLLMStream_Stream_UnknownToolCallError(t *testing.T) {
 	rt := newTestRuntime(sdkruntime.AgentConfig{
 		LLM: sdkruntime.AgentLLM{Client: streamCapableLLMClient{stream: s}},
 	})
-	_, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil)
+	input := ExecuteLLMInput{
+		Logger:           noopLog(),
+		AgentName:        "agent",
+		MessageID:        "msg",
+		Messages:         nil,
+		SkipTools:        false,
+		RetrieverContext: "",
+		Tools:            nil,
+		Emit:             nil,
+	}
+	_, err := rt.ExecuteLLMStream(context.Background(), input)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "unknown tool")
 }
@@ -713,7 +940,9 @@ func TestExecuteRetrievers_EmptyDocsSkipped(t *testing.T) {
 	})
 	got, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "q")
 	require.NoError(t, err)
-	require.Equal(t, "", got)
+	require.Equal(t, "", got.Context)
+	require.Equal(t, int64(1), got.TotalSearches)
+	require.Equal(t, int64(0), got.FailedSearches)
 }
 
 // --- ApplyLLMSampling Reasoning field ---
diff --git a/internal/runtime/base/types.go b/internal/runtime/base/types.go
index c631f69..eb34534 100644
--- a/internal/runtime/base/types.go
+++ b/internal/runtime/base/types.go
@@ -1,6 +1,9 @@
 package base
 
-import "github.com/agenticenv/agent-sdk-go/pkg/interfaces"
+import (
+	"github.com/agenticenv/agent-sdk-go/internal/types"
+	"github.com/agenticenv/agent-sdk-go/pkg/interfaces"
+)
 
 // LLMResult is the result of a successful LLM call.
 // Content holds the assistant text; ToolCalls holds any tool invocations resolved against
@@ -18,6 +21,7 @@ type ToolCallRequest struct {
 	ToolCallID      string
 	ToolName        string
 	ToolDisplayName string
+	ToolKind        types.ToolKind
 	Args            map[string]any
 	NeedsApproval   bool
 }
@@ -28,3 +32,10 @@ type AuthorizeResult struct {
 	Allowed bool
 	Reason  string
 }
+
+// RetrieverResult is the outcome of ExecuteRetrievers (prefetch / hybrid pre-loop).
+type RetrieverResult struct {
+	Context        string // retrieved text handed to the LLM call as RetrieverContext; "" when no documents came back
+	TotalSearches  int64  // searches attempted, counted even when they fail or return no documents
+	FailedSearches int64  // searches that returned an error
+}
diff --git a/internal/runtime/base/utils.go b/internal/runtime/base/utils.go
index c331833..dc0ab36 100644
--- a/internal/runtime/base/utils.go
+++ b/internal/runtime/base/utils.go
@@ -3,6 +3,7 @@ package base
 import (
 	"fmt"
 	"strings"
+	"time"
 
 	"github.com/agenticenv/agent-sdk-go/internal/runtime"
 	"github.com/agenticenv/agent-sdk-go/internal/types"
@@ -98,3 +99,19 @@ func GetConversationID(req *runtime.ExecuteRequest) string {
 	}
 	return ""
 }
+
+// NewAgentTelemetry returns telemetry for a run that began at startedAt.
+// FinishReason starts as FinishReasonComplete, and both per-tool breakdown maps
+// are allocated up front so they can be written to without a nil-map check.
+func NewAgentTelemetry(startedAt time.Time) *types.AgentTelemetry {
+	return &types.AgentTelemetry{
+		Run: types.RunTelemetry{
+			StartedAt:    startedAt,
+			FinishReason: types.FinishReasonComplete,
+		},
+		Tools: types.ToolTelemetry{
+			Breakdown:       make(map[string]int64),
+			FailedBreakdown: make(map[string]int64),
+		},
+	}
+}
diff --git a/internal/runtime/local/agent_loop.go b/internal/runtime/local/agent_loop.go
index 693e624..888ab9f 100644
--- a/internal/runtime/local/agent_loop.go
+++ b/internal/runtime/local/agent_loop.go
@@ -7,6 +7,7 @@ import (
 	"log/slog"
 	"strings"
 	"sync"
+	"time"
 
 	"github.com/agenticenv/agent-sdk-go/internal/events"
 	"github.com/agenticenv/agent-sdk-go/internal/runtime/base"
@@ -47,8 +48,14 @@ type AgentLoopInput struct {
 
 // AgentLoopResult is the outcome of a completed local agent run.
 type AgentLoopResult struct {
-	Content string
-	Usage   *interfaces.LLMUsage
+	Content   string
+	LLMUsage  *interfaces.LLMUsage
+	Telemetry *types.AgentTelemetry
+}
+
+type toolResult struct {
+	message interfaces.Message // message appended to the conversation for this tool call
+	failed  bool               // true: hard err, ExecuteTool err, or ctx cancel; false when authorization denies the call
+}
 
 // publishEventToChannel marshals ev and publishes it on channelName via the runtime eventbus.
@@ -78,9 +85,16 @@ func (rt *LocalRuntime) publishEventToChannel(ctx context.Context, channelName s
 // Events are published to rt.eventbus on input.ChannelName; callers subscribe to that channel.
 func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput) (*AgentLoopResult, error) {
 	log := rt.logger
+	telemetry := base.NewAgentTelemetry(time.Now())
 	agentName := rt.AgentSpec.Name
 	model := rt.AgentConfig.LLM.Client.GetModel()
 
+	ctx, sp := rt.Tracer.StartSpan(ctx, "agent.loop",
+		interfaces.Attribute{Key: "agent.name", Value: agentName},
+		interfaces.Attribute{Key: "model", Value: model},
+	)
+	defer sp.End()
+
 	tools := input.Tools
 
 	maxIter := rt.AgentConfig.Limits.MaxIterations
@@ -127,22 +141,24 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput)
 			slog.String("scope", "loop"),
 			slog.String("mode", string(retrieverMode)),
 			slog.Int("retrieverCount", len(rt.AgentConfig.Retrievers.Retrievers)))
-		rc, err := rt.ExecuteRetrievers(ctx, log, input.UserPrompt)
+		res, err := rt.ExecuteRetrievers(ctx, log, input.UserPrompt)
 		if err != nil {
 			return nil, fmt.Errorf("retriever prefetch: %w", err)
 		}
-		retrieverContext = rc
+		retrieverContext = res.Context
+		telemetry.Storage.TotalRetrieverSearches += res.TotalSearches
+		telemetry.Storage.FailedRetrieverSearches += res.FailedSearches
+		telemetry.Storage.PrefetchSearches += res.TotalSearches
 		log.Debug(ctx, "local: retriever prefetch done",
 			slog.String("scope", "loop"),
 			slog.Bool("hasContext", retrieverContext != ""))
 	}
 
-	var runUsage *interfaces.LLMUsage
-	lastContent := ""
+	var lastContent string
+	var llmUsage *interfaces.LLMUsage
 
 	for iter := 0; iter < maxIter; iter++ {
 		messageID := uuid.New().String()
-
 		log.Debug(ctx, "local: LLM call started",
 			slog.String("scope", "loop"),
 			slog.Int("iteration", iter),
@@ -150,16 +166,27 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput)
 
 		var llmResult *base.LLMResult
 		var err error
+		executeLLMInput := base.ExecuteLLMInput{
+			Logger:           log,
+			AgentName:        agentName,
+			MessageID:        messageID,
+			Messages:         messages,
+			SkipTools:        false,
+			RetrieverContext: retrieverContext,
+			Tools:            tools,
+			Emit:             emit,
+		}
 		if input.StreamingEnabled {
-			llmResult, err = rt.ExecuteLLMStream(ctx, log, agentName, messageID, messages, false, retrieverContext, tools, emit)
+			llmResult, err = rt.ExecuteLLMStream(ctx, executeLLMInput)
 		} else {
-			llmResult, err = rt.ExecuteLLM(ctx, log, agentName, messageID, messages, false, retrieverContext, tools, emit)
+			llmResult, err = rt.ExecuteLLM(ctx, executeLLMInput)
 		}
 		if err != nil {
 			return nil, fmt.Errorf("llm call (iter %d): %w", iter, err)
 		}
 
-		runUsage = base.MergeLLMUsage(runUsage, llmResult.Usage)
+		telemetry.Run.TotalLLMCalls++
+		llmUsage = base.MergeLLMUsage(llmUsage, llmResult.Usage)
 
 		// Final response: no tool calls → done.
 		if len(llmResult.ToolCalls) == 0 {
@@ -177,20 +204,32 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput)
 				slog.String("scope", "loop"),
 				slog.Int("iteration", iter))
 			finalMessageID := uuid.New().String()
+			executeLLMInput := base.ExecuteLLMInput{
+				Logger:           log,
+				AgentName:        agentName,
+				MessageID:        finalMessageID,
+				Messages:         messages,
+				SkipTools:        true,
+				RetrieverContext: retrieverContext,
+				Tools:            tools,
+				Emit:             emit,
+			}
 			if input.StreamingEnabled {
-				llmResult, err = rt.ExecuteLLMStream(ctx, log, agentName, finalMessageID, messages, true, retrieverContext, tools, emit)
+				llmResult, err = rt.ExecuteLLMStream(ctx, executeLLMInput)
 			} else {
-				llmResult, err = rt.ExecuteLLM(ctx, log, agentName, finalMessageID, messages, true, retrieverContext, tools, emit)
+				llmResult, err = rt.ExecuteLLM(ctx, executeLLMInput)
 			}
 			if err != nil {
 				return nil, fmt.Errorf("llm final call (iter %d): %w", iter, err)
 			}
-			runUsage = base.MergeLLMUsage(runUsage, llmResult.Usage)
+			llmUsage = base.MergeLLMUsage(llmUsage, llmResult.Usage)
 			messages = append(messages, interfaces.Message{
 				Role:    interfaces.MessageRoleAssistant,
 				Content: llmResult.Content,
 			})
 			lastContent = llmResult.Content
+			telemetry.Run.TotalLLMCalls++
+			telemetry.Run.FinishReason = types.FinishReasonMaxIterations
 			break
 		}
 
@@ -210,7 +249,7 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput)
 		messages = append(messages, assistantMsg)
 
 		// Execute tools according to the requested execution mode.
-		var toolResults []interfaces.Message
+		var toolResults []toolResult
 		switch toolExecMode {
 		case types.AgentToolExecutionModeParallel:
 			toolResults, err = rt.executeToolsParallel(ctx, input, messageID, llmResult.ToolCalls, emit)
@@ -226,28 +265,30 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput)
 			return nil, err
 		}
 
-		messages = append(messages, toolResults...)
+		for idx, result := range toolResults {
+			messages = append(messages, result.message)
+			tc := llmResult.ToolCalls[idx]
+			if tc.ToolKind.CountsTowardToolTelemetry() {
+				telemetry.Tools.Record(tc.ToolName, result.failed)
+			}
+			if tc.ToolKind == types.ToolKindRetriever {
+				telemetry.Storage.TotalRetrieverSearches++
+				telemetry.Storage.AgenticSearches++
+				if result.failed {
+					telemetry.Storage.FailedRetrieverSearches++
+				}
+			}
+		}
 
 		if rt.conversationMemoryEnabled(input) && rt.AgentConfig.Session.ConversationSaveOnIteration && len(messages) > persistedMessageCount {
-			if err := persistConversationMessages(ctx, rt, input.ConversationID, messages[persistedMessageCount:]); err != nil {
-				log.Warn(ctx, "local: persist conversation failed",
-					slog.String("scope", "loop"),
-					slog.String("conversationID", input.ConversationID),
-					slog.Any("error", err))
-			} else {
-				persistedMessageCount = len(messages)
-			}
+			rt.persistConversationMessages(ctx, input.ConversationID, messages[persistedMessageCount:])
+			persistedMessageCount = len(messages)
 		}
 	}
 
 	// Persist unsaved messages: full run when ConversationSaveOnIteration is false; final assistant only when true.
 	if rt.conversationMemoryEnabled(input) && len(messages) > persistedMessageCount {
-		if err := persistConversationMessages(ctx, rt, input.ConversationID, messages[persistedMessageCount:]); err != nil {
-			log.Warn(ctx, "local: persist conversation failed",
-				slog.String("scope", "loop"),
-				slog.String("conversationID", input.ConversationID),
-				slog.Any("error", err))
-		}
+		rt.persistConversationMessages(ctx, input.ConversationID, messages[persistedMessageCount:])
 	}
 
 	log.Info(ctx, "local: agent run completed",
@@ -256,7 +297,13 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput)
 		slog.String("model", model),
 		slog.Int("contentLen", len(lastContent)))
 
-	return &AgentLoopResult{Content: lastContent, Usage: runUsage}, nil
+	telemetry.Run.CompletedAt = time.Now()
+
+	return &AgentLoopResult{
+		Content:   lastContent,
+		LLMUsage:  llmUsage,
+		Telemetry: telemetry,
+	}, nil
 }
 
 func (rt *LocalRuntime) conversationMemoryEnabled(input AgentLoopInput) bool {
@@ -272,29 +319,34 @@ func (rt *LocalRuntime) executeToolsParallel(
 	messageID string,
 	toolCalls []base.ToolCallRequest,
 	emit func(events.AgentEvent),
-) ([]interfaces.Message, error) {
+) ([]toolResult, error) {
 	rt.logger.Info(ctx, "local: tool execution (parallel)",
 		slog.String("scope", "loop"),
 		slog.Int("toolCount", len(toolCalls)))
 
-	results := make([]interfaces.Message, len(toolCalls))
+	results := make([]toolResult, len(toolCalls))
 	var wg sync.WaitGroup
 
 	for i := range toolCalls {
 		wg.Add(1)
 		go func(idx int, tc base.ToolCallRequest) {
 			defer wg.Done()
-			msg, err := rt.executeSingleTool(ctx, input, messageID, tc, emit)
+			result, err := rt.executeSingleTool(ctx, input, messageID, tc, emit)
 			if err != nil {
-				results[idx] = interfaces.Message{
+				rt.logger.Info(ctx, "local: parallel tool failed",
+					slog.String("scope", "loop"),
+					slog.Int("toolIndex", idx),
+					slog.String("toolName", tc.ToolName),
+					slog.Any("error", err))
+				result.message = interfaces.Message{
 					Role:       interfaces.MessageRoleTool,
 					Content:    "Tool execution failed: " + err.Error(),
 					ToolName:   tc.ToolName,
 					ToolCallID: tc.ToolCallID,
 				}
-				return
+				result.failed = true
 			}
-			results[idx] = msg
+			results[idx] = result
 		}(i, toolCalls[i])
 	}
 	wg.Wait()
@@ -302,30 +354,38 @@ func (rt *LocalRuntime) executeToolsParallel(
 	return results, nil
 }
 
-// executeToolsSequential runs tool calls one at a time and returns on the first hard error.
+// executeToolsSequential runs tool calls one at a time, in the order given.
+// A hard error from a tool is logged and replaced by a synthetic failed tool message; the batch always continues, so the returned error is always nil.
 func (rt *LocalRuntime) executeToolsSequential(
 	ctx context.Context,
 	input AgentLoopInput,
 	messageID string,
 	toolCalls []base.ToolCallRequest,
 	emit func(events.AgentEvent),
-) ([]interfaces.Message, error) {
+) ([]toolResult, error) {
 	rt.logger.Info(ctx, "local: tool execution (sequential)",
 		slog.String("scope", "loop"),
 		slog.Int("toolCount", len(toolCalls)))
 
-	results := make([]interfaces.Message, 0, len(toolCalls))
-	for i, tc := range toolCalls {
-		msg, err := rt.executeSingleTool(ctx, input, messageID, tc, emit)
+	results := make([]toolResult, len(toolCalls))
+	for idx, tc := range toolCalls {
+		result, err := rt.executeSingleTool(ctx, input, messageID, tc, emit)
 		if err != nil {
 			rt.logger.Info(ctx, "local: sequential tool failed",
 				slog.String("scope", "loop"),
-				slog.Int("toolIndex", i),
+				slog.Int("toolIndex", idx),
 				slog.String("toolName", tc.ToolName),
 				slog.Any("error", err))
-			return nil, err
+
+			result.message = interfaces.Message{
+				Role:       interfaces.MessageRoleTool,
+				Content:    "Tool execution failed: " + err.Error(),
+				ToolName:   tc.ToolName,
+				ToolCallID: tc.ToolCallID,
+			}
+			result.failed = true
 		}
-		results = append(results, msg)
+		results[idx] = result
 	}
 	return results, nil
 }
@@ -338,7 +398,7 @@ func (rt *LocalRuntime) executeSingleTool(
 	messageID string,
 	tc base.ToolCallRequest,
 	emit func(events.AgentEvent),
-) (interfaces.Message, error) {
+) (toolResult, error) {
 	log := rt.logger
 	tools := input.Tools
 
@@ -361,7 +421,10 @@ func (rt *LocalRuntime) executeSingleTool(
 	// Authorization check.
 	authResult, err := rt.AuthorizeTool(ctx, log, tools, tc.ToolName, tc.Args)
 	if err != nil {
-		return interfaces.Message{}, fmt.Errorf("tool authorization error for %q: %w", tc.ToolName, err)
+		return toolResult{
+			message: interfaces.Message{},
+			failed:  true,
+		}, fmt.Errorf("tool authorization error for %q: %w", tc.ToolName, err)
 	}
 	if !authResult.Allowed {
 		content := msgToolUnauthorized
@@ -373,11 +436,14 @@ func (rt *LocalRuntime) executeSingleTool(
 			slog.String("toolName", tc.ToolName),
 			slog.String("reason", authResult.Reason))
 		emitToolEndThenResult(tc.ToolCallID, content)
-		return interfaces.Message{
-			Role:       interfaces.MessageRoleTool,
-			Content:    content,
-			ToolName:   tc.ToolName,
-			ToolCallID: tc.ToolCallID,
+		return toolResult{
+			message: interfaces.Message{
+				Role:       interfaces.MessageRoleTool,
+				Content:    content,
+				ToolName:   tc.ToolName,
+				ToolCallID: tc.ToolCallID,
+			},
+			failed: false,
 		}, nil
 	}
 
@@ -452,12 +518,16 @@ func (rt *LocalRuntime) executeSingleTool(
 			case status := <-resultCh:
 				approvalStatus = status
 			case <-ctx.Done():
-				return interfaces.Message{}, ctx.Err()
+				return toolResult{
+					message: interfaces.Message{},
+					failed:  true,
+				}, ctx.Err()
 			}
 		}
 	}
 
 	var content string
+	failed := false
 	switch approvalStatus {
 	case types.ApprovalStatusApproved:
 		if isSubAgent {
@@ -507,6 +577,7 @@ func (rt *LocalRuntime) executeSingleTool(
 			result, execErr := rt.ExecuteTool(ctx, log, tools, tc.ToolName, tc.Args)
 			if execErr != nil {
 				content = "Tool execution failed: " + execErr.Error()
+				failed = true
 			} else {
 				content = result
 			}
@@ -516,31 +587,49 @@ func (rt *LocalRuntime) executeSingleTool(
 	case types.ApprovalStatusUnavailable:
 		content = msgToolApprovalUnavailable
 	default:
-		return interfaces.Message{}, fmt.Errorf("unexpected approval status %q for tool %q", approvalStatus, tc.ToolName)
+		return toolResult{
+			message: interfaces.Message{},
+			failed:  true,
+		}, fmt.Errorf("unexpected approval status %q for tool %q", approvalStatus, tc.ToolName)
 	}
 
 	emitToolEndThenResult(tc.ToolCallID, content)
-	return interfaces.Message{
-		Role:       interfaces.MessageRoleTool,
-		Content:    content,
-		ToolName:   tc.ToolName,
-		ToolCallID: tc.ToolCallID,
+	return toolResult{
+		message: interfaces.Message{
+			Role:       interfaces.MessageRoleTool,
+			Content:    content,
+			ToolName:   tc.ToolName,
+			ToolCallID: tc.ToolCallID,
+		},
+		failed: failed,
 	}, nil
 }
 
-// persistConversationMessages stores all accumulated messages from the run into the conversation store.
-func persistConversationMessages(ctx context.Context, rt *LocalRuntime, conversationID string, messages []interfaces.Message) error {
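+// persistConversationMessages stores messages in the conversation store under a "conversation.add_messages" span; it is a no-op when no store is configured or messages is empty.
+// Each AddMessage failure is logged and counted in the span's "failed.count" attribute; the run is never failed.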
+func (rt *LocalRuntime) persistConversationMessages(ctx context.Context, conversationID string, messages []interfaces.Message) {
 	conv := rt.AgentConfig.Session.Conversation
-	if conv == nil {
-		return nil
+	if conv == nil || len(messages) == 0 {
+		return
 	}
+
+	ctx, sp := rt.Tracer.StartSpan(ctx, "conversation.add_messages",
+		interfaces.Attribute{Key: "conversation.id", Value: conversationID},
+		interfaces.Attribute{Key: "message.count", Value: len(messages)},
+	)
+	defer sp.End()
+
+	failCount := 0
 	for _, msg := range messages {
 		if err := conv.AddMessage(ctx, conversationID, msg); err != nil {
+			failCount++
 			rt.logger.Warn(ctx, "local: add conversation message failed",
 				slog.String("scope", "loop"),
 				slog.String("conversationID", conversationID),
 				slog.Any("error", err))
 		}
 	}
-	return nil
+	if failCount > 0 {
+		sp.SetAttribute("failed.count", failCount)
+	}
 }
diff --git a/internal/runtime/local/agent_loop_test.go b/internal/runtime/local/agent_loop_test.go
index 9559ee4..98bb350 100644
--- a/internal/runtime/local/agent_loop_test.go
+++ b/internal/runtime/local/agent_loop_test.go
@@ -45,6 +45,29 @@ func loopToolsInput(tools []interfaces.Tool) AgentLoopInput {
 	return AgentLoopInput{Tools: tools}
 }
 
+// stubKindTool is a stubTool with an explicit ToolKind for telemetry tests.
+type stubKindTool struct {
+	stubTool
+	kind types.ToolKind
+}
+
+func (t stubKindTool) ToolKind() types.ToolKind { return t.kind }
+
+// testToolCall builds a ToolCallRequest for toolName with ToolKind set to ToolKindNative (matches stubTool → native).
+func testToolCall(toolCallID, toolName string) base.ToolCallRequest {
+	return base.ToolCallRequest{
+		ToolCallID: toolCallID,
+		ToolName:   toolName,
+		ToolKind:   types.ToolKindNative,
+	}
+}
+
+func testToolCallNeedsApproval(toolCallID, toolName string) base.ToolCallRequest {
+	tc := testToolCall(toolCallID, toolName)
+	tc.NeedsApproval = true
+	return tc
+}
+
 // noopEmit discards all events.
 func noopEmit(_ events.AgentEvent) {}
 
@@ -113,6 +136,58 @@ func TestRunAgentLoop_ToolCallThenFinalAnswer(t *testing.T) {
 	require.Equal(t, "sum is 7", result.Content)
 }
 
+func TestRunAgentLoop_ToolTelemetry_Success(t *testing.T) {
+	client := &seqLLMClient{
+		responses: []*interfaces.LLMResponse{
+			{ToolCalls: []*interfaces.ToolCall{{ToolCallID: "c1", ToolName: "ok"}}},
+			{Content: "done"},
+		},
+	}
+	okTool := stubTool{name: "ok", result: "1"}
+	rt, tools := newLoopRT(t, 5, client, okTool)
+
+	result, err := runLoop(context.Background(), rt, tools, AgentLoopInput{UserPrompt: "go"})
+	require.NoError(t, err)
+	require.NotNil(t, result.Telemetry)
+	require.Equal(t, int64(1), result.Telemetry.Tools.TotalCalls)
+	require.Equal(t, int64(0), result.Telemetry.Tools.FailedCalls)
+	require.Equal(t, int64(1), result.Telemetry.Tools.Breakdown["ok"]) // breakdown is keyed by tool name
+}
+
+func TestRunAgentLoop_ToolTelemetry_ExecError(t *testing.T) {
+	client := &seqLLMClient{
+		responses: []*interfaces.LLMResponse{
+			{ToolCalls: []*interfaces.ToolCall{{ToolCallID: "c1", ToolName: "bad"}}},
+			{Content: "done"},
+		},
+	}
+	badTool := stubTool{name: "bad", execErr: errors.New("boom")}
+	rt, tools := newLoopRT(t, 5, client, badTool)
+
+	result, err := runLoop(context.Background(), rt, tools, AgentLoopInput{UserPrompt: "go"})
+	require.NoError(t, err) // the tool's exec error is recorded in telemetry; it does not fail the run
+	require.NotNil(t, result.Telemetry)
+	require.Equal(t, int64(1), result.Telemetry.Tools.TotalCalls)
+	require.Equal(t, int64(1), result.Telemetry.Tools.FailedCalls)
+	require.Equal(t, int64(1), result.Telemetry.Tools.FailedBreakdown["bad"]) // failures are keyed by tool name too
+}
+
+func TestRunAgentLoop_ToolTelemetry_SkipsNonCountableKind(t *testing.T) {
+	client := &seqLLMClient{
+		responses: []*interfaces.LLMResponse{
+			{ToolCalls: []*interfaces.ToolCall{{ToolCallID: "c1", ToolName: "delegate"}}},
+			{Content: "done"},
+		},
+	}
+	tool := stubKindTool{stubTool: stubTool{name: "delegate", result: "ok"}, kind: types.ToolKindSubAgent}
+	rt, tools := newLoopRT(t, 5, client, tool)
+
+	result, err := runLoop(context.Background(), rt, tools, AgentLoopInput{UserPrompt: "go"})
+	require.NoError(t, err)
+	require.NotNil(t, result.Telemetry)
+	require.Equal(t, int64(0), result.Telemetry.Tools.TotalCalls) // a sub-agent-kind call is not counted in Tools telemetry
+}
+
 func TestRunAgentLoop_MaxIterationsForcesFinalCall(t *testing.T) {
 	// With maxIter=1 and the only LLM response returning a tool call, the loop
 	// must fire a second "forced final" LLM call (skipTools=true) and return its content.
@@ -179,7 +254,7 @@ func TestRunAgentLoop_WithConversationID(t *testing.T) {
 
 	history := []interfaces.Message{{Role: interfaces.MessageRoleUser, Content: "old message"}}
 	conv.EXPECT().ListMessages(gomock.Any(), "conv-x", gomock.Any()).Return(history, nil)
-	// user + assistant = 2 messages persisted (history messages re-saved too).
+	// user + assistant persisted in one batch at run end.
 	conv.EXPECT().AddMessage(gomock.Any(), "conv-x", gomock.Any()).Return(nil).AnyTimes()
 
 	client := &seqLLMClient{
@@ -263,15 +338,21 @@ func TestRunAgentLoop_RetrieverPrefetch(t *testing.T) {
 	result, err := runLoop(context.Background(), rt, nil, AgentLoopInput{UserPrompt: "fetch me"})
 	require.NoError(t, err)
 	require.Equal(t, "answer with context", result.Content)
+	require.Equal(t, int64(1), result.Telemetry.Storage.TotalRetrieverSearches)
+	require.Equal(t, int64(0), result.Telemetry.Storage.FailedRetrieverSearches)
+	require.Equal(t, int64(1), result.Telemetry.Storage.PrefetchSearches)
+	require.Equal(t, int64(0), result.Telemetry.Storage.AgenticSearches)
 }
 
-func TestRunAgentLoop_RetrieverPrefetchError(t *testing.T) {
+func TestRunAgentLoop_RetrieverAllFailContinues(t *testing.T) {
 	ctrl := gomock.NewController(t)
 	ret := ifmocks.NewMockRetriever(ctrl)
 	ret.EXPECT().Name().Return("kb").AnyTimes()
 	ret.EXPECT().Search(gomock.Any(), gomock.Any()).Return(nil, errors.New("kb down"))
 
-	client := &seqLLMClient{}
+	client := &seqLLMClient{
+		responses: []*interfaces.LLMResponse{{Content: "answer without context"}},
+	}
 	rt, err := NewLocalRuntime(
 		WithLogger(logger.NoopLogger()),
 		WithAgentConfig(sdkruntime.AgentConfig{
@@ -285,9 +366,11 @@ func TestRunAgentLoop_RetrieverPrefetchError(t *testing.T) {
 	)
 	require.NoError(t, err)
 
-	_, err = runLoop(context.Background(), rt, nil, AgentLoopInput{UserPrompt: "fetch"})
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "retriever prefetch")
+	result, err := runLoop(context.Background(), rt, nil, AgentLoopInput{UserPrompt: "fetch"})
+	require.NoError(t, err)
+	require.Equal(t, "answer without context", result.Content)
+	require.Equal(t, int64(1), result.Telemetry.Storage.TotalRetrieverSearches)
+	require.Equal(t, int64(1), result.Telemetry.Storage.FailedRetrieverSearches)
 }
 
 // ---------------------------------------------------------------------------
@@ -355,16 +438,16 @@ func TestExecuteToolsParallel_AllSucceed(t *testing.T) {
 	rt, tools := newLoopRT(t, 5, &seqLLMClient{}, t1, t2)
 
 	calls := []base.ToolCallRequest{
-		{ToolCallID: "c1", ToolName: "t1"},
-		{ToolCallID: "c2", ToolName: "t2"},
+		testToolCall("c1", "t1"),
+		testToolCall("c2", "t2"),
 	}
 
 	msgs, err := rt.executeToolsParallel(context.Background(), loopToolsInput(tools), "msg-1", calls, noopEmit)
 	require.NoError(t, err)
 	require.Len(t, msgs, 2)
 	// Order must match submission order (parallel but results are indexed).
-	require.Equal(t, "r1", msgs[0].Content)
-	require.Equal(t, "r2", msgs[1].Content)
+	require.Equal(t, "r1", msgs[0].message.Content)
+	require.Equal(t, "r2", msgs[1].message.Content)
 }
 
 func TestExecuteToolsParallel_ToolErrorInMessage(t *testing.T) {
@@ -372,11 +455,12 @@ func TestExecuteToolsParallel_ToolErrorInMessage(t *testing.T) {
 	failing := stubTool{name: "bad", execErr: errors.New("boom")}
 	rt, tools := newLoopRT(t, 5, &seqLLMClient{}, failing)
 
-	calls := []base.ToolCallRequest{{ToolCallID: "c1", ToolName: "bad"}}
+	calls := []base.ToolCallRequest{testToolCall("c1", "bad")}
 	msgs, err := rt.executeToolsParallel(context.Background(), loopToolsInput(tools), "msg", calls, noopEmit)
 	require.NoError(t, err) // parallel swallows into message
 	require.Len(t, msgs, 1)
-	require.Contains(t, msgs[0].Content, "boom")
+	require.Contains(t, msgs[0].message.Content, "boom")
+	require.True(t, msgs[0].failed)
 }
 
 func TestExecuteToolsParallel_ResultsOrderPreserved(t *testing.T) {
@@ -389,13 +473,13 @@ func TestExecuteToolsParallel_ResultsOrderPreserved(t *testing.T) {
 	rt, tools := newLoopRT(t, 5, &seqLLMClient{}, toolSet...)
 
 	calls := []base.ToolCallRequest{
-		{ToolCallID: "1", ToolName: "a"},
-		{ToolCallID: "2", ToolName: "b"},
-		{ToolCallID: "3", ToolName: "c"},
+		testToolCall("1", "a"),
+		testToolCall("2", "b"),
+		testToolCall("3", "c"),
 	}
 	msgs, err := rt.executeToolsParallel(context.Background(), loopToolsInput(tools), "m", calls, noopEmit)
 	require.NoError(t, err)
-	require.Equal(t, []string{"A", "B", "C"}, []string{msgs[0].Content, msgs[1].Content, msgs[2].Content})
+	require.Equal(t, []string{"A", "B", "C"}, []string{msgs[0].message.Content, msgs[1].message.Content, msgs[2].message.Content})
 }
 
 // ---------------------------------------------------------------------------
@@ -408,30 +492,27 @@ func TestExecuteToolsSequential_AllSucceed(t *testing.T) {
 	rt, tools := newLoopRT(t, 5, &seqLLMClient{}, t1, t2)
 
 	calls := []base.ToolCallRequest{
-		{ToolCallID: "c1", ToolName: "s1"},
-		{ToolCallID: "c2", ToolName: "s2"},
+		testToolCall("c1", "s1"),
+		testToolCall("c2", "s2"),
 	}
 	msgs, err := rt.executeToolsSequential(context.Background(), loopToolsInput(tools), "msg", calls, noopEmit)
 	require.NoError(t, err)
 	require.Len(t, msgs, 2)
-	require.Equal(t, "v1", msgs[0].Content)
-	require.Equal(t, "v2", msgs[1].Content)
+	require.Equal(t, "v1", msgs[0].message.Content)
+	require.Equal(t, "v2", msgs[1].message.Content)
 }
 
 func TestExecuteToolsSequential_HardErrorOnContextCancel(t *testing.T) {
-	// A tool that blocks until ctx is cancelled → executeSingleTool returns ctx.Err().
-	// Sequential should propagate that error.
 	rt, _ := newLoopRT(t, 5, &seqLLMClient{})
-	// Add a fake tool that needs approval with no channel or handler → unavailable (not an error).
-	// Instead: use a blocking LLM as a proxy — but we need a tool-level error.
-	// We'll cancel the context before calling.
 	ctx, cancel := context.WithCancel(context.Background())
 	cancel() // pre-cancelled
 
-	calls := []base.ToolCallRequest{{ToolCallID: "c1", ToolName: "missing-tool"}}
-	_, err := rt.executeToolsSequential(ctx, AgentLoopInput{}, "msg", calls, noopEmit)
-	// AuthorizeTool returns error for unknown tool.
-	require.Error(t, err)
+	calls := []base.ToolCallRequest{testToolCall("c1", "missing-tool")}
+	results, err := rt.executeToolsSequential(ctx, AgentLoopInput{}, "msg", calls, noopEmit)
+	require.NoError(t, err) // despite the test name, the failure is now reported through results[0].failed, not a returned error
+	require.Len(t, results, 1)
+	require.True(t, results[0].failed)
+	require.Contains(t, results[0].message.Content, "missing-tool")
 }
 
 // ---------------------------------------------------------------------------
@@ -444,12 +525,12 @@ func TestExecuteSingleTool_Approved(t *testing.T) {
 
 	emit, evs := captureEmit()
 	msg, err := rt.executeSingleTool(context.Background(), loopToolsInput(tools), "msg-1",
-		base.ToolCallRequest{ToolCallID: "c1", ToolName: "my-tool"}, emit)
+		testToolCall("c1", "my-tool"), emit)
 
 	require.NoError(t, err)
-	require.Equal(t, "hello", msg.Content)
-	require.Equal(t, interfaces.MessageRoleTool, msg.Role)
-	require.Equal(t, "my-tool", msg.ToolName)
+	require.Equal(t, "hello", msg.message.Content)
+	require.Equal(t, interfaces.MessageRoleTool, msg.message.Role)
+	require.Equal(t, "my-tool", msg.message.ToolName)
 
 	etypes := eventTypes(*evs)
 	require.Contains(t, etypes, events.AgentEventTypeToolCallStart)
@@ -462,16 +543,17 @@ func TestExecuteSingleTool_ToolExecError(t *testing.T) {
 	rt, tools := newLoopRT(t, 5, &seqLLMClient{}, tool)
 
 	msg, err := rt.executeSingleTool(context.Background(), loopToolsInput(tools), "msg",
-		base.ToolCallRequest{ToolCallID: "c1", ToolName: "boom"}, noopEmit)
+		testToolCall("c1", "boom"), noopEmit)
 	require.NoError(t, err) // tool errors become a content message, not a hard error
-	require.Contains(t, msg.Content, "exec failed")
+	require.Contains(t, msg.message.Content, "exec failed")
+	require.True(t, msg.failed)
 }
 
 func TestExecuteSingleTool_UnknownToolErrors(t *testing.T) {
 	rt, _ := newLoopRT(t, 5, &seqLLMClient{}) // no tools registered
 
 	_, err := rt.executeSingleTool(context.Background(), AgentLoopInput{}, "msg",
-		base.ToolCallRequest{ToolCallID: "c1", ToolName: "ghost"}, noopEmit)
+		testToolCall("c1", "ghost"), noopEmit)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "ghost")
 }
@@ -492,9 +574,10 @@ func TestExecuteSingleTool_AuthorizationDenied(t *testing.T) {
 	rt, tools := newLoopRT(t, 5, &seqLLMClient{}, authTool)
 
 	msg, err := rt.executeSingleTool(context.Background(), loopToolsInput(tools), "msg",
-		base.ToolCallRequest{ToolCallID: "c1", ToolName: "restricted"}, noopEmit)
+		testToolCall("c1", "restricted"), noopEmit)
 	require.NoError(t, err)
-	require.Contains(t, msg.Content, msgToolUnauthorized)
+	require.Contains(t, msg.message.Content, msgToolUnauthorized)
+	require.False(t, msg.failed)
 	_ = tool
 }
 
@@ -503,7 +586,7 @@ func TestExecuteSingleTool_AuthorizationError(t *testing.T) {
 	rt, tools := newLoopRT(t, 5, &seqLLMClient{}, authTool)
 
 	_, err := rt.executeSingleTool(context.Background(), loopToolsInput(tools), "msg",
-		base.ToolCallRequest{ToolCallID: "c1", ToolName: "err-tool"}, noopEmit)
+		testToolCall("c1", "err-tool"), noopEmit)
 	require.Error(t, err)
 	require.Contains(t, err.Error(), "auth backend down")
 }
@@ -515,9 +598,9 @@ func TestExecuteSingleTool_ApprovalUnavailable(t *testing.T) {
 
 	msg, err := rt.executeSingleTool(context.Background(),
 		AgentLoopInput{ChannelName: "", ApprovalHandler: nil, Tools: tools}, "msg",
-		base.ToolCallRequest{ToolCallID: "c1", ToolName: "guarded", NeedsApproval: true}, noopEmit)
+		testToolCallNeedsApproval("c1", "guarded"), noopEmit)
 	require.NoError(t, err)
-	require.Contains(t, msg.Content, msgToolApprovalUnavailable)
+	require.Contains(t, msg.message.Content, msgToolApprovalUnavailable)
 }
 
 func TestExecuteSingleTool_ApprovalHandlerApproves(t *testing.T) {
@@ -530,9 +613,9 @@ func TestExecuteSingleTool_ApprovalHandlerApproves(t *testing.T) {
 
 	msg, err := rt.executeSingleTool(context.Background(),
 		AgentLoopInput{ApprovalHandler: handler, Tools: tools}, "msg",
-		base.ToolCallRequest{ToolCallID: "c1", ToolName: "guarded", NeedsApproval: true}, noopEmit)
+		testToolCallNeedsApproval("c1", "guarded"), noopEmit)
 	require.NoError(t, err)
-	require.Equal(t, "ok", msg.Content)
+	require.Equal(t, "ok", msg.message.Content)
 }
 
 func TestExecuteSingleTool_ApprovalHandlerRejects(t *testing.T) {
@@ -545,9 +628,9 @@ func TestExecuteSingleTool_ApprovalHandlerRejects(t *testing.T) {
 
 	msg, err := rt.executeSingleTool(context.Background(),
 		AgentLoopInput{ApprovalHandler: handler, Tools: tools}, "msg",
-		base.ToolCallRequest{ToolCallID: "c1", ToolName: "guarded", NeedsApproval: true}, noopEmit)
+		testToolCallNeedsApproval("c1", "guarded"), noopEmit)
 	require.NoError(t, err)
-	require.Equal(t, msgToolRejected, msg.Content)
+	require.Equal(t, msgToolRejected, msg.message.Content)
 }
 
 func TestExecuteSingleTool_StreamingApproveUnblocks(t *testing.T) {
@@ -585,16 +668,16 @@ func TestExecuteSingleTool_StreamingApproveUnblocks(t *testing.T) {
 
 	done := make(chan struct{})
 	var (
-		resultMsg interfaces.Message
+		result    toolResult
 		resultErr error
 	)
 	go func() {
 		defer close(done)
-		resultMsg, resultErr = rt.executeSingleTool(
+		result, resultErr = rt.executeSingleTool(
 			context.Background(),
 			AgentLoopInput{ChannelName: "some-channel", Tools: tools}, // streaming path
 			"msg",
-			base.ToolCallRequest{ToolCallID: "c1", ToolName: "guarded", NeedsApproval: true},
+			testToolCallNeedsApproval("c1", "guarded"),
 			emit,
 		)
 	}()
@@ -613,7 +696,7 @@ func TestExecuteSingleTool_StreamingApproveUnblocks(t *testing.T) {
 
 	<-done
 	require.NoError(t, resultErr)
-	require.Equal(t, "stream-ok", resultMsg.Content)
+	require.Equal(t, "stream-ok", result.message.Content)
 }
 
 func TestExecuteSingleTool_ApprovalContextCancel(t *testing.T) {
@@ -631,7 +714,7 @@ func TestExecuteSingleTool_ApprovalContextCancel(t *testing.T) {
 
 	_, err := rt.executeSingleTool(ctx,
 		AgentLoopInput{ChannelName: "some-channel", Tools: tools}, "msg",
-		base.ToolCallRequest{ToolCallID: "c1", ToolName: "guarded", NeedsApproval: true}, noopEmit)
+		testToolCallNeedsApproval("c1", "guarded"), noopEmit)
 
 	<-done
 	require.Error(t, err)
@@ -675,11 +758,9 @@ func TestPublishEventToChannel_NoOpWhenNilEventbus(t *testing.T) {
 
 func TestPersistConversationMessages_NilConversation(t *testing.T) {
 	rt, _ := newLoopRT(t, 5, &seqLLMClient{})
-	// No conversation configured — must not panic or error.
-	err := persistConversationMessages(context.Background(), rt, "c", []interfaces.Message{
+	rt.persistConversationMessages(context.Background(), "c", []interfaces.Message{ // no conversation configured: the call must not panic
 		{Role: interfaces.MessageRoleUser, Content: "hi"},
 	})
-	require.NoError(t, err)
 }
 
 func TestPersistConversationMessages_StoresAllMessages(t *testing.T) {
@@ -702,14 +783,13 @@ func TestPersistConversationMessages_StoresAllMessages(t *testing.T) {
 		{Role: interfaces.MessageRoleAssistant, Content: "2"},
 		{Role: interfaces.MessageRoleTool, Content: "3"},
 	}
-	err = persistConversationMessages(context.Background(), rt, "conv-1", msgs)
-	require.NoError(t, err)
+	rt.persistConversationMessages(context.Background(), "conv-1", msgs)
 }
 
-func TestPersistConversationMessages_AddMessageErrorWarnsOnly(t *testing.T) {
+func TestPersistConversationMessages_AddMessageErrorContinues(t *testing.T) {
 	ctrl := gomock.NewController(t)
 	conv := ifmocks.NewMockConversation(ctrl)
-	conv.EXPECT().AddMessage(gomock.Any(), "c", gomock.Any()).Return(errors.New("store err")).AnyTimes()
+	conv.EXPECT().AddMessage(gomock.Any(), "c", gomock.Any()).Return(errors.New("store err"))
 
 	rt, err := NewLocalRuntime(
 		WithLogger(logger.NoopLogger()),
@@ -721,11 +801,33 @@ func TestPersistConversationMessages_AddMessageErrorWarnsOnly(t *testing.T) {
 	)
 	require.NoError(t, err)
 
-	// persistConversationMessages returns nil even when AddMessage fails (warns only).
-	err = persistConversationMessages(context.Background(), rt, "c", []interfaces.Message{
+	rt.persistConversationMessages(context.Background(), "c", []interfaces.Message{
 		{Role: interfaces.MessageRoleUser, Content: "hi"},
 	})
+}
+
+// TestPersistConversationMessages_ContinuesAfterFailure checks that a failed AddMessage
+// for the first message does not stop the second message from being stored.
+func TestPersistConversationMessages_ContinuesAfterFailure(t *testing.T) {
+	mockCtrl := gomock.NewController(t)
+	store := ifmocks.NewMockConversation(mockCtrl)
+	// The first AddMessage fails and the second succeeds, strictly in that order.
+	firstAdd := store.EXPECT().AddMessage(gomock.Any(), "c", gomock.Any()).Return(errors.New("first fail"))
+	secondAdd := store.EXPECT().AddMessage(gomock.Any(), "c", gomock.Any()).Return(nil)
+	gomock.InOrder(firstAdd, secondAdd)
+
+	agentCfg := sdkruntime.AgentConfig{
+		LLM:     sdkruntime.AgentLLM{Client: &seqLLMClient{}},
+		Session: sdkruntime.AgentSession{Conversation: store},
+		Limits:  sdkruntime.AgentLimits{Timeout: 5 * time.Second},
+	}
+	rt, err := NewLocalRuntime(WithLogger(logger.NoopLogger()), WithAgentConfig(agentCfg))
 	require.NoError(t, err)
+	batch := []interfaces.Message{
+		{Role: interfaces.MessageRoleUser, Content: "1"},
+		{Role: interfaces.MessageRoleAssistant, Content: "2"},
+	}
+	rt.persistConversationMessages(context.Background(), "c", batch)
 }
 
 // ---------------------------------------------------------------------------
diff --git a/internal/runtime/local/runtime.go b/internal/runtime/local/runtime.go
index 0b5d3dc..96208c5 100644
--- a/internal/runtime/local/runtime.go
+++ b/internal/runtime/local/runtime.go
@@ -143,7 +143,8 @@ func (rt *LocalRuntime) Execute(ctx context.Context, req *sdkruntime.ExecuteRequ
 		AgentName: strings.TrimSpace(agentName),
 		Model:     rt.AgentConfig.LLM.Client.GetModel(),
 		Metadata:  map[string]any{},
-		Usage:     loopResult.Usage,
+		LLMUsage:  loopResult.LLMUsage,
+		Telemetry: loopResult.Telemetry,
 	}, nil
 }
 
@@ -236,7 +237,8 @@ func (rt *LocalRuntime) ExecuteStream(ctx context.Context, req *sdkruntime.Execu
 			AgentName: strings.TrimSpace(agentName),
 			Model:     rt.AgentConfig.LLM.Client.GetModel(),
 			Metadata:  map[string]any{},
-			Usage:     result.Usage,
+			LLMUsage:  result.LLMUsage,
+			Telemetry: result.Telemetry,
 		}
 		rt.publishLifecycleEvent(channel, events.NewAgentRunFinishedEvent(threadID, runID, agentRunResult))
 	}()
diff --git a/internal/runtime/temporal/agent_workflow.go b/internal/runtime/temporal/agent_workflow.go
index 393a779..0077e9f 100644
--- a/internal/runtime/temporal/agent_workflow.go
+++ b/internal/runtime/temporal/agent_workflow.go
@@ -134,8 +134,10 @@ type AgentWorkflowInput struct {
 // AgentWorkflowState is the state of the agent workflow.
 // It is used to store the state of the agent workflow on continue-as-new.
 type AgentWorkflowState struct {
-	Iteration int                  `json:"iteration"`
-	Messages  []interfaces.Message `json:"messages"`
+	Iteration int                   `json:"iteration"`
+	Messages  []interfaces.Message  `json:"messages"`
+	LLMUsage  *interfaces.LLMUsage  `json:"llm_usage,omitempty"` // accumulated LLM usage carried across continue-as-new
+	Telemetry *types.AgentTelemetry `json:"telemetry,omitempty"` // run telemetry carried across continue-as-new; AgentWorkflow creates it when nil
 }
 
 // AgentRetrieverInput is the input to AgentRetrieverActivity.
@@ -149,6 +151,8 @@ type AgentRetrieverInput struct {
 // documents were found. It is injected into the system prompt by AgentLLMActivity and AgentLLMStreamActivity.
 type AgentRetrieverResult struct {
 	RetrieverContext string `json:"retriever_context,omitempty"`
+	TotalSearches    int64  `json:"total_searches,omitempty"`  // searches reported by ExecuteRetrievers; added to the run's retriever and prefetch search counts
+	FailedSearches   int64  `json:"failed_searches,omitempty"` // failed searches reported by ExecuteRetrievers; added to the run's failed retriever searches
 }
 
 // AgentLLMInput is the input to AgentLLMActivity and AgentLLMStreamActivity.
@@ -188,6 +192,7 @@ func baseLLMResultToActivity(r *base.LLMResult) *AgentLLMResult {
 			ToolCallID:      tc.ToolCallID,
 			ToolName:        tc.ToolName,
 			ToolDisplayName: tc.ToolDisplayName,
+			ToolKind:        tc.ToolKind,
 			Args:            tc.Args,
 			NeedsApproval:   tc.NeedsApproval,
 		})
@@ -200,6 +205,7 @@ type ToolCallRequest struct {
 	ToolCallID      string         `json:"tool_call_id"` // from LLM; used to match tool results
 	ToolName        string         `json:"tool_name"`
 	ToolDisplayName string         `json:"tool_display_name,omitempty"`
+	ToolKind        types.ToolKind `json:"tool_kind"`
 	Args            map[string]any `json:"args"`
 	NeedsApproval   bool           `json:"needs_approval"`
 }
@@ -219,9 +225,16 @@ type agentToolCallInput struct {
 // agentToolCallOutput is the output of executeAgentToolCall.
 type agentToolCallOutput struct {
 	msg                  interfaces.Message
+	failed               bool // set when the tool did not run successfully: a hard error or an ExecuteTool error
 	streamingUnavailable bool
 }
 
+// agentToolResult is one tool outcome collected for the conversation and telemetry.
+type agentToolResult struct {
+	message interfaces.Message // tool message appended to the workflow's conversation messages
+	failed  bool               // failure flag passed to telemetry when this tool call is recorded
+}
+
 // AgentToolExecuteInput is the input to AgentToolExecuteActivity.
 type AgentToolExecuteInput struct {
 	ToolName         string               `json:"tool_name"`
@@ -381,13 +394,19 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 
 	useStreaming := input.StreamingEnabled && rt.AgentConfig.LLM.Client.IsStreamSupported()
 
-	// State restored after ContinueAsNew (iteration + conversation messages).
+	// State restored after ContinueAsNew (iteration, messages, run telemetry).
 	if input.State == nil {
 		input.State = &AgentWorkflowState{
 			Iteration: 0,
 			Messages:  []interfaces.Message{{Role: interfaces.MessageRoleUser, Content: input.UserPrompt}},
 		}
 	}
+	if input.State.Telemetry == nil {
+		input.State.Telemetry = base.NewAgentTelemetry(workflow.Now(ctx))
+	}
+	telemetry := input.State.Telemetry
+
+	llmUsage := input.State.LLMUsage
 
 	messages := input.State.Messages
 
@@ -411,11 +430,13 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 			return nil, err
 		}
 		retrieverContext = retrieverResult.RetrieverContext
+		telemetry.Storage.TotalRetrieverSearches += retrieverResult.TotalSearches
+		telemetry.Storage.FailedRetrieverSearches += retrieverResult.FailedSearches
+		telemetry.Storage.PrefetchSearches += retrieverResult.TotalSearches
 		logger.Debug("workflow: retriever prefetch done", "scope", "workflow", "hasContext", retrieverContext != "")
 	}
 
 	lastContent := ""
-	var runUsage *interfaces.LLMUsage
 	var llmResult AgentLLMResult
 	for iter := input.State.Iteration; iter < maxIter; iter++ {
 
@@ -445,7 +466,8 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 			return nil, err
 		}
 
-		runUsage = base.MergeLLMUsage(runUsage, llmResult.Usage)
+		telemetry.Run.TotalLLMCalls++
+		llmUsage = base.MergeLLMUsage(llmUsage, llmResult.Usage)
 
 		if len(llmResult.ToolCalls) == 0 {
 			// Final response: accumulate assistant message for conversation
@@ -469,9 +491,11 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 				}
 				return nil, err
 			}
-			runUsage = base.MergeLLMUsage(runUsage, llmResult.Usage)
+			llmUsage = base.MergeLLMUsage(llmUsage, llmResult.Usage)
 			messages = append(messages, interfaces.Message{Role: interfaces.MessageRoleAssistant, Content: llmResult.Content})
 			lastContent = llmResult.Content
+			telemetry.Run.TotalLLMCalls++
+			telemetry.Run.FinishReason = types.FinishReasonMaxIterations
 			break
 		}
 
@@ -490,7 +514,7 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 		}
 		messages = append(messages, assistantMsg)
 
-		var toolResults []interfaces.Message
+		var toolResults []agentToolResult
 
 		toolExecMode := rt.ToolExecutionMode
 		if toolExecMode == "" {
@@ -546,7 +570,7 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 					})
 				}
 
-				toolResults = make([]interfaces.Message, len(futures))
+				toolResults = make([]agentToolResult, len(futures))
 				for i, fut := range futures {
 					tc := llmResult.ToolCalls[i]
 					var v *agentToolCallOutput
@@ -559,22 +583,26 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 							"toolName", tc.ToolName,
 							"toolCallID", tc.ToolCallID,
 							"error", err)
-						// Tool failed — send error as tool result so LLM can handle it
-						toolResults[i] = interfaces.Message{
-							Role:       interfaces.MessageRoleTool,
-							Content:    "Tool execution failed: " + err.Error(),
-							ToolName:   tc.ToolName,
-							ToolCallID: tc.ToolCallID,
+						toolResults[i] = agentToolResult{
+							message: interfaces.Message{
+								Role:       interfaces.MessageRoleTool,
+								Content:    "Tool execution failed: " + err.Error(),
+								ToolName:   tc.ToolName,
+								ToolCallID: tc.ToolCallID,
+							},
+							failed: true,
 						}
 					} else {
-						// Success: branch always Set(non-nil *agentToolCallOutput, nil).
 						logger.Debug("workflow: parallel tool future collected (ok)",
 							"scope", "workflow",
 							"toolIndex", i,
 							"toolName", tc.ToolName,
 							"toolCallID", tc.ToolCallID,
 							"streamingUnavailable", v.streamingUnavailable)
-						toolResults[i] = v.msg
+						toolResults[i] = agentToolResult{
+							message: v.msg,
+							failed:  v.failed,
+						}
 						if v.streamingUnavailable {
 							streamingUnavailable = true
 						}
@@ -588,7 +616,7 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 					"executionMode", string(types.AgentToolExecutionModeSequential),
 					"toolCount", len(llmResult.ToolCalls))
 				toolInput := rt.newAgentToolCallInput(ctx, input, activityIDSuffix, messageID, emitAgentEvent, "")
-				// authorize / approve / execute, then TOOL_CALL_END + TOOL_CALL_RESULT.
+				toolResults = make([]agentToolResult, len(llmResult.ToolCalls))
 				for i, tc := range llmResult.ToolCalls {
 					logger.Debug("workflow: sequential tool executing",
 						"scope", "workflow",
@@ -604,7 +632,16 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 							"toolName", tc.ToolName,
 							"toolCallID", tc.ToolCallID,
 							"error", runErr)
-						return nil, runErr
+						toolResults[i] = agentToolResult{
+							message: interfaces.Message{
+								Role:       interfaces.MessageRoleTool,
+								Content:    "Tool execution failed: " + runErr.Error(),
+								ToolName:   tc.ToolName,
+								ToolCallID: tc.ToolCallID,
+							},
+							failed: true,
+						}
+						continue
 					}
 					if toolOutput.streamingUnavailable {
 						streamingUnavailable = true
@@ -615,14 +652,30 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 						"toolName", tc.ToolName,
 						"toolCallID", tc.ToolCallID,
 						"streamingUnavailable", toolOutput.streamingUnavailable)
-					toolResults = append(toolResults, toolOutput.msg)
+					toolResults[i] = agentToolResult{
+						message: toolOutput.msg,
+						failed:  toolOutput.failed,
+					}
 				}
 			}
 		default:
 			return nil, fmt.Errorf("invalid tool execution mode %q: use %q or %q", toolExecMode, types.AgentToolExecutionModeParallel, types.AgentToolExecutionModeSequential)
 		}
 
-		messages = append(messages, toolResults...)
+		for i, result := range toolResults {
+			tc := llmResult.ToolCalls[i]
+			if tc.ToolKind.CountsTowardToolTelemetry() {
+				telemetry.Tools.Record(tc.ToolName, result.failed)
+			}
+			if tc.ToolKind == types.ToolKindRetriever {
+				telemetry.Storage.TotalRetrieverSearches++
+				telemetry.Storage.AgenticSearches++
+				if result.failed {
+					telemetry.Storage.FailedRetrieverSearches++
+				}
+			}
+			messages = append(messages, result.message)
+		}
 
 		if rt.conversationMemoryEnabled(input.ConversationID) && rt.AgentConfig.Session.ConversationSaveOnIteration && len(messages) > 0 {
 			if err := workflow.ExecuteActivity(convCtx, rt.AddConversationMessagesActivity, AddConversationMessagesInput{
@@ -648,9 +701,12 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 				"historySizeBytes", info.GetCurrentHistorySize(),
 				"historySizeBytesLimit", agentWorkflowHistorySizeBytes,
 			)
+
 			input.State = &AgentWorkflowState{
 				Iteration: iter + 1,
 				Messages:  messages,
+				LLMUsage:  llmUsage,
+				Telemetry: telemetry,
 			}
 			return nil, workflow.NewContinueAsNewError(ctx, rt.AgentWorkflow, input)
 		}
@@ -672,9 +728,19 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl
 
 	// Log summary only; avoid full content to prevent leaking sensitive data
 	logger.Info("workflow: agent run completed", "scope", "workflow", "contentLen", len(lastContent))
-	return &types.AgentRunResult{
-		Content: lastContent, AgentName: agentName, Model: model, Metadata: map[string]any{}, Usage: runUsage,
-	}, nil
+
+	telemetry.Run.CompletedAt = workflow.Now(ctx)
+
+	runResult := &types.AgentRunResult{
+		Content:   lastContent,
+		AgentName: agentName,
+		Model:     model,
+		Metadata:  map[string]any{},
+		LLMUsage:  llmUsage,
+		Telemetry: telemetry,
+	}
+
+	return runResult, nil
 }
 
 func (rt *TemporalRuntime) conversationMemoryEnabled(conversationID string) bool {
@@ -781,6 +847,7 @@ func (rt *TemporalRuntime) executeAgentToolCall(input agentToolCallInput, tc Too
 				ToolName:   tc.ToolName,
 				ToolCallID: tc.ToolCallID,
 			},
+			failed:               false,
 			streamingUnavailable: false,
 		}, nil
 	}
@@ -818,6 +885,7 @@ func (rt *TemporalRuntime) executeAgentToolCall(input agentToolCallInput, tc Too
 	}
 
 	var content string
+	failed := false
 	switch approvalStatus {
 	case types.ApprovalStatusApproved:
 		if route, ok := input.input.SubAgentRoutes[tc.ToolName]; ok {
@@ -848,6 +916,7 @@ func (rt *TemporalRuntime) executeAgentToolCall(input agentToolCallInput, tc Too
 			errExec := workflow.ExecuteActivity(input.execCtx, rt.AgentToolExecuteActivity, execInput).Get(input.execCtx, &result)
 			if errExec != nil {
 				content = "Tool execution failed: " + errExec.Error()
+				failed = true
 			} else {
 				content = result
 			}
@@ -869,6 +938,7 @@ func (rt *TemporalRuntime) executeAgentToolCall(input agentToolCallInput, tc Too
 			ToolName:   tc.ToolName,
 			ToolCallID: tc.ToolCallID,
 		},
+		failed:               failed,
 		streamingUnavailable: markStreamingUnavailable,
 	}, nil
 }
@@ -947,7 +1017,18 @@ func (rt *TemporalRuntime) AgentLLMStreamActivity(ctx context.Context, input Age
 		rt.publishAgentEventToStream(ctx, agentName, input.LocalChannelName, input.EventWorkflowID, input.EventTaskQueue, ev)
 	}
 
-	result, err := rt.ExecuteLLMStream(ctx, actLog, agentName, input.MessageID, messages, input.SkipTools, input.RetrieverContext, tools, emit)
+	executeLLMInput := base.ExecuteLLMInput{
+		Logger:           actLog,
+		AgentName:        agentName,
+		MessageID:        input.MessageID,
+		Messages:         messages,
+		SkipTools:        input.SkipTools,
+		RetrieverContext: input.RetrieverContext,
+		Tools:            tools,
+		Emit:             emit,
+	}
+
+	result, err := rt.ExecuteLLMStream(ctx, executeLLMInput)
 	if err != nil {
 		return nil, err
 	}
@@ -964,11 +1045,15 @@ func (rt *TemporalRuntime) AgentRetrieverActivity(ctx context.Context, input Age
 		return nil, err
 	}
 	actLog := newActivityLogger(activity.GetLogger(ctx))
-	retrieverContext, err := rt.ExecuteRetrievers(ctx, actLog, input.UserPrompt)
+	res, err := rt.ExecuteRetrievers(ctx, actLog, input.UserPrompt)
 	if err != nil {
 		return nil, err
 	}
-	return &AgentRetrieverResult{RetrieverContext: retrieverContext}, nil
+	return &AgentRetrieverResult{
+		RetrieverContext: res.Context,
+		TotalSearches:    res.TotalSearches,
+		FailedSearches:   res.FailedSearches,
+	}, nil
 }
 
 // AgentLLMActivity calls the LLM and returns content plus any tool calls.
@@ -997,7 +1082,18 @@ func (rt *TemporalRuntime) AgentLLMActivity(ctx context.Context, input AgentLLMI
 		rt.publishAgentEventToStream(ctx, agentName, input.LocalChannelName, input.EventWorkflowID, input.EventTaskQueue, ev)
 	}
 
-	result, err := rt.ExecuteLLM(ctx, actLog, agentName, input.MessageID, messages, input.SkipTools, input.RetrieverContext, tools, emit)
+	executeLLMInput := base.ExecuteLLMInput{
+		Logger:           actLog,
+		AgentName:        agentName,
+		MessageID:        input.MessageID,
+		Messages:         messages,
+		SkipTools:        input.SkipTools,
+		RetrieverContext: input.RetrieverContext,
+		Tools:            tools,
+		Emit:             emit,
+	}
+
+	result, err := rt.ExecuteLLM(ctx, executeLLMInput)
 	if err != nil {
 		return nil, err
 	}
diff --git a/internal/runtime/temporal/agent_workflow_test.go b/internal/runtime/temporal/agent_workflow_test.go
index 137fd4c..4ec5611 100644
--- a/internal/runtime/temporal/agent_workflow_test.go
+++ b/internal/runtime/temporal/agent_workflow_test.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"testing"
+	"time"
 
 	"github.com/golang/mock/gomock"
 	"github.com/stretchr/testify/mock"
@@ -49,6 +50,19 @@ func wireTestToolsResolver(rt *TemporalRuntime, tools []interfaces.Tool) {
 	}
 }
 
+// testWorkflowToolCall builds a ToolCallRequest for workflow tests; the display name
+// mirrors toolName and an empty kind is replaced with types.ToolKindNative.
+func testWorkflowToolCall(toolCallID, toolName string, kind types.ToolKind, args map[string]any) ToolCallRequest {
+	req := ToolCallRequest{ToolCallID: toolCallID, ToolName: toolName, ToolDisplayName: toolName, Args: args}
+	switch kind {
+	case "":
+		req.ToolKind = types.ToolKindNative
+	default:
+		req.ToolKind = kind
+	}
+	return req
+}
+
 // newActivityTestEnv returns a [testsuite.TestActivityEnvironment] for isolated activity tests.
 func newActivityTestEnv(t *testing.T) *testsuite.TestActivityEnvironment {
 	t.Helper()
@@ -120,13 +134,8 @@ func TestAgentWorkflow_OneToolThenFinal(t *testing.T) {
 		llmCalls++
 		if llmCalls == 1 {
 			return &AgentLLMResult{
-				Content: "using tool",
-				ToolCalls: []ToolCallRequest{{
-					ToolCallID:      "tc1",
-					ToolName:        "echo",
-					ToolDisplayName: "Echo",
-					Args:            map[string]any{"x": 1},
-				}},
+				Content:   "using tool",
+				ToolCalls: []ToolCallRequest{testWorkflowToolCall("tc1", "echo", types.ToolKindNative, map[string]any{"x": 1})},
 			}, nil
 		}
 		return &AgentLLMResult{Content: "after tool", ToolCalls: nil}, nil
@@ -150,6 +159,111 @@ func TestAgentWorkflow_OneToolThenFinal(t *testing.T) {
 	require.NoError(t, env.GetWorkflowResult(&result))
 	require.Equal(t, "after tool", result.Content)
 	require.Equal(t, 2, llmCalls)
+	require.NotNil(t, result.Telemetry)
+	require.Equal(t, int64(1), result.Telemetry.Tools.TotalCalls)
+	require.Equal(t, int64(0), result.Telemetry.Tools.FailedCalls)
+}
+
+// TestAgentWorkflow_ToolTelemetry_ExecError checks that a tool call whose execute activity
+// returns an error is recorded once in Tools.TotalCalls and once in Tools.FailedCalls.
+func TestAgentWorkflow_ToolTelemetry_ExecError(t *testing.T) {
+	var ts testsuite.WorkflowTestSuite
+	wfEnv := ts.NewTestWorkflowEnvironment()
+	trt := testRuntimeForWorkflow(t)
+	wfEnv.RegisterWorkflow(trt.AgentWorkflow)
+	turn := 0
+	llmStub := func(ctx context.Context, in AgentLLMInput) (*AgentLLMResult, error) {
+		turn++
+		if turn > 1 {
+			return &AgentLLMResult{Content: "after tool", ToolCalls: nil}, nil
+		}
+		badCall := testWorkflowToolCall("tc1", "bad", types.ToolKindNative, nil)
+		return &AgentLLMResult{Content: "using tool", ToolCalls: []ToolCallRequest{badCall}}, nil
+	}
+	wfEnv.OnActivity(trt.AgentLLMActivity, mock.Anything, mock.Anything).Return(llmStub)
+	wfEnv.OnActivity(trt.AgentToolExecuteActivity, mock.Anything, mock.Anything).Return("", fmt.Errorf("boom"))
+	wfEnv.OnActivity(trt.AgentToolAuthorizeActivity, mock.Anything, mock.Anything).Return(AgentToolAuthorizeResult{Allowed: true}, nil)
+
+	wfEnv.ExecuteWorkflow(trt.AgentWorkflow, AgentWorkflowInput{UserPrompt: "run"})
+	require.True(t, wfEnv.IsWorkflowCompleted())
+
+	var got types.AgentRunResult
+	require.NoError(t, wfEnv.GetWorkflowResult(&got))
+	require.Equal(t, int64(1), got.Telemetry.Tools.TotalCalls)
+	require.Equal(t, int64(1), got.Telemetry.Tools.FailedCalls)
+}
+
+// TestAgentWorkflow_ToolTelemetry_SkipsNonCountableKind checks that a tool call of kind
+// types.ToolKindSubAgent leaves Tools.TotalCalls at zero.
+func TestAgentWorkflow_ToolTelemetry_SkipsNonCountableKind(t *testing.T) {
+	var ts testsuite.WorkflowTestSuite
+	wfEnv := ts.NewTestWorkflowEnvironment()
+	trt := testRuntimeForWorkflow(t)
+	wfEnv.RegisterWorkflow(trt.AgentWorkflow)
+	turn := 0
+	llmStub := func(ctx context.Context, in AgentLLMInput) (*AgentLLMResult, error) {
+		turn++
+		if turn > 1 {
+			return &AgentLLMResult{Content: "done", ToolCalls: nil}, nil
+		}
+		subAgentCall := testWorkflowToolCall("tc1", "delegate", types.ToolKindSubAgent, nil)
+		return &AgentLLMResult{Content: "using tool", ToolCalls: []ToolCallRequest{subAgentCall}}, nil
+	}
+	wfEnv.OnActivity(trt.AgentLLMActivity, mock.Anything, mock.Anything).Return(llmStub)
+	wfEnv.OnActivity(trt.AgentToolExecuteActivity, mock.Anything, mock.Anything).Return("should not run", nil)
+	wfEnv.OnActivity(trt.AgentToolAuthorizeActivity, mock.Anything, mock.Anything).Return(AgentToolAuthorizeResult{Allowed: true}, nil)
+
+	wfEnv.ExecuteWorkflow(trt.AgentWorkflow, AgentWorkflowInput{UserPrompt: "run"})
+	require.True(t, wfEnv.IsWorkflowCompleted())
+
+	var got types.AgentRunResult
+	require.NoError(t, wfEnv.GetWorkflowResult(&got))
+	require.Equal(t, int64(0), got.Telemetry.Tools.TotalCalls)
+}
+
+// TestAgentWorkflow_SequentialMode_ContinuesOnToolError checks that sequential mode still runs the second tool after the first one's authorization errors.
+func TestAgentWorkflow_SequentialMode_ContinuesOnToolError(t *testing.T) {
+	var ts testsuite.WorkflowTestSuite
+	wfEnv := ts.NewTestWorkflowEnvironment()
+	trt := testRuntimeForWorkflow(t)
+	trt.ToolExecutionMode = types.AgentToolExecutionModeSequential
+	wfEnv.RegisterWorkflow(trt.AgentWorkflow)
+	turn := 0
+	llmStub := func(ctx context.Context, in AgentLLMInput) (*AgentLLMResult, error) {
+		turn++
+		if turn > 1 {
+			return &AgentLLMResult{Content: "after tools", ToolCalls: nil}, nil
+		}
+		requested := []ToolCallRequest{
+			testWorkflowToolCall("tc1", "bad", types.ToolKindNative, nil),
+			testWorkflowToolCall("tc2", "ok", types.ToolKindNative, nil),
+		}
+		return &AgentLLMResult{Content: "using tools", ToolCalls: requested}, nil
+	}
+	authorizeStub := func(ctx context.Context, in AgentToolAuthorizeInput) (AgentToolAuthorizeResult, error) {
+		if in.ToolName != "bad" {
+			return AgentToolAuthorizeResult{Allowed: true}, nil
+		}
+		return AgentToolAuthorizeResult{}, fmt.Errorf("auth backend down")
+	}
+	executeStub := func(ctx context.Context, in AgentToolExecuteInput) (string, error) {
+		if in.ToolName != "ok" {
+			t.Errorf("unexpected execute for %q", in.ToolName)
+		}
+		return "ok result", nil
+	}
+	wfEnv.OnActivity(trt.AgentLLMActivity, mock.Anything, mock.Anything).Return(llmStub)
+	wfEnv.OnActivity(trt.AgentToolAuthorizeActivity, mock.Anything, mock.Anything).Return(authorizeStub)
+	wfEnv.OnActivity(trt.AgentToolExecuteActivity, mock.Anything, mock.Anything).Return(executeStub)
+
+	wfEnv.ExecuteWorkflow(trt.AgentWorkflow, AgentWorkflowInput{UserPrompt: "run"})
+	require.True(t, wfEnv.IsWorkflowCompleted())
+	var got types.AgentRunResult
+	require.NoError(t, wfEnv.GetWorkflowResult(&got))
+	require.Equal(t, "after tools", got.Content)
+	require.Equal(t, 2, turn)
+	require.Equal(t, int64(2), got.Telemetry.Tools.TotalCalls)
+	require.Equal(t, int64(1), got.Telemetry.Tools.FailedCalls)
 }
 
 func TestAgentWorkflow_ToolAuthorizationDenied_SkipsExecute(t *testing.T) {
@@ -163,13 +277,8 @@ func TestAgentWorkflow_ToolAuthorizationDenied_SkipsExecute(t *testing.T) {
 		llmCalls++
 		if llmCalls == 1 {
 			return &AgentLLMResult{
-				Content: "using tool",
-				ToolCalls: []ToolCallRequest{{
-					ToolCallID:      "tc-auth-deny",
-					ToolName:        "echo",
-					ToolDisplayName: "Echo",
-					Args:            map[string]any{"x": 1},
-				}},
+				Content:   "using tool",
+				ToolCalls: []ToolCallRequest{testWorkflowToolCall("tc-auth-deny", "echo", types.ToolKindNative, map[string]any{"x": 1})},
 			}, nil
 		}
 		return &AgentLLMResult{Content: "after deny", ToolCalls: nil}, nil
@@ -277,6 +386,7 @@ func TestAgentLLMActivity_MockLLM_ToolCalls(t *testing.T) {
 	require.Len(t, got.ToolCalls, 1)
 	require.Equal(t, "echo", got.ToolCalls[0].ToolName)
 	require.Equal(t, "tc1", got.ToolCalls[0].ToolCallID)
+	require.Equal(t, types.ToolKindNative, got.ToolCalls[0].ToolKind)
 	require.False(t, got.ToolCalls[0].NeedsApproval)
 }
 
@@ -443,13 +553,8 @@ func TestAgentWorkflow_ContinueAsNewOnHistoryLengthAfterTools(t *testing.T) {
 	env.RegisterWorkflow(rt.AgentWorkflow)
 	env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return(func(ctx context.Context, in AgentLLMInput) (*AgentLLMResult, error) {
 		return &AgentLLMResult{
-			Content: "using tool",
-			ToolCalls: []ToolCallRequest{{
-				ToolCallID:      "tc-can",
-				ToolName:        "echo",
-				ToolDisplayName: "Echo",
-				Args:            map[string]any{"x": 1},
-			}},
+			Content:   "using tool",
+			ToolCalls: []ToolCallRequest{testWorkflowToolCall("tc-can", "echo", types.ToolKindNative, map[string]any{"x": 1})},
 		}, nil
 	})
 	env.OnActivity(rt.AgentToolExecuteActivity, mock.Anything, mock.Anything).Return("echo ok", nil)
@@ -467,6 +572,57 @@ func TestAgentWorkflow_ContinueAsNewOnHistoryLengthAfterTools(t *testing.T) {
 	require.True(t, workflow.IsContinueAsNewError(wfErr), "expected continue-as-new, got: %v", wfErr)
 }
 
+func TestAgentWorkflow_ResumesTelemetryFromState(t *testing.T) { // a run started with prior State must extend the carried-over telemetry and usage, not reset them
+	var suite testsuite.WorkflowTestSuite
+	env := suite.NewTestWorkflowEnvironment()
+	rt := testRuntimeForWorkflow(t)
+
+	priorStarted := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC) // fixed instant so the carried-over StartedAt can be compared exactly
+
+	env.RegisterWorkflow(rt.AgentWorkflow)
+	env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return(
+		&AgentLLMResult{
+			Content: "done",
+			Usage:   &interfaces.LLMUsage{TotalTokens: 50, PromptTokens: 30, CompletionTokens: 20},
+		}, nil)
+
+	env.ExecuteWorkflow(rt.AgentWorkflow, AgentWorkflowInput{
+		UserPrompt: "run",
+		State: &AgentWorkflowState{
+			Iteration: 0,
+			Messages: []interfaces.Message{
+				{Role: interfaces.MessageRoleUser, Content: "run"},
+			},
+			LLMUsage: &types.LLMUsage{TotalTokens: 100},
+			Telemetry: &types.AgentTelemetry{
+				Run: types.RunTelemetry{
+					StartedAt:     priorStarted,
+					TotalLLMCalls: 2,
+					FinishReason:  types.FinishReasonComplete,
+				},
+				Tools: types.ToolTelemetry{
+					TotalCalls:  4,
+					FailedCalls: 1,
+					Breakdown:   map[string]int64{"prior": 4},
+				},
+			},
+		},
+	})
+
+	require.True(t, env.IsWorkflowCompleted())
+	var result types.AgentRunResult
+	require.NoError(t, env.GetWorkflowResult(&result))
+	require.NotNil(t, result.Telemetry)
+	require.NotNil(t, result.Telemetry.Run) // NOTE(review): Run is a struct value, not a pointer, so this assertion cannot fail
+	require.Equal(t, priorStarted, result.Telemetry.Run.StartedAt)
+	require.Equal(t, int64(3), result.Telemetry.Run.TotalLLMCalls) // 2 carried over + 1 call made in this run
+	require.Equal(t, int64(4), result.Telemetry.Tools.TotalCalls)
+	require.Equal(t, int64(1), result.Telemetry.Tools.FailedCalls) // the mocked LLM requests no tools, so prior tool counts pass through
+	require.NotNil(t, result.LLMUsage)
+	require.Equal(t, int64(150), result.LLMUsage.TotalTokens) // 100 carried over + 50 reported by the mocked call
+	require.False(t, result.Telemetry.Run.CompletedAt.IsZero())
+}
+
 func TestAgentWorkflow_ContinueAsNewOnHistorySizeAfterTools(t *testing.T) {
 	var suite testsuite.WorkflowTestSuite
 	env := suite.NewTestWorkflowEnvironment()
@@ -478,13 +634,8 @@ func TestAgentWorkflow_ContinueAsNewOnHistorySizeAfterTools(t *testing.T) {
 	env.RegisterWorkflow(rt.AgentWorkflow)
 	env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return(func(ctx context.Context, in AgentLLMInput) (*AgentLLMResult, error) {
 		return &AgentLLMResult{
-			Content: "using tool",
-			ToolCalls: []ToolCallRequest{{
-				ToolCallID:      "tc-can-size",
-				ToolName:        "echo",
-				ToolDisplayName: "Echo",
-				Args:            map[string]any{"x": 1},
-			}},
+			Content:   "using tool",
+			ToolCalls: []ToolCallRequest{testWorkflowToolCall("tc-can-size", "echo", types.ToolKindNative, map[string]any{"x": 1})},
 		}, nil
 	})
 	env.OnActivity(rt.AgentToolExecuteActivity, mock.Anything, mock.Anything).Return("echo ok", nil)
@@ -565,6 +716,8 @@ func TestAgentRetrieverActivity_SingleRetriever(t *testing.T) {
 	require.Contains(t, got.RetrieverContext, "Go is a language")
 	require.Contains(t, got.RetrieverContext, "docs.go.dev")
 	require.Contains(t, got.RetrieverContext, "0.95")
+	require.Equal(t, int64(1), got.TotalSearches)
+	require.Equal(t, int64(0), got.FailedSearches)
 	// Single retriever: no section header
 	require.NotContains(t, got.RetrieverContext, "## kb")
 }
@@ -598,6 +751,8 @@ func TestAgentRetrieverActivity_MultipleRetrievers_SectionHeaders(t *testing.T)
 	require.Contains(t, got.RetrieverContext, "doc from r1")
 	require.Contains(t, got.RetrieverContext, "## r2")
 	require.Contains(t, got.RetrieverContext, "doc from r2")
+	require.Equal(t, int64(2), got.TotalSearches)
+	require.Equal(t, int64(0), got.FailedSearches)
 }
 
 func TestAgentRetrieverActivity_PartialFailure_ContinuesWithPartialContext(t *testing.T) {
@@ -625,9 +780,11 @@ func TestAgentRetrieverActivity_PartialFailure_ContinuesWithPartialContext(t *te
 	require.NoError(t, val.Get(&got))
 	require.Contains(t, got.RetrieverContext, "good doc")
 	require.NotContains(t, got.RetrieverContext, "bad")
+	require.Equal(t, int64(2), got.TotalSearches)
+	require.Equal(t, int64(1), got.FailedSearches)
 }
 
-func TestAgentRetrieverActivity_AllFail_ReturnsError(t *testing.T) {
+func TestAgentRetrieverActivity_AllFail_ContinuesWithEmptyContext(t *testing.T) {
 	ctrl := gomock.NewController(t)
 	defer ctrl.Finish()
 
@@ -639,9 +796,13 @@ func TestAgentRetrieverActivity_AllFail_ReturnsError(t *testing.T) {
 	actEnv := newActivityTestEnv(t)
 	actEnv.RegisterActivity(rt.AgentRetrieverActivity)
 
-	_, err := actEnv.ExecuteActivity(rt.AgentRetrieverActivity, AgentRetrieverInput{UserPrompt: "q"})
-	require.Error(t, err)
-	require.Contains(t, err.Error(), "all")
+	val, err := actEnv.ExecuteActivity(rt.AgentRetrieverActivity, AgentRetrieverInput{UserPrompt: "q"})
+	require.NoError(t, err)
+	var got AgentRetrieverResult
+	require.NoError(t, val.Get(&got))
+	require.Equal(t, "", got.RetrieverContext)
+	require.Equal(t, int64(1), got.TotalSearches)
+	require.Equal(t, int64(1), got.FailedSearches)
 }
 
 func TestAgentRetrieverActivity_EmptyDocs_EmptyContext(t *testing.T) {
@@ -706,7 +867,7 @@ func TestAgentWorkflow_PrefetchMode_CallsRetrieverActivityFirst(t *testing.T) {
 		func(ctx context.Context, in AgentRetrieverInput) (*AgentRetrieverResult, error) {
 			retrieverCalled = true
 			require.Equal(t, "user query", in.UserPrompt)
-			return &AgentRetrieverResult{RetrieverContext: "[1] prefetched doc"}, nil
+			return &AgentRetrieverResult{RetrieverContext: "[1] prefetched doc", TotalSearches: 1, FailedSearches: 0}, nil
 		})
 
 	env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return(
@@ -724,6 +885,11 @@ func TestAgentWorkflow_PrefetchMode_CallsRetrieverActivityFirst(t *testing.T) {
 	var result types.AgentRunResult
 	require.NoError(t, env.GetWorkflowResult(&result))
 	require.Equal(t, "answer", result.Content)
+	require.NotNil(t, result.Telemetry)
+	require.Equal(t, int64(1), result.Telemetry.Storage.TotalRetrieverSearches)
+	require.Equal(t, int64(0), result.Telemetry.Storage.FailedRetrieverSearches)
+	require.Equal(t, int64(1), result.Telemetry.Storage.PrefetchSearches)
+	require.Equal(t, int64(0), result.Telemetry.Storage.AgenticSearches)
 }
 
 func TestAgentWorkflow_AgenticMode_SkipsRetrieverActivity(t *testing.T) {
diff --git a/internal/runtime/temporal/approval.go b/internal/runtime/temporal/approval.go
new file mode 100644
index 0000000..216ad2e
--- /dev/null
+++ b/internal/runtime/temporal/approval.go
@@ -0,0 +1,82 @@
+package temporal
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/agenticenv/agent-sdk-go/internal/events"
+	"github.com/agenticenv/agent-sdk-go/internal/types"
+)
+
+// ErrNotApprovalCustomEvent is returned by prepareApprovalFromCustomEvent when the CUSTOM event name is neither tool approval nor sub-agent delegation.
+var ErrNotApprovalCustomEvent = errors.New("temporal: custom event is not a recognized approval kind")
+
+// toolApprovalFromEventValue copies the CUSTOM approval payload into an SDK approval value.
+func toolApprovalFromEventValue(ev events.AgentCustomEventApprovalValue) types.ToolApprovalRequestValue {
+	return types.ToolApprovalRequestValue{
+		AgentName:       ev.AgentName,
+		ToolCallID:      ev.ToolCallID,
+		ToolName:        ev.ToolName,
+		ToolDisplayName: ev.ToolDisplayName,
+		Args:            cloneArgsMap(ev.Args),
+		ApprovalToken:   ev.ApprovalToken,
+	}
+}
+
+// delegationApprovalFromEventValue copies the CUSTOM delegation payload into an SDK approval value.
+func delegationApprovalFromEventValue(ev events.AgentCustomEventDelegationValue) types.SubAgentDelegationApprovalRequestValue {
+	return types.SubAgentDelegationApprovalRequestValue{
+		AgentName:     ev.AgentName,
+		SubAgentName:  ev.SubAgentName,
+		Args:          cloneArgsMap(ev.Args),
+		ApprovalToken: ev.ApprovalToken,
+	}
+}
+
+// prepareApprovalFromCustomEvent turns a CUSTOM event into an SDK approval request (Name + Value) plus the approval token used for Temporal CompleteActivity.
+// The returned request has a nil Respond; the caller must set it before invoking types.ApprovalHandler.
+// A nil event and payload parse failures return an error; any other event name returns ErrNotApprovalCustomEvent.
+func prepareApprovalFromCustomEvent(ev *events.AgentCustomEvent) (req *types.ApprovalRequest, approvalToken string, err error) {
+	if ev == nil {
+		return nil, "", fmt.Errorf("temporal: nil custom event")
+	}
+	switch events.AgentCustomEventName(ev.Name) {
+	case events.AgentCustomEventNameToolApproval:
+		// Tool approval: parse the payload, then convert it to the SDK value.
+		payload, parseErr := events.ParseCustomEventApproval(ev)
+		if parseErr != nil {
+			return nil, "", parseErr
+		}
+		value := toolApprovalFromEventValue(payload)
+		req = &types.ApprovalRequest{
+			Name:  types.ApprovalRequestNameTool,
+			Value: value,
+		}
+		return req, value.ApprovalToken, nil
+	case events.AgentCustomEventNameSubAgentDelegation:
+		// Sub-agent delegation: same flow with the delegation payload.
+		payload, parseErr := events.ParseCustomEventDelegation(ev)
+		if parseErr != nil {
+			return nil, "", parseErr
+		}
+		value := delegationApprovalFromEventValue(payload)
+		req = &types.ApprovalRequest{
+			Name:  types.ApprovalRequestNameSubAgent,
+			Value: value,
+		}
+		return req, value.ApprovalToken, nil
+	default:
+		return nil, "", ErrNotApprovalCustomEvent
+	}
+}
+
+// cloneArgsMap returns a shallow copy of m; a nil map stays nil and nested values are shared with the source.
+func cloneArgsMap(m map[string]any) (dup map[string]any) {
+	if m != nil {
+		dup = make(map[string]any, len(m))
+		for key, val := range m {
+			dup[key] = val
+		}
+	}
+	return dup
+}
diff --git a/internal/runtime/temporal/approval_test.go b/internal/runtime/temporal/approval_test.go
new file mode 100644
index 0000000..f6c2c60
--- /dev/null
+++ b/internal/runtime/temporal/approval_test.go
@@ -0,0 +1,146 @@
+package temporal
+
+import (
+	"errors"
+	"strings"
+	"testing"
+
+	"github.com/agenticenv/agent-sdk-go/internal/events"
+	"github.com/agenticenv/agent-sdk-go/internal/types"
+)
+
+func TestPrepareApprovalFromCustomEvent_NilEvent(t *testing.T) { // a nil event must yield an error and no request or token
+	req, token, err := prepareApprovalFromCustomEvent(nil)
+	if req != nil || token != "" || err == nil {
+		t.Fatalf("expected nil req/token and error, got req=%v token=%q err=%v", req, token, err)
+	}
+	if !strings.Contains(err.Error(), "nil custom event") {
+		t.Fatalf("unexpected error: %v", err)
+	}
+}
+
+func TestPrepareApprovalFromCustomEvent_UnknownName(t *testing.T) { // names other than the two approval kinds must map to ErrNotApprovalCustomEvent
+	ev := events.NewAgentCustomEvent("other_custom", map[string]any{"x": 1})
+	req, token, err := prepareApprovalFromCustomEvent(ev)
+	if req != nil || token != "" {
+		t.Fatalf("expected nil req/token, got req=%v token=%q", req, token)
+	}
+	if !errors.Is(err, ErrNotApprovalCustomEvent) { // matched with errors.Is, the same way runtime.go checks it
+		t.Fatalf("expected ErrNotApprovalCustomEvent, got %v", err)
+	}
+}
+
+func TestPrepareApprovalFromCustomEvent_ToolApprovalTypedValue(t *testing.T) { // typed pointer payload: every field and the token must survive the conversion
+	ev := events.NewAgentCustomEvent(string(events.AgentCustomEventNameToolApproval), &events.AgentCustomEventApprovalValue{
+		AgentName:       "agent-a",
+		ToolCallID:      "call-1",
+		ToolName:        "calculator",
+		ToolDisplayName: "Calculator",
+		Args:            map[string]any{"x": 1},
+		ApprovalToken:   "tok-tool",
+	})
+
+	req, token, err := prepareApprovalFromCustomEvent(ev)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if token != "tok-tool" {
+		t.Fatalf("token: got %q want tok-tool", token)
+	}
+	if req == nil || req.Name != types.ApprovalRequestNameTool {
+		t.Fatalf("unexpected req: %#v", req)
+	}
+
+	parsed, err := types.ParseToolApproval(req)
+	if err != nil {
+		t.Fatalf("ParseToolApproval: %v", err)
+	}
+	if parsed.AgentName != "agent-a" || parsed.ToolCallID != "call-1" || parsed.ToolName != "calculator" ||
+		parsed.ToolDisplayName != "Calculator" || parsed.ApprovalToken != "tok-tool" {
+		t.Fatalf("unexpected parsed tool approval: %#v", parsed)
+	}
+	if parsed.Args["x"] != float64(1) && parsed.Args["x"] != 1 { // accepts int or float64; presumably the value may pass through JSON decoding — TODO confirm
+		t.Fatalf("unexpected args: %#v", parsed.Args)
+	}
+}
+
+func TestPrepareApprovalFromCustomEvent_ToolApprovalMapValue(t *testing.T) { // a plain map payload with camelCase keys must be accepted like the typed value
+	ev := events.NewAgentCustomEvent(string(events.AgentCustomEventNameToolApproval), map[string]any{
+		"toolName":      "search",
+		"approvalToken": "tok-map",
+		"args":          map[string]any{"q": "hello"},
+	})
+
+	req, token, err := prepareApprovalFromCustomEvent(ev)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if token != "tok-map" {
+		t.Fatalf("token: got %q want tok-map", token)
+	}
+	parsed, err := types.ParseToolApproval(req)
+	if err != nil {
+		t.Fatalf("ParseToolApproval: %v", err)
+	}
+	if parsed.ToolName != "search" || parsed.ApprovalToken != "tok-map" {
+		t.Fatalf("unexpected parsed: %#v", parsed)
+	}
+}
+
+func TestPrepareApprovalFromCustomEvent_Delegation(t *testing.T) { // delegation events must produce a sub-agent approval request carrying the same token
+	ev := events.NewAgentCustomEvent(string(events.AgentCustomEventNameSubAgentDelegation), map[string]any{
+		"agentName":     "parent",
+		"subAgentName":  "child",
+		"approvalToken": "tok-delegate",
+		"args":          map[string]any{"task": "summarize"},
+	})
+
+	req, token, err := prepareApprovalFromCustomEvent(ev)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if token != "tok-delegate" {
+		t.Fatalf("token: got %q want tok-delegate", token)
+	}
+	if req == nil || req.Name != types.ApprovalRequestNameSubAgent {
+		t.Fatalf("unexpected req: %#v", req)
+	}
+
+	parsed, err := types.ParseDelegationApproval(req)
+	if err != nil {
+		t.Fatalf("ParseDelegationApproval: %v", err)
+	}
+	if parsed.AgentName != "parent" || parsed.SubAgentName != "child" || parsed.ApprovalToken != "tok-delegate" {
+		t.Fatalf("unexpected parsed delegation: %#v", parsed)
+	}
+}
+
+func TestPrepareApprovalFromCustomEvent_ClonesArgs(t *testing.T) { // writing to the parsed Args must not reach the map the caller put on the event
+	args := map[string]any{"k": "v"}
+	ev := events.NewAgentCustomEvent(string(events.AgentCustomEventNameToolApproval), &events.AgentCustomEventApprovalValue{
+		ToolName:      "t",
+		ApprovalToken: "tok",
+		Args:          args,
+	})
+
+	req, _, err := prepareApprovalFromCustomEvent(ev)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	parsed, err := types.ParseToolApproval(req)
+	if err != nil {
+		t.Fatalf("ParseToolApproval: %v", err)
+	}
+	parsed.Args["k"] = "mutated" // write through the result, then check that the source map is untouched
+	if args["k"] != "v" {
+		t.Fatal("expected args map to be cloned, source was mutated")
+	}
+}
+
+func TestPrepareApprovalFromCustomEvent_ParseError(t *testing.T) { // a payload that is not an approval object (here the int 123) must surface an error
+	ev := events.NewAgentCustomEvent(string(events.AgentCustomEventNameToolApproval), 123)
+	_, _, err := prepareApprovalFromCustomEvent(ev)
+	if err == nil {
+		t.Fatal("expected parse error for invalid value shape")
+	}
+}
diff --git a/internal/runtime/temporal/runtime.go b/internal/runtime/temporal/runtime.go
index 2bca17b..743a282 100644
--- a/internal/runtime/temporal/runtime.go
+++ b/internal/runtime/temporal/runtime.go
@@ -429,9 +429,9 @@ func (rt *TemporalRuntime) Execute(ctx context.Context, req *runtime.ExecuteRequ
 			if !ok {
 				continue
 			}
-			apprReq, token, err := types.PrepareApprovalFromCustomEvent(approvalEv)
+			apprReq, token, err := prepareApprovalFromCustomEvent(approvalEv)
 			if err != nil {
-				if errors.Is(err, types.ErrNotApprovalCustomEvent) {
+				if errors.Is(err, ErrNotApprovalCustomEvent) {
 					continue
 				}
 				rt.logger.Error(runCtx, "runtime approval custom event decode failed", slog.String("scope", "runtime"), slog.Any("error", err))
diff --git a/internal/runtime/temporal/runtime_test.go b/internal/runtime/temporal/runtime_test.go
index be46b02..15f276c 100644
--- a/internal/runtime/temporal/runtime_test.go
+++ b/internal/runtime/temporal/runtime_test.go
@@ -68,37 +68,44 @@ func TestAgentNameFromRuntime(t *testing.T) {
 
 func TestSyntheticStreamCompleteEvent(t *testing.T) {
 	ev := syntheticStreamCompleteEvent(nil, "threadID", "runID", "root")
-	if ev == nil || ev.Type() != events.AgentEventTypeRunFinished || ev.(*events.AgentRunFinishedEvent).Result.(*types.AgentRunResult).AgentName != "root" {
+	fin, _ := ev.(*events.AgentRunFinishedEvent)
+	if ev == nil || ev.Type() != events.AgentEventTypeRunFinished || fin.Result == nil || fin.Result.AgentName != "root" {
 		t.Fatalf("nil resp: %+v", ev)
 	}
 
 	ev2 := syntheticStreamCompleteEvent(&types.AgentRunResult{
 		Content:   "body",
 		AgentName: "from-result",
-		Usage:     &types.LLMUsage{TotalTokens: 9},
+		LLMUsage:  &types.LLMUsage{TotalTokens: 9},
 	}, "threadID", "runID", "root")
 
-	result, ok := ev2.(*events.AgentRunFinishedEvent).Result.(*types.AgentRunResult)
-	if !ok {
-		t.Fatalf("expected AgentRunResult, got %T", ev2.(*events.AgentRunFinishedEvent).Result)
+	fin2, _ := ev2.(*events.AgentRunFinishedEvent)
+	result := fin2.Result
+	if result == nil {
+		t.Fatalf("expected AgentRunResult, got nil")
 	}
-	if result.Content != "body" || result.AgentName != "from-result" || result.Usage.TotalTokens != 9 {
+	if result.LLMUsage == nil {
+		t.Fatal("llm usage should be set")
+	}
+	if result.Content != "body" || result.AgentName != "from-result" || result.LLMUsage.TotalTokens != 9 {
 		t.Fatalf("with AgentName: %+v", ev2)
 	}
 
 	ev3 := syntheticStreamCompleteEvent(&types.AgentRunResult{Content: "c", AgentName: ""}, "threadID", "runID", "fallback")
-	result, ok = ev3.(*events.AgentRunFinishedEvent).Result.(*types.AgentRunResult)
-	if !ok {
-		t.Fatalf("expected AgentRunResult, got %T", ev3.(*events.AgentRunFinishedEvent).Result)
+	fin3, _ := ev3.(*events.AgentRunFinishedEvent)
+	result = fin3.Result
+	if result == nil {
+		t.Fatalf("expected AgentRunResult, got nil")
 	}
 	if result.AgentName != "fallback" {
 		t.Fatalf("fallback name: got %q", result.AgentName)
 	}
 
 	ev4 := syntheticStreamCompleteEvent(&types.AgentRunResult{Content: "only"}, "threadID", "runID", "")
-	result, ok = ev4.(*events.AgentRunFinishedEvent).Result.(*types.AgentRunResult)
-	if !ok {
-		t.Fatalf("expected AgentRunResult, got %T", ev4.(*events.AgentRunFinishedEvent).Result)
+	fin4, _ := ev4.(*events.AgentRunFinishedEvent)
+	result = fin4.Result
+	if result == nil {
+		t.Fatalf("expected AgentRunResult, got nil")
 	}
 	if result.AgentName != "" {
 		t.Fatalf("empty rootName with empty resp.AgentName: got %q", result.AgentName)
diff --git a/internal/types/agent.go b/internal/types/agent.go
index 384df0a..815bb70 100644
--- a/internal/types/agent.go
+++ b/internal/types/agent.go
@@ -24,7 +24,11 @@ type AgentRunResult struct {
 	Model     string         `json:"model"`
 	Metadata  map[string]any `json:"metadata"`
 	// Usage is the sum of token usage across all LLM calls in this run (when reported by the provider).
-	Usage *LLMUsage `json:"usage,omitempty"`
+	// LLMUsage (formerly Usage) acts as the historical root for aggregated token counters.
+	LLMUsage *LLMUsage `json:"llm_usage,omitempty"`
+
+	// Telemetry contains the strongly typed nested metrics domain payload.
+	Telemetry *AgentTelemetry `json:"telemetry,omitempty"`
 }
 
 // AgentRunAsyncResult is the single outcome from AgentRunAsync. After the channel closes, Err is non-nil
diff --git a/internal/types/approval.go b/internal/types/approval.go
index 0f93db4..e61a22c 100644
--- a/internal/types/approval.go
+++ b/internal/types/approval.go
@@ -6,16 +6,11 @@ import (
 	"errors"
 	"fmt"
 	"time"
-
-	"github.com/agenticenv/agent-sdk-go/internal/events"
 )
 
 // maxApprovalTimeout caps how long a single approval wait may last in the run.
 const MaxApprovalTimeout = 31 * 24 * time.Hour
 
-// ErrNotApprovalCustomEvent means the CUSTOM event name is not tool or delegation approval.
-var ErrNotApprovalCustomEvent = errors.New("types: custom event is not a recognized approval kind")
-
 type ApprovalStatus string
 
 const (
@@ -45,7 +40,7 @@ const (
 
 // ApprovalRequest is one pending approval callback. Name + Value match CUSTOM semantics;
 // Value is a [ToolApprovalRequestValue] or [SubAgentDelegationApprovalRequestValue].
-// Set Respond before invoking the handler (see [PrepareApprovalFromCustomEvent]).
+// Set Respond before invoking the handler.
 type ApprovalRequest struct {
 	Name    ApprovalRequestName `json:"name,omitempty"`
 	Value   any                 `json:"value,omitempty"`
@@ -70,76 +65,6 @@ type SubAgentDelegationApprovalRequestValue struct {
 	ApprovalToken string         `json:"approvalToken,omitempty"`
 }
 
-// ToolApprovalFromEventValue copies the CUSTOM approval payload into an SDK approval value.
-func ToolApprovalFromEventValue(ev events.AgentCustomEventApprovalValue) ToolApprovalRequestValue {
-	return ToolApprovalRequestValue{
-		AgentName:       ev.AgentName,
-		ToolCallID:      ev.ToolCallID,
-		ToolName:        ev.ToolName,
-		ToolDisplayName: ev.ToolDisplayName,
-		Args:            cloneArgsMap(ev.Args),
-		ApprovalToken:   ev.ApprovalToken,
-	}
-}
-
-// DelegationApprovalFromEventValue copies the CUSTOM delegation payload into an SDK approval value.
-func DelegationApprovalFromEventValue(ev events.AgentCustomEventDelegationValue) SubAgentDelegationApprovalRequestValue {
-	return SubAgentDelegationApprovalRequestValue{
-		AgentName:     ev.AgentName,
-		SubAgentName:  ev.SubAgentName,
-		Args:          cloneArgsMap(ev.Args),
-		ApprovalToken: ev.ApprovalToken,
-	}
-}
-
-func cloneArgsMap(m map[string]any) map[string]any {
-	if m == nil {
-		return nil
-	}
-	out := make(map[string]any, len(m))
-	for k, v := range m {
-		out[k] = v
-	}
-	return out
-}
-
-// PrepareApprovalFromCustomEvent parses a CUSTOM event and returns Name + Value as SDK types and the approval token for Temporal CompleteActivity.
-// Respond is nil; the caller must set it before calling [ApprovalHandler].
-// Returns [ErrNotApprovalCustomEvent] when ev.Name is not tool or delegation approval.
-func PrepareApprovalFromCustomEvent(ev *events.AgentCustomEvent) (req *ApprovalRequest, approvalToken string, err error) {
-	if ev == nil {
-		return nil, "", fmt.Errorf("types: nil custom event")
-	}
-	switch events.AgentCustomEventName(ev.Name) {
-	case events.AgentCustomEventNameToolApproval:
-		raw, err := events.ParseCustomEventApproval(ev)
-		if err != nil {
-			return nil, "", err
-		}
-		v := ToolApprovalFromEventValue(raw)
-		return &ApprovalRequest{
-				Name:  ApprovalRequestNameTool,
-				Value: v,
-			},
-			v.ApprovalToken,
-			nil
-	case events.AgentCustomEventNameSubAgentDelegation:
-		raw, err := events.ParseCustomEventDelegation(ev)
-		if err != nil {
-			return nil, "", err
-		}
-		v := DelegationApprovalFromEventValue(raw)
-		return &ApprovalRequest{
-				Name:  ApprovalRequestNameSubAgent,
-				Value: v,
-			},
-			v.ApprovalToken,
-			nil
-	default:
-		return nil, "", ErrNotApprovalCustomEvent
-	}
-}
-
 func parseApprovalPayload[V any](v any) (out V, err error) {
 	if v == nil {
 		return out, fmt.Errorf("types: nil approval value")
diff --git a/internal/types/telemetry.go b/internal/types/telemetry.go
new file mode 100644
index 0000000..69c4cc1
--- /dev/null
+++ b/internal/types/telemetry.go
@@ -0,0 +1,96 @@
+package types
+
+import "time"
+
+// AgentTelemetry is the unified container for operational insights about
+// a single agent run: run lifecycle, tool calls, and storage operations.
+type AgentTelemetry struct {
+	// Run captures the orchestration lifecycle metrics for the run.
+	Run RunTelemetry `json:"run"`
+
+	// Tools tracks tool invocation counts and breakdowns for the run.
+	Tools ToolTelemetry `json:"tools"`
+
+	// Storage tracks retriever (RAG) search counts for the run.
+	Storage StorageTelemetry `json:"storage"`
+}
+
+type FinishReason string // FinishReason names why a run stopped; see the FinishReason* constants below.
+
+const (
+	// FinishReasonComplete indicates that the agent run completed normally.
+	FinishReasonComplete FinishReason = "complete"
+	// FinishReasonMaxIterations indicates that the agent run ended because the maximum number of iterations was reached.
+	FinishReasonMaxIterations FinishReason = "max_iterations"
+)
+
+// RunTelemetry captures the orchestration lifecycle metrics for a single agent run.
+type RunTelemetry struct {
+	// StartedAt is the start time of the agent run.
+	StartedAt time.Time `json:"started_at"`
+
+	// CompletedAt is the completion time of the agent run.
+	CompletedAt time.Time `json:"completed_at"`
+
+	// TotalLLMCalls counts how many LLM calls were made during the run.
+	TotalLLMCalls int64 `json:"total_llm_calls"`
+
+	// FinishReason explains how the run concluded; its values are the FinishReason* constants.
+	FinishReason FinishReason `json:"finish_reason"`
+}
+
+// ToolTelemetry tracks tool invocation counts and per-tool breakdowns across a single agent run.
+type ToolTelemetry struct {
+	// TotalCalls is the total number of tool invocations made by the agent.
+	TotalCalls int64 `json:"total_calls"`
+
+	// FailedCalls is the number of tool invocations that returned an error.
+	// Excludes approval-denied and unauthorized cases.
+	FailedCalls int64 `json:"failed_calls"`
+
+	// Breakdown tracks invocation counts per tool name.
+	// Key: tool name (e.g. "palo_alto_fw_lookup"), Value: invocation count
+	Breakdown map[string]int64 `json:"breakdown,omitempty"`
+
+	// FailedBreakdown tracks failed invocation counts per tool name.
+	// Excludes approval-denied and unauthorized cases.
+	// Key: tool name (e.g. "palo_alto_fw_lookup"), Value: failed invocation count
+	FailedBreakdown map[string]int64 `json:"failed_breakdown,omitempty"`
+}
+
+// Record increments tool invocation counters for name; when failed is true the failed counters are updated too. A nil receiver is a no-op.
+// Breakdown maps are allocated only when a non-empty name is written, so an empty name leaves them nil. Caller must serialize concurrent Record calls (e.g. mutex in parallel tool execution).
+func (t *ToolTelemetry) Record(name string, failed bool) {
+	if t == nil {
+		return
+	}
+	t.TotalCalls++
+	if name != "" {
+		if t.Breakdown == nil {
+			t.Breakdown = make(map[string]int64)
+		}
+		t.Breakdown[name]++
+	}
+	if !failed {
+		return
+	}
+	t.FailedCalls++
+	if name != "" {
+		if t.FailedBreakdown == nil {
+			t.FailedBreakdown = make(map[string]int64)
+		}
+		t.FailedBreakdown[name]++
+	}
+}
+
+// StorageTelemetry tracks RAG retrieval operations across prefetch, agentic, and hybrid modes.
+// All fields are zero when no retriever is configured.
+type StorageTelemetry struct {
+	// Retriever search totals; always serialized, even when zero (no omitempty on these two).
+	TotalRetrieverSearches  int64 `json:"total_retriever_searches"`
+	FailedRetrieverSearches int64 `json:"failed_retriever_searches"`
+
+	// Breakdown by mode; a zero count is left out of the JSON output (omitempty).
+	PrefetchSearches int64 `json:"prefetch_searches,omitempty"`
+	AgenticSearches  int64 `json:"agentic_searches,omitempty"`
+}
diff --git a/internal/types/telemetry_test.go b/internal/types/telemetry_test.go
new file mode 100644
index 0000000..3eadf68
--- /dev/null
+++ b/internal/types/telemetry_test.go
@@ -0,0 +1,61 @@
+package types
+
+import "testing"
+
+func TestToolTelemetry_Record_nilReceiver(t *testing.T) { // passes as long as Record on a nil pointer does not panic
+	var tels *ToolTelemetry
+	tels.Record("calc", false)
+}
+
+func TestToolTelemetry_Record_success(t *testing.T) { // a successful call bumps the total and per-tool count but none of the failure counters
+	var tools ToolTelemetry
+	tools.Record("calculator", false)
+	if tools.TotalCalls != 1 || tools.FailedCalls != 0 {
+		t.Fatalf("calls: total=%d failed=%d", tools.TotalCalls, tools.FailedCalls)
+	}
+	if tools.Breakdown["calculator"] != 1 {
+		t.Fatalf("breakdown: %#v", tools.Breakdown)
+	}
+	if len(tools.FailedBreakdown) != 0 { // len works for both a nil and an empty map
+		t.Fatalf("unexpected failed breakdown: %#v", tools.FailedBreakdown)
+	}
+}
+
+func TestToolTelemetry_Record_failed(t *testing.T) { // a failed call is counted in both the total and the failed counters
+	var tools ToolTelemetry
+	tools.Record("weather", true)
+	if tools.TotalCalls != 1 || tools.FailedCalls != 1 {
+		t.Fatalf("calls: total=%d failed=%d", tools.TotalCalls, tools.FailedCalls)
+	}
+	if tools.Breakdown["weather"] != 1 || tools.FailedBreakdown["weather"] != 1 {
+		t.Fatalf("breakdown=%#v failed=%#v", tools.Breakdown, tools.FailedBreakdown)
+	}
+}
+
+func TestToolTelemetry_Record_multiple(t *testing.T) { // counters accumulate across calls and stay separate per tool name
+	var tools ToolTelemetry
+	tools.Record("calculator", false)
+	tools.Record("calculator", true)
+	tools.Record("mcp_srv_search", false)
+
+	if tools.TotalCalls != 3 || tools.FailedCalls != 1 {
+		t.Fatalf("calls: total=%d failed=%d", tools.TotalCalls, tools.FailedCalls)
+	}
+	if tools.Breakdown["calculator"] != 2 || tools.Breakdown["mcp_srv_search"] != 1 {
+		t.Fatalf("breakdown: %#v", tools.Breakdown)
+	}
+	if tools.FailedBreakdown["calculator"] != 1 { // only the second calculator call failed
+		t.Fatalf("failed breakdown: %#v", tools.FailedBreakdown)
+	}
+}
+
+func TestToolTelemetry_Record_emptyName(t *testing.T) { // an empty name still counts toward the totals but adds no breakdown key
+	var tools ToolTelemetry
+	tools.Record("", true)
+	if tools.TotalCalls != 1 || tools.FailedCalls != 1 {
+		t.Fatalf("calls: total=%d failed=%d", tools.TotalCalls, tools.FailedCalls)
+	}
+	if len(tools.Breakdown) != 0 || len(tools.FailedBreakdown) != 0 { // checks key count only, so nil and empty maps both pass
+		t.Fatalf("expected no breakdown keys for empty name, got breakdown=%#v failed=%#v", tools.Breakdown, tools.FailedBreakdown)
+	}
+}
diff --git a/internal/types/tool.go b/internal/types/tool.go
index f1e15bb..42d5ee8 100644
--- a/internal/types/tool.go
+++ b/internal/types/tool.go
@@ -14,3 +14,42 @@ type JSONSchema map[string]any
 func (s JSONSchema) MarshalJSON() ([]byte, error) {
 	return json.Marshal(map[string]any(s))
 }
+
+// ToolKind classifies SDK-built tool wrappers. User-registered tools default to [ToolKindNative].
+type ToolKind string
+
+const (
+	ToolKindNative    ToolKind = "native"    // default kind; KindOf falls back to it
+	ToolKindMCP       ToolKind = "mcp"       // MCP tool wrapper
+	ToolKindA2A       ToolKind = "a2a"       // A2A tool wrapper
+	ToolKindSubAgent  ToolKind = "sub_agent" // sub-agent wrapper
+	ToolKindRetriever ToolKind = "retriever" // retriever wrapper
+)
+
+// ToolKindProvider is implemented by SDK tool wrappers (MCP, A2A, sub-agent, retriever).
+type ToolKindProvider interface {
+	ToolKind() ToolKind // kind reported for this tool; KindOf treats an empty value as ToolKindNative
+}
+
+// KindOf returns the kind reported by t when it implements [ToolKindProvider] with a non-empty value, otherwise [ToolKindNative].
+func KindOf(t any) ToolKind {
+	provider, ok := t.(ToolKindProvider)
+	if !ok {
+		// A nil t lands here too: asserting on a nil interface value yields ok == false.
+		return ToolKindNative
+	}
+	if kind := provider.ToolKind(); kind != "" {
+		return kind
+	}
+	return ToolKindNative
+}
+
+// CountsTowardToolTelemetry reports whether invocations of this kind belong in [ToolTelemetry].
+func (k ToolKind) CountsTowardToolTelemetry() bool {
+	// Sub-agent, A2A, and retriever kinds are the only exclusions;
+	// every other value, including unrecognized kinds, is counted.
+	excluded := k == ToolKindSubAgent ||
+		k == ToolKindA2A ||
+		k == ToolKindRetriever
+	return !excluded
+}
diff --git a/internal/types/tool_test.go b/internal/types/tool_test.go
new file mode 100644
index 0000000..43f3f9e
--- /dev/null
+++ b/internal/types/tool_test.go
@@ -0,0 +1,35 @@
+package types
+
+import "testing"
+
+type stubKindTool struct{ kind ToolKind } // test double that reports whatever kind it is constructed with
+
+func (s stubKindTool) ToolKind() ToolKind { return s.kind } // makes stubKindTool satisfy ToolKindProvider
+
+type stubNativeTool struct{} // deliberately has no ToolKind method, so it is not a ToolKindProvider
+
+func TestKindOf(t *testing.T) {
+	if KindOf(nil) != ToolKindNative { // untyped nil is handled by KindOf's explicit nil guard
+		t.Fatalf("nil = %q", KindOf(nil))
+	}
+	if KindOf(stubNativeTool{}) != ToolKindNative { // a type without a ToolKind method defaults to native
+		t.Fatal("native tool without provider")
+	}
+	if KindOf(stubKindTool{kind: ToolKindMCP}) != ToolKindMCP { // a provider's non-empty kind is passed through unchanged
+		t.Fatal("mcp kind")
+	}
+	if KindOf(stubKindTool{kind: ""}) != ToolKindNative { // a provider that returns the empty kind is treated as native
+		t.Fatal("empty kind falls back to native")
+	}
+}
+
+func TestToolKind_CountsTowardToolTelemetry(t *testing.T) {
+	if !ToolKindNative.CountsTowardToolTelemetry() || !ToolKindMCP.CountsTowardToolTelemetry() { // kinds that must be counted
+		t.Fatal("native and mcp count toward tool telemetry")
+	}
+	for _, k := range []ToolKind{ToolKindSubAgent, ToolKindA2A, ToolKindRetriever} { // kinds that must be excluded
+		if k.CountsTowardToolTelemetry() {
+			t.Fatalf("%q should not count toward tool telemetry", k)
+		}
+	}
+}
diff --git a/pkg/agent/a2a.go b/pkg/agent/a2a.go
index 99f12dc..d12da38 100644
--- a/pkg/agent/a2a.go
+++ b/pkg/agent/a2a.go
@@ -23,7 +23,7 @@ var (
 const defaultA2AToolTimeout = types.DefaultA2ATimeout
 
 var _ interfaces.Tool = (*A2ATool)(nil)
-var _ interfaces.ToolKindProvider = (*A2ATool)(nil)
+var _ types.ToolKindProvider = (*A2ATool)(nil)
 
 // NOTE: A2ATools for the same server share one A2AClient. The default pkg/a2a/client is safe
 // for concurrent use; custom A2AClient implementations should document concurrency behaviour.
@@ -76,8 +76,8 @@ func NewA2ATool(serverName string, spec interfaces.ToolSpec, skillSpec interface
 	return &A2ATool{ServerName: serverName, Spec: spec, SkillSpec: skillSpec, Client: client}
 }
 
-// ToolKind implements [interfaces.ToolKindProvider].
-func (t *A2ATool) ToolKind() string { return "a2a" }
+// ToolKind implements [types.ToolKindProvider]; it always reports [types.ToolKindA2A].
+func (t *A2ATool) ToolKind() types.ToolKind { return types.ToolKindA2A }
 
 // Name implements [interfaces.Tool].
 func (t *A2ATool) Name() string {
diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go
index b1c5d15..a71aaa6 100644
--- a/pkg/agent/agent.go
+++ b/pkg/agent/agent.go
@@ -41,6 +41,23 @@ type AgentRunResult = types.AgentRunResult
 // on failure; otherwise Result is non-nil.
 type AgentRunAsyncResult = types.AgentRunAsyncResult
 
+// AgentTelemetry aliases types.AgentTelemetry, the unified container for operational
+// insights across a single agent run: run lifecycle, tool calls, and storage operations.
+// Token usage is reported separately on AgentRunResult.LLMUsage.
+type AgentTelemetry = types.AgentTelemetry
+
+// LLMUsage aliases types.LLMUsage, the token usage for a single LLM call.
+type LLMUsage = types.LLMUsage
+
+// RunTelemetry aliases types.RunTelemetry, which captures the orchestration lifecycle metrics for a single agent run.
+type RunTelemetry = types.RunTelemetry
+
+// ToolTelemetry aliases types.ToolTelemetry, which tracks tool invocation counts and per-tool breakdowns across a single agent run.
+type ToolTelemetry = types.ToolTelemetry
+
+// StorageTelemetry aliases types.StorageTelemetry, which tracks RAG retrieval operations (prefetch, agentic, and hybrid searches).
+type StorageTelemetry = types.StorageTelemetry
+
 // buildAgent builds an Agent from options. Validates approval handler when tools require approval.
 func buildAgent(opts []Option) (*Agent, error) {
 	cfg, err := buildAgentConfig(opts)
diff --git a/pkg/agent/agent_test.go b/pkg/agent/agent_test.go
index 6593a28..69bd15c 100644
--- a/pkg/agent/agent_test.go
+++ b/pkg/agent/agent_test.go
@@ -111,9 +111,9 @@ func TestAgent_Stream_SetsStreamingEnabled(t *testing.T) {
 	if !ok || fin == nil {
 		t.Fatalf("event not *AgentRunFinishedEvent: %+v", ev)
 	}
-	result, ok := fin.Result.(*types.AgentRunResult)
-	if !ok || result == nil {
-		t.Fatalf("Result not *AgentRunResult: %+v", fin.Result)
+	result := fin.Result
+	if result == nil {
+		t.Fatalf("Result is nil")
 	}
 	if result.Content != "done" {
 		t.Fatalf("result.Content = %q", result.Content)
diff --git a/pkg/agent/config.go b/pkg/agent/config.go
index f25dd7a..3df6850 100644
--- a/pkg/agent/config.go
+++ b/pkg/agent/config.go
@@ -1140,11 +1140,11 @@ func validateToolNames(tools []interfaces.Tool) error {
 			return fmt.Errorf("tool must not be nil")
 		}
 		name := tool.Name()
-		kind := interfaces.KindOf(tool)
+		kind := types.KindOf(tool)
 		if prev, ok := seen[name]; ok {
 			return fmt.Errorf("duplicate tool name %q: %s tool conflicts with an existing %s tool", name, kind, prev)
 		}
-		seen[name] = kind
+		seen[name] = string(kind)
 	}
 	return nil
 }
diff --git a/pkg/agent/mcp.go b/pkg/agent/mcp.go
index 8db4daa..7b6d76e 100644
--- a/pkg/agent/mcp.go
+++ b/pkg/agent/mcp.go
@@ -19,7 +19,7 @@ var (
 )
 
 var _ interfaces.Tool = (*MCPTool)(nil)
-var _ interfaces.ToolKindProvider = (*MCPTool)(nil)
+var _ types.ToolKindProvider = (*MCPTool)(nil)
 
 // NOTE: MCPTools for the same server share one MCPClient. The default pkg/mcp/client serializes
 // RPCs on that client with a mutex; custom MCPClient implementations should document concurrency behavior.
@@ -63,8 +63,8 @@ func NewMCPTool(serverName string, spec interfaces.ToolSpec, client interfaces.M
 	}
 }
 
-// ToolKind implements [interfaces.ToolKindProvider].
-func (t *MCPTool) ToolKind() string { return "mcp" }
+// ToolKind implements [types.ToolKindProvider]; it always reports [types.ToolKindMCP].
+func (t *MCPTool) ToolKind() types.ToolKind { return types.ToolKindMCP }
 
 // Name implements interfaces.Tool.
 func (t *MCPTool) Name() string {
diff --git a/pkg/agent/retriever.go b/pkg/agent/retriever.go
index 057642c..cbaf906 100644
--- a/pkg/agent/retriever.go
+++ b/pkg/agent/retriever.go
@@ -20,7 +20,7 @@ var (
 )
 
 var _ interfaces.Tool = (*RetrieverTool)(nil)
-var _ interfaces.ToolKindProvider = (*RetrieverTool)(nil)
+var _ types.ToolKindProvider = (*RetrieverTool)(nil)
 
 // RetrieverTool implements [interfaces.Tool] for [RetrieverModeAgentic] and [RetrieverModeHybrid].
 type RetrieverTool struct {
@@ -61,8 +61,8 @@ func NewRetrieverTool(retriever interfaces.Retriever) interfaces.Tool {
 	return &RetrieverTool{RetrieverName: rn, Retriever: retriever}
 }
 
-// ToolKind implements [interfaces.ToolKindProvider].
-func (t *RetrieverTool) ToolKind() string { return "retriever" }
+// ToolKind implements [types.ToolKindProvider]; it always reports [types.ToolKindRetriever].
+func (t *RetrieverTool) ToolKind() types.ToolKind { return types.ToolKindRetriever }
 
 // Name implements [interfaces.Tool].
 func (t *RetrieverTool) Name() string {
diff --git a/pkg/agent/subagent.go b/pkg/agent/subagent.go
index 6be6237..3e79eb2 100644
--- a/pkg/agent/subagent.go
+++ b/pkg/agent/subagent.go
@@ -8,13 +8,14 @@ import (
 	"strings"
 
 	"github.com/agenticenv/agent-sdk-go/internal/runtime"
+	"github.com/agenticenv/agent-sdk-go/internal/types"
 	"github.com/agenticenv/agent-sdk-go/pkg/interfaces"
 	"github.com/agenticenv/agent-sdk-go/pkg/tools"
 )
 
 var _ AgentTool = (*subAgentTool)(nil)
 var _ interfaces.Tool = (*subAgentTool)(nil)
-var _ interfaces.ToolKindProvider = (*subAgentTool)(nil)
+var _ types.ToolKindProvider = (*subAgentTool)(nil)
 
 // Sub-agent tool names must be identifier-like for LLM tool APIs; normalize display names accordingly.
 var subAgentToolNameNonIdent = regexp.MustCompile(`[^a-zA-Z0-9]+`)
@@ -119,5 +120,5 @@ func (t *subAgentTool) Execute(_ context.Context, _ map[string]any) (any, error)
 
 func (t *subAgentTool) SubAgent() *Agent { return t.agent }
 
-// ToolKind implements [interfaces.ToolKindProvider].
-func (t *subAgentTool) ToolKind() string { return "sub-agent" }
+// ToolKind implements [types.ToolKindProvider]; it always reports [types.ToolKindSubAgent].
+func (t *subAgentTool) ToolKind() types.ToolKind { return types.ToolKindSubAgent }
diff --git a/pkg/interfaces/mocks/mock_tool.go b/pkg/interfaces/mocks/mock_tool.go
index 068c279..f519ff4 100644
--- a/pkg/interfaces/mocks/mock_tool.go
+++ b/pkg/interfaces/mocks/mock_tool.go
@@ -1,5 +1,5 @@
 // Code generated by MockGen. DO NOT EDIT.
-// Source: github.com/agenticenv/agent-sdk-go/pkg/interfaces (interfaces: Tool,ToolApproval,ToolAuthorizer,ToolKindProvider)
+// Source: github.com/agenticenv/agent-sdk-go/pkg/interfaces (interfaces: Tool,ToolApproval,ToolAuthorizer)
 
 // Package mocks is a generated GoMock package.
 package mocks
@@ -181,40 +181,3 @@ func (mr *MockToolAuthorizerMockRecorder) Authorize(arg0, arg1 interface{}) *gom
 	mr.mock.ctrl.T.Helper()
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Authorize", reflect.TypeOf((*MockToolAuthorizer)(nil).Authorize), arg0, arg1)
 }
-
-// MockToolKindProvider is a mock of ToolKindProvider interface.
-type MockToolKindProvider struct {
-	ctrl     *gomock.Controller
-	recorder *MockToolKindProviderMockRecorder
-}
-
-// MockToolKindProviderMockRecorder is the mock recorder for MockToolKindProvider.
-type MockToolKindProviderMockRecorder struct {
-	mock *MockToolKindProvider
-}
-
-// NewMockToolKindProvider creates a new mock instance.
-func NewMockToolKindProvider(ctrl *gomock.Controller) *MockToolKindProvider {
-	mock := &MockToolKindProvider{ctrl: ctrl}
-	mock.recorder = &MockToolKindProviderMockRecorder{mock}
-	return mock
-}
-
-// EXPECT returns an object that allows the caller to indicate expected use.
-func (m *MockToolKindProvider) EXPECT() *MockToolKindProviderMockRecorder {
-	return m.recorder
-}
-
-// ToolKind mocks base method.
-func (m *MockToolKindProvider) ToolKind() string {
-	m.ctrl.T.Helper()
-	ret := m.ctrl.Call(m, "ToolKind")
-	ret0, _ := ret[0].(string)
-	return ret0
-}
-
-// ToolKind indicates an expected call of ToolKind.
-func (mr *MockToolKindProviderMockRecorder) ToolKind() *gomock.Call {
-	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ToolKind", reflect.TypeOf((*MockToolKindProvider)(nil).ToolKind))
-}
diff --git a/pkg/interfaces/tool.go b/pkg/interfaces/tool.go
index b8aa00d..011ffb9 100644
--- a/pkg/interfaces/tool.go
+++ b/pkg/interfaces/tool.go
@@ -6,7 +6,7 @@ import (
 	"github.com/agenticenv/agent-sdk-go/internal/types"
 )
 
-//go:generate mockgen -destination=./mocks/mock_tool.go -package=mocks github.com/agenticenv/agent-sdk-go/pkg/interfaces Tool,ToolApproval,ToolAuthorizer,ToolKindProvider
+//go:generate mockgen -destination=./mocks/mock_tool.go -package=mocks github.com/agenticenv/agent-sdk-go/pkg/interfaces Tool,ToolApproval,ToolAuthorizer
 
 // ToolApproval is an optional interface for tools that require interactive human approval before execution.
 // When implemented, the agent honors ApprovalRequired() when no agent-level approval policy is set.
@@ -73,18 +73,3 @@ func ToolsToSpecs(tools []Tool) []ToolSpec {
 	}
 	return specs
 }
-
-// ToolKindProvider is an optional interface for tools that report their origin.
-type ToolKindProvider interface {
-	ToolKind() string
-}
-
-// KindOf returns ToolKind() from t when implemented, or "native".
-func KindOf(t Tool) string {
-	if k, ok := t.(ToolKindProvider); ok {
-		if s := k.ToolKind(); s != "" {
-			return s
-		}
-	}
-	return "native"
-}
diff --git a/pkg/interfaces/tool_test.go b/pkg/interfaces/tool_test.go
deleted file mode 100644
index 73eda79..0000000
--- a/pkg/interfaces/tool_test.go
+++ /dev/null
@@ -1,40 +0,0 @@
-package interfaces
-
-import (
-	"context"
-	"testing"
-)
-
-type stubKindTool struct{ kind string }
-
-func (s stubKindTool) ToolKind() string                                       { return s.kind }
-func (stubKindTool) Name() string                                             { return "x" }
-func (stubKindTool) DisplayName() string                                      { return "x" }
-func (stubKindTool) Description() string                                      { return "" }
-func (stubKindTool) Parameters() JSONSchema                                   { return JSONSchema{"type": "object"} }
-func (stubKindTool) Execute(_ context.Context, _ map[string]any) (any, error) { return nil, nil }
-
-type stubNativeTool struct{}
-
-func (stubNativeTool) Name() string           { return "n" }
-func (stubNativeTool) DisplayName() string    { return "n" }
-func (stubNativeTool) Description() string    { return "" }
-func (stubNativeTool) Parameters() JSONSchema { return JSONSchema{"type": "object"} }
-func (stubNativeTool) Execute(_ context.Context, _ map[string]any) (any, error) {
-	return nil, nil
-}
-
-func TestKindOf(t *testing.T) {
-	if KindOf(nil) != "native" {
-		t.Fatalf("nil = %q", KindOf(nil))
-	}
-	if KindOf(stubNativeTool{}) != "native" {
-		t.Fatal("native tool without provider")
-	}
-	if KindOf(stubKindTool{kind: "mcp"}) != "mcp" {
-		t.Fatal("mcp kind")
-	}
-	if KindOf(stubKindTool{kind: ""}) != "native" {
-		t.Fatal("empty kind falls back to native")
-	}
-}