diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f0a3f10..a9b32c7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,38 @@ jobs: - name: Build run: make build + eval-harness: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: PromptFoo + working-directory: eval-harness/promptfoo + run: npx --yes promptfoo@latest eval -c config.yaml + + - name: DeepEval + working-directory: eval-harness/deepeval + run: | + python3 -m venv .venv + .venv/bin/pip install -r requirements.txt + .venv/bin/pytest test_agent.py -v + # examples:local — off until if: true and EXAMPLES_* repo secrets (examples/.env.defaults). examples: if: false diff --git a/.gitignore b/.gitignore index aef69b8..f69f2b3 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,9 @@ pnpm-debug.log* .env*.local .DS_Store + +# Python / DeepEval (eval-harness/deepeval) +eval-harness/deepeval/.venv/ +eval-harness/deepeval/.pytest_cache/ +eval-harness/deepeval/.deepeval/ +eval-harness/deepeval/__pycache__/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b7cb3b8..32fdeb4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -62,7 +62,7 @@ Keep your branch short and descriptive. Sync with `main` before opening a PR: `g make check ``` -Runs `fmt-check`, spell check, `make lint`, `make test`, `make build`, and `make secrets-scan` — same core gates as CI (coverage is CI-only; use `make test-coverage` locally if you want a report). +Runs `fmt-check`, spell check, `make lint`, `make test`, `make build`, and `make secrets-scan` — same core gates as the main CI job (coverage is CI-only; use `make test-coverage` locally if you want a report). 
`make test` includes eval-harness Go tests; the full Promptfoo/DeepEval suite runs in CI and via `make eval-harness` (see below). Also run the full example suite on any code change to catch regressions unit tests may miss: @@ -72,6 +72,14 @@ task examples:all Requires Task, Docker, and LLM credentials — see [examples/README.md](examples/README.md). +If you change **agent behavior** (e.g. `pkg/agent`, telemetry, tools, runtime) or **`eval-harness/`**, run: + +```bash +make eval-harness +``` + +Behavioral regression tests use mock LLM/tools and assert on run output — SDK changes can break them even when eval-harness files are untouched. Requires Node.js and Python 3.10+ — see [eval-harness/README.md](eval-harness/README.md). CI runs this automatically on PRs (`eval-harness` job). + **CI runs automatically** on pull requests to `main` (open a PR or push updates to an existing PR to re-run checks). Pushes or merges to `main` do not trigger CI; use **workflow_dispatch** in GitHub Actions for an on-demand run. Run `make check` locally before opening a PR; CI must pass on the PR before merge. To run only tests (e.g. while iterating): @@ -172,6 +180,7 @@ Using the SDK and ran into issues, unclear docs, or confusing behavior? **Raise 2. **Tests** - Add tests for new features and bug fixes. - Unit tests go in `*_test.go` files alongside the code. + - Agent behavior changes (`pkg/agent`, telemetry, tools, runtime) or **`eval-harness/`** edits — run `make eval-harness` before submitting a PR. 3. 
**Commits** - Use [conventional commits](https://www.conventionalcommits.org) — these drive the release changelog: diff --git a/Makefile b/Makefile index 460b18e..ec2b529 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: build install test lint tidy clean fmt fmt-check spell secrets-scan check +.PHONY: build install test lint tidy clean fmt fmt-check spell secrets-scan check eval-harness BIN_DIR := cmd/bin BINARY := $(BIN_DIR)/agentctl @@ -26,13 +26,24 @@ install: build cp $(BINARY) $(GOPATH_BIN)/agentctl @echo "Installed to $(GOPATH_BIN)/agentctl" -# Run tests under pkg +# Run Go tests (pkg, internal, eval-harness runner) test: @echo "==> Running tests..." go test ./pkg/... -count=1 go test ./internal/... -count=1 + go test ./eval-harness/... -count=1 @echo "==> Tests complete" +# Promptfoo + DeepEval (same as CI eval-harness job). Requires Node.js and Python 3.10+. +eval-harness: + @echo "==> Running eval-harness (Promptfoo + DeepEval)..." + cd eval-harness/promptfoo && npx --yes promptfoo@latest eval -c config.yaml + cd eval-harness/deepeval && \ + (test -d .venv || python3 -m venv .venv) && \ + .venv/bin/pip install -q -r requirements.txt && \ + .venv/bin/pytest test_agent.py -v + @echo "==> Eval-harness complete" + # Run before push: lint, test, build, and secrets scan (same core gates as CI; no auto-format). # Coverage is CI-only (`make test-coverage` when you want the report). If fmt-check fails, run `make fmt`. 
check: lint test build secrets-scan diff --git a/README.md b/README.md index a2f4063..4b5e219 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ - [Agent and worker in separate processes](#agent-and-worker-in-separate-processes) - [Conversation](#conversation-message-history) - [AG-UI Protocol](#ag-ui-protocol) +- [Telemetry](#telemetry) - [Observability](#observability) - [Wire OTLP](#wire-otlp-traces--metrics--logs-in-one-block) - [Bring your own tracer / metrics](#bring-your-own-tracer--metrics) @@ -54,6 +55,7 @@ - [Code Coverage](#code-coverage) - [Setup and run examples](#setup-and-run-examples) - [Benchmarks](#benchmarks) +- [Eval Harness](#eval-harness) - [Production Readiness Checklist](#production-readiness-checklist) - [Disclaimer](#disclaimer) @@ -328,8 +330,8 @@ Streaming text deltas (`TEXT_MESSAGE_*`) versus the `**RUN_FINISHED**` body ofte Each LLM completion can report token counts via `[interfaces.LLMUsage](pkg/interfaces/llm.go)` on `[interfaces.LLMResponse.Usage](pkg/interfaces/llm.go)`. OpenAI, Anthropic, and Gemini clients populate `**PromptTokens**`, `**CompletionTokens**`, `**TotalTokens**`, and optional `**CachedPromptTokens**` / `**ReasoningTokens**` when the provider returns them. -- `**Agent.Run` / `RunAsync`:** `**Usage`** on [*AgentRunResult](pkg/agent/agent.go) is the **sum** across all LLM calls in that run (including tool rounds). Use it for cost estimates, quotas, and logging. -- `**Stream`:** the same aggregate appears as `**Usage*`* on `**RUN_FINISHED**`: assert `**[*AgentRunFinishedEvent](pkg/agent/agent.go)**`, then `**Result**` as `**[*AgentRunResult](pkg/agent/agent.go)**`. OpenAI streaming `**include_usage**` surfaces totals there. Helpers: [examples/shared/utils.go](examples/shared/utils.go) (`UsageFooter`, `RunResultFromFinishedEvent`). +- `**Agent.Run` / `RunAsync`:** `**LLMUsage**` on [*AgentRunResult](pkg/agent/agent.go) is the **sum** across all LLM calls in that run (including tool rounds). 
Use it for cost estimates, quotas, and logging. +- `**Stream`:** the same aggregate is on `**LLMUsage**` in `**RUN_FINISHED**` `**Result**` (`**[*AgentRunFinishedEvent](pkg/agent/agent.go)**`; `**Result**` is `**[*AgentRunResult](pkg/agent/agent.go)**`). OpenAI streaming `**include_usage**` surfaces totals there. Helpers: [examples/shared/utils.go](examples/shared/utils.go) (`LLMUsageFooter`, `RunResultFromFinishedEvent`). Examples: [examples/simple_agent](examples/simple_agent) (prints usage after `Run`), [examples/agent_with_stream](examples/agent_with_stream) (prints usage on `**RUN_FINISHED**`). @@ -348,7 +350,6 @@ Custom tools may also implement: - `interfaces.ToolApproval` — tool-level hint for **interactive human approval**. Use this when a person should decide whether the tool runs, and no agent-level approval policy is set. - `interfaces.ToolAuthorizer` — tool-level **programmatic authorization**. Use this when code should decide whether the tool runs before approval/execute (for example: scopes, tenancy, environment flags, or feature access). Return `Allow=false` to deny the tool call without executing it. -- `interfaces.ToolKindProvider` — optional interface that reports the tool's origin category. The built-in tool wrappers already implement it (`"mcp"`, `"a2a"`, `"sub-agent"`, `"retriever"`). Implement it on custom tools when you want to distinguish origin in logs or metrics. Use `interfaces.KindOf(tool)` to read the kind from any tool; returns `"native"` when the interface is not implemented. ```go reg := agent.NewToolRegistry() @@ -1116,6 +1117,41 @@ for ev := range ch { --- +## Telemetry + +Every run populates `AgentTelemetry` inside `AgentRunResult` with behavioral metrics across three areas: + +> Telemetry fields are designed to support eval harness assertions — see [eval-harness/](eval-harness/) for examples with PromptFoo and DeepEval. 
+ +- **Run** — start/end time, total LLM calls, and finish reason (`complete` or `max_iterations`) +- **Tools** — total calls, failed calls, and per-tool breakdown for registered tools and MCP tools +- **Storage** — RAG retriever search counts split by mode (`prefetch_searches`, `agentic_searches`) and failure count; all fields are zero when no retriever is configured + +```go +result, _ := ag.Run(ctx, "prompt") +t := result.Telemetry +fmt.Printf("llm_calls=%d finish=%s\n", t.Run.TotalLLMCalls, t.Run.FinishReason) +fmt.Printf("tool_calls=%d failed=%d\n", t.Tools.TotalCalls, t.Tools.FailedCalls) +fmt.Printf("retriever_searches=%d prefetch=%d agentic=%d\n", + t.Storage.TotalRetrieverSearches, + t.Storage.PrefetchSearches, + t.Storage.AgenticSearches) +``` + +**Stream** — telemetry is on `Result.Telemetry` inside the `RUN_FINISHED` event: + +```go +for ev := range ch { + if result := shared.RunResultFromFinishedEvent(ev); result != nil { + fmt.Println(result.Telemetry.Run.TotalLLMCalls) + } +} +``` + +Examples can print a formatted telemetry footer — see [examples/README.md](examples/README.md#run-output). + +--- + ## Observability The SDK emits **traces**, **metrics**, and **logs** via OpenTelemetry. All signals are **no-op by default** — if you set nothing, the agent runs without any overhead. Wire them only when you need them. @@ -1248,7 +1284,7 @@ A Temporal connection (`WithTemporalConfig` or `WithTemporalClient`) is **option - **WithMaxSubAgentDepth**: Maximum delegation hops from this agent (default 2). See [Sub-agents](#sub-agents). - **WithMaxIterations**: Max LLM rounds (default 5). - **WithStream**: Enable `Stream` partial content streaming. -- **Token usage:** Not a separate option. On `**Run`**, read `**Usage**` on `**[*AgentRunResult](pkg/agent/agent.go)**` when set. 
On `**Stream**`, assert `**[*AgentRunFinishedEvent](pkg/agent/agent.go)**` with `**[*AgentRunResult](pkg/agent/agent.go)**` in `**Result**` (aggregate across LLM/tool rounds when the provider reports it). See [Token usage](#token-usage-llmusage). +- **Token usage:** Not a separate option. On `**Run`**, read `**LLMUsage**` on `**[*AgentRunResult](pkg/agent/agent.go)**` when set. On `**Stream**`, assert `**[*AgentRunFinishedEvent](pkg/agent/agent.go)**` and read `**Result.LLMUsage**` (aggregate across LLM/tool rounds when the provider reports it). See [Token usage](#token-usage-llmusage). - **WithLLMSampling**: Pass `&agent.LLMSampling{...}`; nil or zero fields leave that knob to the provider default. Which fields apply where: - `**Temperature`** — OpenAI, Anthropic, Gemini. - `**MaxTokens**` — OpenAI, Anthropic, Gemini (max output / completion tokens). @@ -1272,7 +1308,7 @@ A Temporal connection (`WithTemporalConfig` or `WithTemporalClient`) is **option Contributors: see **[CONTRIBUTING.md](CONTRIBUTING.md)** for prerequisites (Go, Temporal setup, workflow, and guidelines). Project policies: **[SECURITY.md](SECURITY.md)** for vulnerability reporting and **[CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md)** for community standards. -Quick commands: `make test` | `make lint` | `make fmt` | `make spell` | `make tidy` | `make test-coverage` (`make lint` runs `gofmt -s`, `misspell`, then `go vet` + `golangci-lint`) +Quick commands: `make test` | `make check` | `make eval-harness` | `make lint` | `make fmt` | `make spell` | `make tidy` | `make test-coverage` (`make lint` runs `gofmt -s`, `misspell`, then `go vet` + `golangci-lint`) ## Code Coverage @@ -1319,6 +1355,12 @@ Config-driven benchmark suite to measure agent performance in your environment. See [benchmarks/README.md](benchmarks/README.md). +## Eval Harness + +Behavioral regression suite for agent runs — verify tools, completion, and telemetry without a live LLM. 
Use it to catch breaking changes in CI and as a reference for wiring your own agents into eval tools. + +See [eval-harness/README.md](eval-harness/README.md). + ## Production Readiness Checklist - **Run and approval limits** — Use `WithTimeout` and/or a context deadline on `Run` / `Stream`; use `WithApprovalTimeout` when tools require approval (activity retry counts inside workflows are fixed in the SDK, not user-tunable). diff --git a/cmd/main.go b/cmd/main.go index d635812..507456b 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -234,8 +234,7 @@ func runResultFromFinishedEvent(ev agent.AgentEvent) *agent.AgentRunResult { if !ok || fin == nil { return nil } - res, _ := fin.Result.(*agent.AgentRunResult) - return res + return fin.Result } func printEvent(ev agent.AgentEvent, streamedContent bool) { diff --git a/eval-harness/README.md b/eval-harness/README.md new file mode 100644 index 0000000..0cc9450 --- /dev/null +++ b/eval-harness/README.md @@ -0,0 +1,161 @@ +# Eval harness + +Runs a single agent execution with mock LLM and mock tools. Prints JSON to stdout with `content`, `llm_usage`, and `telemetry` for evaluation assertions. 
+ +## Runner + +From the repo root: + +```bash +go run ./eval-harness/runner +go run ./eval-harness/runner -prompt "custom prompt" +go run ./eval-harness/runner -runtime temporal +go run ./eval-harness/runner -tools 2 +go run ./eval-harness/runner -config eval-harness/runner/config.yaml +``` + +### Arguments + +| Flag | Default | Description | +|------|---------|-------------| +| `-config` | `eval-harness/runner/config.yaml` | Path to config file | +| `-prompt` | from config | Override `user_prompt` | +| `-runtime` | from config | Override `runtime` (`local` or `temporal`) | +| `-tools` | from config | Override `agent.tool_count` | + +### config.yaml + +Default path: `eval-harness/runner/config.yaml` + +| Field | Default | Description | +|-------|---------|-------------| +| `runtime` | `local` | `local` or `temporal` | +| `user_prompt` | — | User message (required) | +| `agent.name` | `eval-agent` | Agent name | +| `agent.system_prompt` | built-in eval prompt | System instructions | +| `agent.tool_count` | `3` | Number of mock tools | +| `temporal.host` | `localhost` | Temporal host when `runtime: temporal` | +| `temporal.port` | `7233` | Temporal port | +| `temporal.namespace` | `default` | Temporal namespace | +| `temporal.task_queue` | `eval-harness` | Task queue | + +Temporal mode uses an embedded local worker. Start Temporal before running (e.g. `task infra:temporal:up` from `examples/`). + +### Output + +Stdout is always JSON: + +```json +{ + "content": "eval complete", + "llm_usage": { "prompt_tokens": 600, "completion_tokens": 400, "total_tokens": 1000 }, + "telemetry": { "run": { ... }, "tools": { ... }, "storage": { ... } } +} +``` + +## PromptFoo + +Config: `eval-harness/promptfoo/config.yaml` + +PromptFoo runs the eval harness as an [exec provider](https://www.promptfoo.dev/docs/providers/custom-script/). Each test invokes the runner once, parses the JSON stdout, and asserts on `content`, `llm_usage`, and `telemetry`. 
+ +### Run + +```bash +cd eval-harness/promptfoo +npx promptfoo eval -c config.yaml +``` + +View results in the web UI: + +```bash +npx promptfoo view +``` + +Requires Node.js. PromptFoo is installed on demand via `npx`; no local install is required. + +### How it works + +| Piece | Role | +|-------|------| +| **Provider** | `exec:../run_agent.sh` — shared wrapper in `eval-harness/` | +| **Prompt** | `"run eval check"` — passed as the first arg to `run_agent.sh` (overrides `user_prompt`) | +| **Output** | Runner JSON on stdout; assertions use `JSON.parse(output)` | +| **Paths** | `eval-harness/run_agent.sh` resolves repo root and runner config | + +The runner accepts PromptFoo’s prompt as a positional argument when `-prompt` is not set. Agent settings (`tool_count`, `runtime`, etc.) still come from `eval-harness/runner/config.yaml`. + +### Tests + +Four test cases in `config.yaml`, each with a JavaScript assertion on runner JSON: + +| Test | Checks | +|------|--------| +| all mock tools were called | `telemetry.tools.breakdown` — `eval_tool_1`, `eval_tool_2`, `eval_tool_3`, each called once | +| agent completed successfully | `telemetry.run.finish_reason === "complete"` and `content === "eval complete"` | +| no failed tool calls | `telemetry.tools.failed_calls === 0` | +| llm usage reported | `llm_usage.total_tokens > 0` | + +### Customizing + +- **Change the prompt** — edit `prompts` in `promptfoo/config.yaml`, or add `vars` and use `{{var}}` in the prompt string. +- **Change agent behavior** — edit `eval-harness/runner/config.yaml` (tool count, runtime, system prompt), or adjust `eval-harness/run_agent.sh`. +- **Add tests** — append cases under `tests:` with `type: javascript` and `value:` returning a boolean. +- **Filter providers** — use `label: eval-agent` in test `options.providers` if you add more providers later. + +## DeepEval + +Python tests in `eval-harness/deepeval/`. 
The suite runs the Go eval harness, parses the JSON stdout, and asserts on `content`, `llm_usage`, and `telemetry` — the same output contract as the runner and PromptFoo. + +### Run + +```bash +cd eval-harness/deepeval +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +pytest test_agent.py -v +``` + +Requires Python 3.10+ and Go. No API key is required for the default tests. + +### How it works + +1. `harness.run_agent()` calls `eval-harness/run_agent.sh` and parses JSON. +2. Tests read telemetry from the agent SDK run output. +3. `assert_test()` runs DeepEval metrics where useful; plain pytest asserts cover the rest. + +| Source field | Used for | +|--------------|----------| +| `content` | Agent response text | +| `llm_usage.total_tokens` | Token usage reported | +| `telemetry.run.finish_reason` | Run completed (`"complete"`) | +| `telemetry.tools.failed_calls` | No tool failures | +| `telemetry.tools.total_calls` | Expected call count | +| `telemetry.tools.breakdown` | Per-tool call counts; fed into `tools_called` for `ToolCorrectnessMetric` | + +Example — extract tools from telemetry: + +```python +agent_res = run_agent() +tools = list(agent_res["telemetry"]["tools"]["breakdown"].keys()) +finish_reason = agent_res["telemetry"]["run"]["finish_reason"] +``` + +### Tests + +Two pytest tests in `test_agent.py`: + +| Test | Checks | +|------|--------| +| `test_agent_completes_with_telemetry` | `content`, `llm_usage`, `finish_reason`, `failed_calls`, `total_calls`, `breakdown` keys | +| `test_agent_tool_correctness` | `ToolCorrectnessMetric` — `tools_called` from telemetry vs expected tools | + +### Customizing + +- **Change the prompt** — pass a different string to `run_agent(prompt=...)`. +- **Change agent behavior** — edit `eval-harness/runner/config.yaml` or `eval-harness/run_agent.sh`. +- **Add tests** — extend `test_agent.py` with more telemetry asserts or DeepEval `LLMTestCase` fields. 
def tools_called(agent_res: dict) -> list[str]:
    """List the tool names recorded in the run's telemetry.

    Reads ``agent_res["telemetry"]["tools"]["breakdown"]`` and returns its
    keys as a list, in the mapping's own iteration order. The per-tool
    values stored in the breakdown are not inspected.
    """
    per_tool = agent_res["telemetry"]["tools"]["breakdown"]
    return [*per_tool.keys()]
def test_agent_completes_with_telemetry():
    """Check one harness run end to end: content, token usage, and telemetry.

    The assertions run in a fixed order so the first failing field is the
    one reported: response text, total tokens, finish reason, failed tool
    calls, total tool calls, then the set of tool names in the breakdown.
    """
    output = run_agent()

    # Top-level fields of the runner JSON.
    assert output["content"] == "eval complete"
    assert output["llm_usage"]["total_tokens"] > 0

    run_section = output["telemetry"]["run"]
    tools_section = output["telemetry"]["tools"]

    assert run_section["finish_reason"] == "complete"
    assert tools_section["failed_calls"] == 0
    assert tools_section["total_calls"] == 3

    # Compared as a set: only which tools were called matters here, not order.
    expected_names = {"eval_tool_1", "eval_tool_2", "eval_tool_3"}
    assert set(tools_called(output)) == expected_names
#!/usr/bin/env bash
# Eval harness wrapper: runs the Go runner from the repo root and lets it
# print its JSON to stdout.
# Args: $1 prompt (optional). Promptfoo also passes $2/$3 (JSON); ignored.

set -euo pipefail

# The script lives in eval-harness/, so the repo root is one level up.
repo_root="$(cd "$(dirname "$0")/.." && pwd)"
runner_config="${repo_root}/eval-harness/runner/config.yaml"

cd "$repo_root"

# Build the argv once; -prompt is appended only when a non-empty $1 was given.
runner_cmd=(go run ./eval-harness/runner -config "$runner_config")
if [[ -n "${1:-}" ]]; then
  runner_cmd+=(-prompt "$1")
fi

exec "${runner_cmd[@]}"
+ tool_count: 3 + +temporal: + host: localhost + port: 7233 + namespace: default + task_queue: eval-harness diff --git a/eval-harness/runner/main.go b/eval-harness/runner/main.go new file mode 100644 index 0000000..1b8ad64 --- /dev/null +++ b/eval-harness/runner/main.go @@ -0,0 +1,49 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "log" + "os" + + "github.com/agenticenv/agent-sdk-go/eval-harness/runner/setup" +) + +func main() { + configPath := flag.String("config", "", "path to config.yaml (default: runner/config.yaml)") + prompt := flag.String("prompt", "", "override user_prompt from config") + runtimeFlag := flag.String("runtime", "", "override runtime: local or temporal") + toolCount := flag.Int("tools", 0, "override agent.tool_count (0 = use config)") + flag.Parse() + + fileCfg, err := setup.LoadConfig(*configPath) + if err != nil { + log.Fatalf("load config: %v", err) + } + + runCfg := fileCfg.Config() + if *prompt != "" { + runCfg.UserPrompt = *prompt + } else if args := flag.Args(); len(args) > 0 && args[0] != "" { + // Promptfoo exec provider passes the rendered prompt as the first positional arg. + runCfg.UserPrompt = args[0] + } + if *runtimeFlag != "" { + runCfg.Runtime = setup.Runtime(*runtimeFlag) + } + if *toolCount > 0 { + runCfg.ToolCount = *toolCount + } + + result, err := Run(context.Background(), runCfg) + if err != nil { + log.Fatalf("eval run failed: %v", err) + } + + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + if err := enc.Encode(OutputFromResult(result)); err != nil { + log.Fatalf("encode result: %v", err) + } +} diff --git a/eval-harness/runner/output.go b/eval-harness/runner/output.go new file mode 100644 index 0000000..fa94cf8 --- /dev/null +++ b/eval-harness/runner/output.go @@ -0,0 +1,22 @@ +package main + +import "github.com/agenticenv/agent-sdk-go/pkg/agent" + +// Output is a JSON-friendly view of an agent run for eval harness tools. 
+type Output struct { + Content string `json:"content"` + LLMUsage *agent.LLMUsage `json:"llm_usage,omitempty"` + Telemetry *agent.AgentTelemetry `json:"telemetry,omitempty"` +} + +// OutputFromResult maps an AgentRunResult into Output for assertions or CLI JSON output. +func OutputFromResult(result *agent.AgentRunResult) *Output { + if result == nil { + return nil + } + return &Output{ + Content: result.Content, + LLMUsage: result.LLMUsage, + Telemetry: result.Telemetry, + } +} diff --git a/eval-harness/runner/runner.go b/eval-harness/runner/runner.go new file mode 100644 index 0000000..40f0d74 --- /dev/null +++ b/eval-harness/runner/runner.go @@ -0,0 +1,29 @@ +package main + +import ( + "context" + "fmt" + + "github.com/agenticenv/agent-sdk-go/eval-harness/runner/setup" + "github.com/agenticenv/agent-sdk-go/pkg/agent" +) + +// Run executes one agent run with mock LLM and mock tools, then closes the agent. +func Run(ctx context.Context, cfg setup.Config) (*agent.AgentRunResult, error) { + cfg.ApplyDefaults() + if err := cfg.Validate(); err != nil { + return nil, err + } + + a, err := setup.BuildAgent(cfg) + if err != nil { + return nil, err + } + defer a.Close() + + result, err := a.Run(ctx, cfg.UserPrompt, nil) + if err != nil { + return nil, fmt.Errorf("agent run: %w", err) + } + return result, nil +} diff --git a/eval-harness/runner/runner_test.go b/eval-harness/runner/runner_test.go new file mode 100644 index 0000000..19fb5f5 --- /dev/null +++ b/eval-harness/runner/runner_test.go @@ -0,0 +1,69 @@ +package main + +import ( + "context" + "os" + "testing" + + "github.com/agenticenv/agent-sdk-go/eval-harness/runner/setup" + "github.com/stretchr/testify/require" +) + +func TestLoadConfig_Defaults(t *testing.T) { + cfg, err := setup.LoadConfig("config.yaml") + require.NoError(t, err) + require.Equal(t, "run eval check", cfg.UserPrompt) + require.Equal(t, "local", cfg.Runtime) + require.Equal(t, 3, cfg.Agent.ToolCount) +} + +func TestRun_FromFileConfig(t *testing.T) 
{ + fileCfg, err := setup.LoadConfig("config.yaml") + require.NoError(t, err) + + result, err := Run(context.Background(), fileCfg.Config()) + require.NoError(t, err) + require.NotEmpty(t, result.Content) + require.Equal(t, int64(3), result.Telemetry.Tools.TotalCalls) +} + +func TestRun_LocalRuntime(t *testing.T) { + result, err := Run(context.Background(), setup.Config{ + UserPrompt: "run eval check", + Runtime: setup.RuntimeLocal, + ToolCount: 2, + }) + require.NoError(t, err) + require.NotNil(t, result) + require.NotEmpty(t, result.Content) + require.NotNil(t, result.Telemetry) + require.Equal(t, int64(2), result.Telemetry.Tools.TotalCalls) +} + +func TestRun_TemporalRuntime(t *testing.T) { + if os.Getenv("EVAL_HARNESS_TEMPORAL") != "true" { + t.Skip("set EVAL_HARNESS_TEMPORAL=true with Temporal running on localhost:7233") + } + + result, err := Run(context.Background(), setup.Config{ + UserPrompt: "run eval check", + Runtime: setup.RuntimeTemporal, + ToolCount: 2, + }) + require.NoError(t, err) + require.NotEmpty(t, result.Content) + require.Equal(t, int64(2), result.Telemetry.Tools.TotalCalls) +} + +func TestRun_RequiresUserPrompt(t *testing.T) { + _, err := Run(context.Background(), setup.Config{}) + require.Error(t, err) +} + +func TestRun_InvalidRuntime(t *testing.T) { + _, err := Run(context.Background(), setup.Config{ + UserPrompt: "hello", + Runtime: "invalid", + }) + require.Error(t, err) +} diff --git a/eval-harness/runner/setup/agent.go b/eval-harness/runner/setup/agent.go new file mode 100644 index 0000000..863fb64 --- /dev/null +++ b/eval-harness/runner/setup/agent.go @@ -0,0 +1,53 @@ +package setup + +import ( + "fmt" + "math/rand" + "time" + + "github.com/agenticenv/agent-sdk-go/pkg/agent" +) + +const evalRNGSeed int64 = 42 + +// BuildAgent constructs an agent from cfg using mock LLM and tools when not overridden. 
+func BuildAgent(cfg Config) (*agent.Agent, error) { + rng := rand.New(rand.NewSource(evalRNGSeed)) + + llmClient := cfg.LLMClient + if llmClient == nil { + llmClient = NewMockLLMClient(cfg.LLM, rng) + } + + toolRegistry := cfg.ToolRegistry + if toolRegistry == nil { + toolRegistry = RegisterMockTools(cfg.ToolCount, cfg.Tool, rng) + } + + opts := []agent.Option{ + agent.WithName(cfg.AgentName), + agent.WithDescription("Eval harness agent for single-run testing."), + agent.WithSystemPrompt(cfg.SystemPrompt), + agent.WithLLMClient(llmClient), + agent.WithToolRegistry(toolRegistry), + agent.WithToolApprovalPolicy(agent.AutoToolApprovalPolicy()), + agent.WithLogger(cfg.Logger), + } + if cfg.UseTemporal() { + opts = append(opts, agent.WithTemporalConfig(&agent.TemporalConfig{ + Host: cfg.Temporal.Host, + Port: cfg.Temporal.Port, + Namespace: cfg.Temporal.Namespace, + TaskQueue: cfg.Temporal.TaskQueue, + })) + } + + a, err := agent.NewAgent(opts...) + if err != nil { + return nil, fmt.Errorf("new agent: %w", err) + } + if cfg.UseTemporal() { + time.Sleep(300 * time.Millisecond) + } + return a, nil +} diff --git a/eval-harness/runner/setup/config.go b/eval-harness/runner/setup/config.go new file mode 100644 index 0000000..de4f53f --- /dev/null +++ b/eval-harness/runner/setup/config.go @@ -0,0 +1,115 @@ +package setup + +import ( + "fmt" + "strings" + + "github.com/agenticenv/agent-sdk-go/pkg/agent" + "github.com/agenticenv/agent-sdk-go/pkg/interfaces" + "github.com/agenticenv/agent-sdk-go/pkg/logger" +) + +const ( + DefaultAgentName = "eval-agent" + DefaultToolCount = 3 + DefaultMockTokens = 500 + DefaultSystemPrompt = "You are an evaluation agent. Use available tools when helpful, then answer concisely." + DefaultRuntime = RuntimeLocal +) + +// Runtime selects the agent execution backend. 
+type Runtime string + +const ( + RuntimeLocal Runtime = "local" + RuntimeTemporal Runtime = "temporal" +) + +// LLMConfig configures the built-in mock LLM (internal defaults, not in YAML). +type LLMConfig struct { + MockTokens int +} + +// ToolConfig configures mock tools (internal defaults, not in YAML). +type ToolConfig struct{} + +// TemporalConfig configures Temporal when Runtime is temporal. +type TemporalConfig struct { + Host string `mapstructure:"host"` + Port int `mapstructure:"port"` + Namespace string `mapstructure:"namespace"` + TaskQueue string `mapstructure:"task_queue"` +} + +// Config holds settings for a single eval agent run. +type Config struct { + UserPrompt string + Runtime Runtime + Temporal TemporalConfig + AgentName string + SystemPrompt string + LLM LLMConfig + Tool ToolConfig + ToolCount int + LLMClient interfaces.LLMClient + ToolRegistry agent.ToolRegistry + Logger logger.Logger +} + +// UseTemporal reports whether cfg selects the Temporal runtime. +func (c *Config) UseTemporal() bool { + return c != nil && strings.EqualFold(strings.TrimSpace(string(c.Runtime)), string(RuntimeTemporal)) +} + +// ApplyDefaults fills unset config fields. +func (c *Config) ApplyDefaults() { + if c == nil { + return + } + if strings.TrimSpace(string(c.Runtime)) == "" { + c.Runtime = DefaultRuntime + } + if c.AgentName == "" { + c.AgentName = DefaultAgentName + } + if c.SystemPrompt == "" { + c.SystemPrompt = DefaultSystemPrompt + } + if c.ToolCount <= 0 { + c.ToolCount = DefaultToolCount + } + if c.LLM.MockTokens <= 0 { + c.LLM.MockTokens = DefaultMockTokens + } + if c.Logger == nil { + c.Logger = logger.NoopLogger() + } + if c.Temporal.TaskQueue == "" { + c.Temporal.TaskQueue = "eval-harness" + } + if c.Temporal.Port == 0 { + c.Temporal.Port = 7233 + } + if c.Temporal.Host == "" { + c.Temporal.Host = "localhost" + } + if c.Temporal.Namespace == "" { + c.Temporal.Namespace = "default" + } +} + +// Validate checks required config fields. 
+func (c *Config) Validate() error { + if c == nil { + return fmt.Errorf("config is required") + } + if c.UserPrompt == "" { + return fmt.Errorf("user prompt is required") + } + switch strings.ToLower(strings.TrimSpace(string(c.Runtime))) { + case string(RuntimeLocal), string(RuntimeTemporal): + default: + return fmt.Errorf("runtime must be %q or %q", RuntimeLocal, RuntimeTemporal) + } + return nil +} diff --git a/eval-harness/runner/setup/load.go b/eval-harness/runner/setup/load.go new file mode 100644 index 0000000..4ad0f76 --- /dev/null +++ b/eval-harness/runner/setup/load.go @@ -0,0 +1,116 @@ +package setup + +import ( + "fmt" + "os" + "strings" + + "github.com/spf13/viper" +) + +// FileConfig is the YAML configuration for eval-harness runs. +type FileConfig struct { + Runtime string `mapstructure:"runtime"` + UserPrompt string `mapstructure:"user_prompt"` + Agent FileAgentConfig `mapstructure:"agent"` + Temporal TemporalConfig `mapstructure:"temporal"` +} + +// FileAgentConfig holds agent fields from YAML. +type FileAgentConfig struct { + Name string `mapstructure:"name"` + SystemPrompt string `mapstructure:"system_prompt"` + ToolCount int `mapstructure:"tool_count"` +} + +// Config returns a runner Config from the file config. +func (f *FileConfig) Config() Config { + if f == nil { + return Config{} + } + return Config{ + UserPrompt: f.UserPrompt, + Runtime: Runtime(f.Runtime), + Temporal: f.Temporal, + AgentName: f.Agent.Name, + SystemPrompt: f.Agent.SystemPrompt, + ToolCount: f.Agent.ToolCount, + } +} + +// LoadConfig reads and validates eval-harness config from a YAML file. 
+func LoadConfig(path string) (*FileConfig, error) { + if path == "" { + path = defaultConfigPath() + } + v := viper.New() + v.SetConfigFile(path) + v.SetConfigType("yaml") + if err := v.ReadInConfig(); err != nil { + return nil, fmt.Errorf("read config %q: %w", path, err) + } + var cfg FileConfig + if err := v.Unmarshal(&cfg); err != nil { + return nil, fmt.Errorf("parse config: %w", err) + } + if err := cfg.validate(); err != nil { + return nil, err + } + return &cfg, nil +} + +// DefaultConfigPath returns the default eval-harness config file path. +func DefaultConfigPath() string { return defaultConfigPath() } + +func defaultConfigPath() string { + for _, candidate := range []string{ + "eval-harness/runner/config.yaml", + "runner/config.yaml", + "config.yaml", + } { + if _, err := os.Stat(candidate); err == nil { + return candidate + } + } + return "config.yaml" +} + +func (f *FileConfig) validate() error { + if f == nil { + return fmt.Errorf("config is required") + } + if strings.TrimSpace(f.UserPrompt) == "" { + return fmt.Errorf("user_prompt is required") + } + switch strings.ToLower(strings.TrimSpace(f.Runtime)) { + case "", string(RuntimeLocal): + if f.Runtime == "" { + f.Runtime = string(RuntimeLocal) + } + case string(RuntimeTemporal): + default: + return fmt.Errorf("runtime must be %q or %q", RuntimeLocal, RuntimeTemporal) + } + if f.Agent.ToolCount <= 0 { + f.Agent.ToolCount = DefaultToolCount + } + if f.Agent.Name == "" { + f.Agent.Name = DefaultAgentName + } + if f.Agent.SystemPrompt == "" { + f.Agent.SystemPrompt = DefaultSystemPrompt + } + if f.Temporal.TaskQueue == "" { + f.Temporal.TaskQueue = "eval-harness" + } + if f.Temporal.Port == 0 { + f.Temporal.Port = 7233 + } + if f.Temporal.Host == "" { + f.Temporal.Host = "localhost" + } + if f.Temporal.Namespace == "" { + f.Temporal.Namespace = "default" + } + return nil +} diff --git a/eval-harness/runner/setup/mock_llm.go b/eval-harness/runner/setup/mock_llm.go new file mode 100644 index 
0000000..8ef9064 --- /dev/null +++ b/eval-harness/runner/setup/mock_llm.go @@ -0,0 +1,119 @@ +package setup + +import ( + "context" + "fmt" + "math/rand" + "time" + + "github.com/agenticenv/agent-sdk-go/pkg/interfaces" +) + +const mockLLMModel = "eval-mock" + +// MockLLMClient is a deterministic mock LLM for eval harness runs. +type MockLLMClient struct { + cfg LLMConfig + rng *rand.Rand +} + +// NewMockLLMClient builds a mock LLM client from cfg. +func NewMockLLMClient(cfg LLMConfig, rng *rand.Rand) *MockLLMClient { + if rng == nil { + rng = rand.New(rand.NewSource(time.Now().UnixNano())) + } + return &MockLLMClient{cfg: cfg, rng: rng} +} + +func (m *MockLLMClient) Generate(ctx context.Context, request *interfaces.LLMRequest) (*interfaces.LLMResponse, error) { + promptTokens, completionTokens := splitMockTokens(m.cfg.MockTokens) + usage := &interfaces.LLMUsage{ + PromptTokens: int64(promptTokens), + CompletionTokens: int64(completionTokens), + TotalTokens: int64(promptTokens + completionTokens), + } + + if hasToolResultMessages(request) { + return &interfaces.LLMResponse{ + Content: "eval complete", + Usage: usage, + }, nil + } + + toolCalls := make([]*interfaces.ToolCall, 0, len(request.Tools)) + for i, spec := range request.Tools { + toolCalls = append(toolCalls, &interfaces.ToolCall{ + ToolCallID: fmt.Sprintf("tc-%d", i+1), + ToolName: spec.Name, + Args: map[string]any{"input": "eval"}, + }) + } + + return &interfaces.LLMResponse{ + Content: "executing tools", + ToolCalls: toolCalls, + Usage: usage, + }, nil +} + +func (m *MockLLMClient) GenerateStream(ctx context.Context, request *interfaces.LLMRequest) (interfaces.LLMStream, error) { + resp, err := m.Generate(ctx, request) + if err != nil { + return nil, err + } + return &mockLLMStream{resp: resp}, nil +} + +func (m *MockLLMClient) GetModel() string { return mockLLMModel } + +func (m *MockLLMClient) GetProvider() interfaces.LLMProvider { + return interfaces.LLMProviderOpenAI +} + +func (m *MockLLMClient) 
IsStreamSupported() bool { return false } + +type mockLLMStream struct { + resp *interfaces.LLMResponse + done bool + err error +} + +func (s *mockLLMStream) Next() bool { + if s.done { + return false + } + s.done = true + return true +} + +func (s *mockLLMStream) Current() *interfaces.LLMStreamChunk { + if s.resp == nil { + return nil + } + return &interfaces.LLMStreamChunk{ContentDelta: s.resp.Content, ToolCalls: s.resp.ToolCalls} +} + +func (s *mockLLMStream) Err() error { return s.err } + +func (s *mockLLMStream) GetResult() *interfaces.LLMResponse { return s.resp } + +func hasToolResultMessages(request *interfaces.LLMRequest) bool { + if request == nil { + return false + } + for _, msg := range request.Messages { + if msg.Role == interfaces.MessageRoleTool { + return true + } + } + return false +} + +func splitMockTokens(total int) (prompt, completion int) { + if total <= 0 { + return 0, 0 + } + prompt = total * 3 / 5 + completion = total - prompt + return prompt, completion +} diff --git a/eval-harness/runner/setup/mock_tool.go b/eval-harness/runner/setup/mock_tool.go new file mode 100644 index 0000000..e63bbf8 --- /dev/null +++ b/eval-harness/runner/setup/mock_tool.go @@ -0,0 +1,61 @@ +package setup + +import ( + "context" + "fmt" + "math/rand" + + "github.com/agenticenv/agent-sdk-go/pkg/agent" + "github.com/agenticenv/agent-sdk-go/pkg/interfaces" + "github.com/agenticenv/agent-sdk-go/pkg/tools" +) + +const mockToolPrefix = "eval_tool_" + +// MockTool is a mock tool for eval harness runs. +type MockTool struct { + name string + cfg ToolConfig + rng *rand.Rand +} + +func newMockTool(index int, cfg ToolConfig, rng *rand.Rand) *MockTool { + return &MockTool{ + name: fmt.Sprintf("%s%d", mockToolPrefix, index), + cfg: cfg, + rng: rng, + } +} + +func (t *MockTool) Name() string { return t.name } + +func (t *MockTool) DisplayName() string { return t.name } + +func (t *MockTool) Description() string { + return "Eval harness mock tool." 
+} + +func (t *MockTool) Parameters() interfaces.JSONSchema { + return tools.Params(map[string]interfaces.JSONSchema{ + "input": tools.ParamString("Input payload for the eval tool."), + }, "input") +} + +func (t *MockTool) Execute(ctx context.Context, args map[string]any) (any, error) { + input, _ := args["input"].(string) + if input == "" { + input = "eval" + } + return map[string]any{"tool": t.name, "input": input, "status": "ok"}, nil +} + +// RegisterMockTools registers count mock tools on a new registry. +func RegisterMockTools(count int, cfg ToolConfig, rng *rand.Rand) agent.ToolRegistry { + reg := agent.NewToolRegistry() + for i := 1; i <= count; i++ { + if err := reg.Register(newMockTool(i, cfg, rng)); err != nil { + panic(err) + } + } + return reg +} diff --git a/examples/.env.defaults b/examples/.env.defaults index faf076b..a722111 100644 --- a/examples/.env.defaults +++ b/examples/.env.defaults @@ -7,6 +7,11 @@ # error | warn | info | debug LOG_LEVEL=error +# --- Example run output (examples/shared PrintRunFooters) --- +# Set to true to print token usage and/or run telemetry after each agent run. +SHOW_LLM_USAGE=false +SHOW_TELEMETRY=false + # --- Agent runtime --- # local = in-process (default). temporal = requires Temporal server (see temporal-setup.md). AGENT_RUNTIME=local diff --git a/examples/README.md b/examples/README.md index e8c654c..3476cb2 100644 --- a/examples/README.md +++ b/examples/README.md @@ -270,6 +270,23 @@ Examples send conversation (user prompt, assistant response) to **stdout** and i go run ./simple_agent "Hello" 2>/dev/null ``` +## Run output + +All examples call [`shared.PrintRunFooters`](shared/utils.go) after each run. Set these in `examples/.env` (defaults in [`.env.defaults`](.env.defaults)) to print formatted footers: + +| Env var | Default | When `true` | +|---------|---------|-------------| +| `SHOW_LLM_USAGE` | `false` | Prints token usage (`prompt_tokens`, `completion_tokens`, etc.) 
| +| `SHOW_TELEMETRY` | `false` | Prints run telemetry (`total_llm_calls`, tool counts, retriever searches, etc.) | + +```bash +SHOW_LLM_USAGE=true go run ./simple_agent "Hello, what can you do?" +SHOW_TELEMETRY=true go run ./simple_agent "Hello, what can you do?" +SHOW_LLM_USAGE=true SHOW_TELEMETRY=true go run ./agent_with_stream "What's 17 * 23?" +``` + +For retriever examples, `SHOW_TELEMETRY=true` also prints prefetch/agentic search breakdowns — see [agent_with_retriever/README.md](agent_with_retriever/README.md). + ## Env vars | Env var | Description | @@ -283,6 +300,8 @@ Examples send conversation (user prompt, assistant response) to **stdout** and i | `LLM_MODEL` | e.g. `gpt-4o`, `claude-3-5-sonnet-20241022` | | `LLM_BASEURL` | Optional (custom/proxy endpoints) | | `LOG_LEVEL` | `error` (default), `warn`, `info`, `debug` — logs go to stderr | +| `SHOW_LLM_USAGE` | Set to `true` to print token usage footer after each run (default: `false`) | +| `SHOW_TELEMETRY` | Set to `true` to print run telemetry footer after each run (default: `false`) | | `SERPER_API_KEY` | For search tool | | `MCP_TRANSPORT` | **Required** for MCP examples: `stdio` or `streamable_http` (aliases: `local`, `http`, `remote`, …) | | `MCP_SERVER_NAME` | Optional server id for wiring (defaults: `local` for stdio, `remote` for HTTP) | diff --git a/examples/agent_with_a2a_client/main.go b/examples/agent_with_a2a_client/main.go index 3a57ab8..20863f3 100644 --- a/examples/agent_with_a2a_client/main.go +++ b/examples/agent_with_a2a_client/main.go @@ -8,6 +8,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" a2aclient "github.com/agenticenv/agent-sdk-go/pkg/a2a/client" "github.com/agenticenv/agent-sdk-go/pkg/agent" ) @@ -79,4 +80,5 @@ func main() { return } fmt.Println("assistant:", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_a2a_config/main.go 
b/examples/agent_with_a2a_config/main.go index 409093f..70a3ebe 100644 --- a/examples/agent_with_a2a_config/main.go +++ b/examples/agent_with_a2a_config/main.go @@ -8,6 +8,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" ) @@ -54,4 +55,5 @@ func main() { return } fmt.Println("assistant:", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_conversation/main.go b/examples/agent_with_conversation/main.go index ff503f3..8623c09 100644 --- a/examples/agent_with_conversation/main.go +++ b/examples/agent_with_conversation/main.go @@ -9,6 +9,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" "github.com/agenticenv/agent-sdk-go/pkg/conversation/redis" "github.com/agenticenv/agent-sdk-go/pkg/tools/calculator" @@ -96,6 +97,7 @@ func runSingleTurn(ctx context.Context, a *agent.Agent, prompt, convID string) { return } fmt.Println("assistant:", result.Content) + shared.PrintRunFooters(result) } func runInteractive(ctx context.Context, a *agent.Agent, convID string) { @@ -124,5 +126,6 @@ func runInteractive(ctx context.Context, a *agent.Agent, convID string) { continue } fmt.Println("assistant:", result.Content) + shared.PrintRunFooters(result) } } diff --git a/examples/agent_with_json_response/main.go b/examples/agent_with_json_response/main.go index 6226026..8f946f9 100644 --- a/examples/agent_with_json_response/main.go +++ b/examples/agent_with_json_response/main.go @@ -9,6 +9,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" "github.com/agenticenv/agent-sdk-go/pkg/interfaces" ) @@ -81,4 +82,5 @@ func main() { return } fmt.Printf("assistant (JSON):\n%s\n", 
string(pretty)) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_mcp_client/main.go b/examples/agent_with_mcp_client/main.go index 8087c02..2f6ea3a 100644 --- a/examples/agent_with_mcp_client/main.go +++ b/examples/agent_with_mcp_client/main.go @@ -8,6 +8,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" mcpclient "github.com/agenticenv/agent-sdk-go/pkg/mcp/client" ) @@ -74,4 +75,5 @@ func main() { return } fmt.Println("assistant:", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_mcp_config/main.go b/examples/agent_with_mcp_config/main.go index fa53816..910742a 100644 --- a/examples/agent_with_mcp_config/main.go +++ b/examples/agent_with_mcp_config/main.go @@ -8,6 +8,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" ) @@ -70,4 +71,5 @@ func main() { return } fmt.Println("assistant:", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_observability/config/main.go b/examples/agent_with_observability/config/main.go index e88a6f2..60e7762 100644 --- a/examples/agent_with_observability/config/main.go +++ b/examples/agent_with_observability/config/main.go @@ -15,6 +15,7 @@ import ( excfg "github.com/agenticenv/agent-sdk-go/examples" "github.com/agenticenv/agent-sdk-go/examples/agent_with_observability/setup" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" ) @@ -50,4 +51,5 @@ func main() { return } fmt.Printf("assistant: %s\n", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_observability/objects/main.go b/examples/agent_with_observability/objects/main.go index 70f0d64..1dd8f6a 100644 --- a/examples/agent_with_observability/objects/main.go +++ 
b/examples/agent_with_observability/objects/main.go @@ -18,6 +18,7 @@ import ( excfg "github.com/agenticenv/agent-sdk-go/examples" "github.com/agenticenv/agent-sdk-go/examples/agent_with_observability/setup" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" "github.com/agenticenv/agent-sdk-go/pkg/observability" ) @@ -92,4 +93,5 @@ func main() { return } fmt.Printf("assistant: %s\n", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_reasoning/main.go b/examples/agent_with_reasoning/main.go index 1a8da25..cce8386 100644 --- a/examples/agent_with_reasoning/main.go +++ b/examples/agent_with_reasoning/main.go @@ -114,6 +114,7 @@ func printEvent(ev agent.AgentEvent, streamedSoFar bool) { if res != nil && res.Content != "" && !streamedSoFar { fmt.Printf("\n[complete] %s\n", res.Content) } + shared.PrintRunFooters(res) default: // Ignore tool events (none registered). } diff --git a/examples/agent_with_retriever/README.md b/examples/agent_with_retriever/README.md index 0eaaafa..dbf0878 100644 --- a/examples/agent_with_retriever/README.md +++ b/examples/agent_with_retriever/README.md @@ -69,6 +69,12 @@ go run ./agent_with_retriever/weaviate "What is the return policy?" RETRIEVER_MODE=prefetch go run ./agent_with_retriever/weaviate "What is the return policy?" ``` +Add `SHOW_TELEMETRY=true` to see retriever search counts (total, failed, prefetch/agentic breakdown) printed after the run: + +```bash +SHOW_TELEMETRY=true go run ./agent_with_retriever/weaviate "What is the return policy?" +``` + ### Weaviate troubleshooting | Symptom | What to do | @@ -122,6 +128,12 @@ go run ./agent_with_retriever/pgvector "What is the return policy?" RETRIEVER_MODE=prefetch go run ./agent_with_retriever/pgvector "What is the return policy?" 
``` +Add `SHOW_TELEMETRY=true` to see retriever search counts (total, failed, prefetch/agentic breakdown) printed after the run: + +```bash +SHOW_TELEMETRY=true go run ./agent_with_retriever/pgvector "What is the return policy?" +``` + ### pgvector troubleshooting | Symptom | What to do | diff --git a/examples/agent_with_retriever/pgvector/main.go b/examples/agent_with_retriever/pgvector/main.go index 107dc9c..efdeae9 100644 --- a/examples/agent_with_retriever/pgvector/main.go +++ b/examples/agent_with_retriever/pgvector/main.go @@ -16,6 +16,7 @@ import ( examplecfg "github.com/agenticenv/agent-sdk-go/examples" "github.com/agenticenv/agent-sdk-go/examples/agent_with_retriever/common" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" pgretriever "github.com/agenticenv/agent-sdk-go/pkg/retriever/pgvector" ) @@ -87,4 +88,5 @@ func main() { return } fmt.Println("assistant:", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_retriever/weaviate/main.go b/examples/agent_with_retriever/weaviate/main.go index fb89da3..c6dc3ba 100644 --- a/examples/agent_with_retriever/weaviate/main.go +++ b/examples/agent_with_retriever/weaviate/main.go @@ -16,6 +16,7 @@ import ( examplecfg "github.com/agenticenv/agent-sdk-go/examples" "github.com/agenticenv/agent-sdk-go/examples/agent_with_retriever/common" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" weaviate "github.com/agenticenv/agent-sdk-go/pkg/retriever/weaviate" ) @@ -77,4 +78,5 @@ func main() { return } fmt.Println("assistant:", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_run_async/main.go b/examples/agent_with_run_async/main.go index df47395..efb840f 100644 --- a/examples/agent_with_run_async/main.go +++ b/examples/agent_with_run_async/main.go @@ -12,6 +12,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + 
"github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" "github.com/agenticenv/agent-sdk-go/pkg/tools/calculator" "github.com/agenticenv/agent-sdk-go/pkg/tools/echo" @@ -82,6 +83,7 @@ func main() { return } fmt.Println("agent:", res.Result.Content) + shared.PrintRunFooters(res.Result) } func makeApprovalHandler(lineCh <-chan string) agent.ApprovalHandler { diff --git a/examples/agent_with_stream/main.go b/examples/agent_with_stream/main.go index fa6a22f..ad3987f 100644 --- a/examples/agent_with_stream/main.go +++ b/examples/agent_with_stream/main.go @@ -141,10 +141,7 @@ func printEvent(ev agent.AgentEvent, streamedSoFar bool) { } else if res != nil && res.Content != "" { fmt.Printf("[%s]", eventType) } - if u := shared.UsageFooter(res); u != "" { - fmt.Println() - fmt.Println(u) - } + shared.PrintRunFooters(res) default: //fmt.Printf("[%s] %+v\n", ev.Type(), ev) return diff --git a/examples/agent_with_stream_conversation/main.go b/examples/agent_with_stream_conversation/main.go index 1ca885d..4a60fe9 100644 --- a/examples/agent_with_stream_conversation/main.go +++ b/examples/agent_with_stream_conversation/main.go @@ -172,13 +172,15 @@ func printEvent(ev agent.AgentEvent, streamedContent bool) { } case agent.AgentEventTypeRunFinished: res := shared.RunResultFromFinishedEvent(ev) - if res != nil && res.Content != "" && !streamedContent { - fmt.Printf("\n[%s] %s\n", eventType, res.Content) + if res == nil { + return } - if u := shared.UsageFooter(res); u != "" { - fmt.Println() - fmt.Println(u) + if res.Content != "" && !streamedContent { + fmt.Printf("\n[%s] %s\n", eventType, res.Content) + } else { + fmt.Printf("\n[%s]\n", eventType) } + shared.PrintRunFooters(res) default: //fmt.Printf("[%s] %+v\n", eventType, ev) return diff --git a/examples/agent_with_subagents/main.go b/examples/agent_with_subagents/main.go index 6d1b0e9..466cb47 100644 --- a/examples/agent_with_subagents/main.go +++ 
b/examples/agent_with_subagents/main.go @@ -182,6 +182,7 @@ func main() { who = "agent" } fmt.Printf("\n[%s] [%s complete] %s\n", eventType, who, res.Content) + shared.PrintRunFooters(res) } } } diff --git a/examples/agent_with_temporal_client/main.go b/examples/agent_with_temporal_client/main.go index b5c9fb5..1035889 100644 --- a/examples/agent_with_temporal_client/main.go +++ b/examples/agent_with_temporal_client/main.go @@ -13,6 +13,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/internal/runtime/temporal" "github.com/agenticenv/agent-sdk-go/pkg/agent" "go.temporal.io/sdk/client" @@ -65,4 +66,5 @@ func main() { return } fmt.Println("assistant:", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_tools/approval/main.go b/examples/agent_with_tools/approval/main.go index 22f1e97..785afc2 100644 --- a/examples/agent_with_tools/approval/main.go +++ b/examples/agent_with_tools/approval/main.go @@ -10,6 +10,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" "github.com/agenticenv/agent-sdk-go/pkg/tools/calculator" "github.com/agenticenv/agent-sdk-go/pkg/tools/echo" @@ -69,6 +70,7 @@ func main() { return } fmt.Println("agent:", result.Content) + shared.PrintRunFooters(result) } func makeApprovalHandler(lineCh <-chan string) agent.ApprovalHandler { diff --git a/examples/agent_with_tools/authorizer/main.go b/examples/agent_with_tools/authorizer/main.go index 4cc27c8..c67908a 100644 --- a/examples/agent_with_tools/authorizer/main.go +++ b/examples/agent_with_tools/authorizer/main.go @@ -114,6 +114,7 @@ func printEvent(ev agent.AgentEvent, streamedContent bool) { } fmt.Printf("\n[%s complete] %s\n", who, res.Content) } + shared.PrintRunFooters(res) default: //fmt.Printf("[%s] %+v\n", ev.Type(), 
ev) return diff --git a/examples/agent_with_tools/basic/main.go b/examples/agent_with_tools/basic/main.go index 2e7958a..488377a 100644 --- a/examples/agent_with_tools/basic/main.go +++ b/examples/agent_with_tools/basic/main.go @@ -8,6 +8,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" "github.com/agenticenv/agent-sdk-go/pkg/tools/calculator" "github.com/agenticenv/agent-sdk-go/pkg/tools/currenttime" @@ -67,4 +68,5 @@ func main() { return } fmt.Println("agent:", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_tools/custom/main.go b/examples/agent_with_tools/custom/main.go index 9a14dd1..03080f0 100644 --- a/examples/agent_with_tools/custom/main.go +++ b/examples/agent_with_tools/custom/main.go @@ -8,6 +8,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" ) @@ -50,4 +51,5 @@ func main() { return } fmt.Println("agent:", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_tools/dynamic_registry/main.go b/examples/agent_with_tools/dynamic_registry/main.go index 3e36e63..edd0ad2 100644 --- a/examples/agent_with_tools/dynamic_registry/main.go +++ b/examples/agent_with_tools/dynamic_registry/main.go @@ -6,6 +6,7 @@ import ( "log" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" "github.com/agenticenv/agent-sdk-go/pkg/tools/calculator" "github.com/agenticenv/agent-sdk-go/pkg/tools/echo" @@ -51,6 +52,7 @@ func main() { log.Printf("run 1 failed: %v", err) } else { fmt.Println("agent:", result.Content) + shared.PrintRunFooters(result) } if err := a.ToolRegistry().Register(calculator.New()); err != nil { @@ -66,4 +68,5 @@ func main() { return } 
fmt.Println("agent:", result.Content) + shared.PrintRunFooters(result) } diff --git a/examples/agent_with_worker/agent/main.go b/examples/agent_with_worker/agent/main.go index 884ba35..d3c490e 100644 --- a/examples/agent_with_worker/agent/main.go +++ b/examples/agent_with_worker/agent/main.go @@ -153,9 +153,7 @@ func runStream(ctx context.Context, a *agent.Agent, scanner *bufio.Scanner, prom } else { fmt.Println("[complete]") } - if u := shared.UsageFooter(res); u != "" { - fmt.Println(u) - } + shared.PrintRunFooters(res) default: //fmt.Printf("[%s] %+v\n", ev.Type(), ev) diff --git a/examples/config.go b/examples/config.go index 71907da..ff47a9d 100644 --- a/examples/config.go +++ b/examples/config.go @@ -125,15 +125,13 @@ func getEnvInt(key string, def int) int { return def } -// ToolApprovalOptions applies AutoToolApprovalPolicy when EXAMPLES_AUTO_APPROVE -// is set (task batch runs). Manual go run leaves it unset (default require-all + prompts). +// ToolApprovalOptions applies AutoToolApprovalPolicy when EXAMPLES_AUTO_APPROVE=true +// (task batch runs). Manual go run leaves it unset or false (default require-all + prompts). 
func ToolApprovalOptions() []agent.Option { - switch strings.ToLower(strings.TrimSpace(os.Getenv("EXAMPLES_AUTO_APPROVE"))) { - case "1", "true", "yes", "y": + if strings.EqualFold(strings.TrimSpace(os.Getenv("EXAMPLES_AUTO_APPROVE")), "true") { return []agent.Option{agent.WithToolApprovalPolicy(agent.AutoToolApprovalPolicy())} - default: - return nil } + return nil } func init() { diff --git a/examples/durable_agent/agent/main.go b/examples/durable_agent/agent/main.go index a4cf4ee..e50ab76 100644 --- a/examples/durable_agent/agent/main.go +++ b/examples/durable_agent/agent/main.go @@ -163,9 +163,7 @@ func runStream(ctx context.Context, a *agent.Agent, scanner *bufio.Scanner, prom } else { fmt.Println("[complete]") } - if u := shared.UsageFooter(res); u != "" { - fmt.Println(u) - } + shared.PrintRunFooters(res) default: //fmt.Printf("[%s] %+v\n", ev.Type(), ev) diff --git a/examples/multiple_agents/main.go b/examples/multiple_agents/main.go index 452369f..9233fb5 100644 --- a/examples/multiple_agents/main.go +++ b/examples/multiple_agents/main.go @@ -9,6 +9,7 @@ import ( "sync" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" ) @@ -65,6 +66,7 @@ func main() { return } fmt.Printf("%s: %s\n", name, result.Content) + shared.PrintRunFooters(result) } if mode == "concurrent" { diff --git a/examples/shared/utils.go b/examples/shared/utils.go index 7ff8d0e..0085db2 100644 --- a/examples/shared/utils.go +++ b/examples/shared/utils.go @@ -2,7 +2,10 @@ package shared import ( "fmt" + "os" + "sort" "strings" + "time" "github.com/agenticenv/agent-sdk-go/pkg/agent" ) @@ -16,8 +19,7 @@ func RunResultFromFinishedEvent(ev agent.AgentEvent) *agent.AgentRunResult { if !ok || fin == nil { return nil } - res, _ := fin.Result.(*agent.AgentRunResult) - return res + return fin.Result } // ToolApprovalValueFromEvent returns the CUSTOM tool-approval payload when ev is that stream 
event. @@ -59,19 +61,111 @@ func MarksStreamDelta(ev agent.AgentEvent) bool { } } -// UsageFooter returns a non-empty line describing token usage from a finished run, or "". -func UsageFooter(res *agent.AgentRunResult) string { - if res == nil || res.Usage == nil { +// ShowLLMUsage reports whether examples should print token usage (SHOW_LLM_USAGE; default false). +func ShowLLMUsage() bool { + return envBool("SHOW_LLM_USAGE") +} + +// ShowTelemetry reports whether examples should print run telemetry (SHOW_TELEMETRY; default false). +func ShowTelemetry() bool { + return envBool("SHOW_TELEMETRY") +} + +// PrintRunFooters prints usage and telemetry when SHOW_LLM_USAGE / SHOW_TELEMETRY are enabled. +func PrintRunFooters(result *agent.AgentRunResult) { + if result == nil { + return + } + if ShowLLMUsage() { + if footer := LLMUsageFooter(result.LLMUsage); footer != "" { + fmt.Println(footer) + } + } + if ShowTelemetry() { + if footer := TelemetryFooter(result.Telemetry); footer != "" { + fmt.Println(footer) + } + } +} + +func envBool(key string) bool { + return strings.EqualFold(strings.TrimSpace(os.Getenv(key)), "true") +} + +// LLMUsageFooter returns a multi-line block describing token usage, or "". 
+func LLMUsageFooter(llmUsage *agent.LLMUsage) string { + if llmUsage == nil { return "" } - u := res.Usage - b := strings.Builder{} - fmt.Fprintf(&b, "[USAGE] prompt=%d completion=%d total=%d", u.PromptTokens, u.CompletionTokens, u.TotalTokens) - if u.CachedPromptTokens > 0 { - fmt.Fprintf(&b, " cached_prompt=%d", u.CachedPromptTokens) + lines := []string{ + "\n[USAGE]", + fmt.Sprintf(" prompt_tokens: %d", llmUsage.PromptTokens), + fmt.Sprintf(" completion_tokens: %d", llmUsage.CompletionTokens), + fmt.Sprintf(" total_tokens: %d", llmUsage.TotalTokens), } - if u.ReasoningTokens > 0 { - fmt.Fprintf(&b, " reasoning=%d", u.ReasoningTokens) + if llmUsage.CachedPromptTokens > 0 { + lines = append(lines, fmt.Sprintf(" cached_prompt: %d", llmUsage.CachedPromptTokens)) + } + if llmUsage.ReasoningTokens > 0 { + lines = append(lines, fmt.Sprintf(" reasoning_tokens: %d", llmUsage.ReasoningTokens)) + } + return strings.Join(lines, "\n") +} + +// TelemetryFooter returns a multi-line block describing run telemetry, or "". 
+func TelemetryFooter(telemetry *agent.AgentTelemetry) string { + if telemetry == nil { + return "" + } + lines := []string{ + "[TELEMETRY RUN]", + fmt.Sprintf(" total_llm_calls: %d", telemetry.Run.TotalLLMCalls), + fmt.Sprintf(" started_at: %s", formatTelemetryTime(telemetry.Run.StartedAt)), + fmt.Sprintf(" completed_at: %s", formatTelemetryTime(telemetry.Run.CompletedAt)), + } + if telemetry.Run.FinishReason != "" { + lines = append(lines, fmt.Sprintf(" finish_reason: %s", telemetry.Run.FinishReason)) + } + + lines = append(lines, + "[TELEMETRY TOOLS]", + fmt.Sprintf(" total_calls: %d", telemetry.Tools.TotalCalls), + fmt.Sprintf(" failed_calls: %d", telemetry.Tools.FailedCalls), + ) + if len(telemetry.Tools.Breakdown) > 0 { + lines = append(lines, " breakdown:") + for _, name := range sortedKeys(telemetry.Tools.Breakdown) { + lines = append(lines, fmt.Sprintf(" %s: %d", name, telemetry.Tools.Breakdown[name])) + } + } + if len(telemetry.Tools.FailedBreakdown) > 0 { + lines = append(lines, " failed_breakdown:") + for _, name := range sortedKeys(telemetry.Tools.FailedBreakdown) { + lines = append(lines, fmt.Sprintf(" %s: %d", name, telemetry.Tools.FailedBreakdown[name])) + } + } + lines = append(lines, + "[TELEMETRY STORAGE]", + fmt.Sprintf(" total_retriever_searches: %d", telemetry.Storage.TotalRetrieverSearches), + fmt.Sprintf(" failed_retriever_searches: %d", telemetry.Storage.FailedRetrieverSearches), + fmt.Sprintf(" prefetch_searches: %d", telemetry.Storage.PrefetchSearches), + fmt.Sprintf(" agentic_searches: %d", telemetry.Storage.AgenticSearches), + ) + return strings.Join(lines, "\n") +} + +func formatTelemetryTime(t time.Time) string { + if t.IsZero() { + return "-" + } + return t.UTC().Format(time.RFC3339) +} + +func sortedKeys(m map[string]int64) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) } - return b.String() + sort.Strings(keys) + return keys } diff --git a/examples/simple_agent/main.go 
b/examples/simple_agent/main.go index 2268276..7d8e932 100644 --- a/examples/simple_agent/main.go +++ b/examples/simple_agent/main.go @@ -8,6 +8,7 @@ import ( "strings" config "github.com/agenticenv/agent-sdk-go/examples" + "github.com/agenticenv/agent-sdk-go/examples/shared" "github.com/agenticenv/agent-sdk-go/pkg/agent" ) @@ -45,14 +46,5 @@ func main() { return } fmt.Printf("assistant: %s\n", result.Content) - if result.Usage != nil { - fmt.Printf("\nusage: prompt=%d completion=%d total=%d", result.Usage.PromptTokens, result.Usage.CompletionTokens, result.Usage.TotalTokens) - if result.Usage.CachedPromptTokens > 0 { - fmt.Printf(" cached_prompt=%d", result.Usage.CachedPromptTokens) - } - if result.Usage.ReasoningTokens > 0 { - fmt.Printf(" reasoning=%d", result.Usage.ReasoningTokens) - } - fmt.Println() - } + shared.PrintRunFooters(result) } diff --git a/internal/events/events_test.go b/internal/events/events_test.go index 81ec8b8..ce3b4cf 100644 --- a/internal/events/events_test.go +++ b/internal/events/events_test.go @@ -4,6 +4,8 @@ import ( "encoding/json" "strings" "testing" + + "github.com/agenticenv/agent-sdk-go/internal/types" ) func TestConstructorsAndRoundTripForAllEventTypes(t *testing.T) { @@ -26,7 +28,7 @@ func TestConstructorsAndRoundTripForAllEventTypes(t *testing.T) { }, { name: "run_finished_with_result", - event: NewAgentRunFinishedEvent("thread-2", "run-2", map[string]any{"ok": true}), + event: NewAgentRunFinishedEvent("thread-2", "run-2", &types.AgentRunResult{Content: "ok"}), wantType: AgentEventTypeRunFinished, assertFields: func(t *testing.T, raw map[string]any) { t.Helper() @@ -263,6 +265,37 @@ func TestConstructorsAndRoundTripForAllEventTypes(t *testing.T) { } } +func TestRunFinishedResultRoundTrip(t *testing.T) { + orig := NewAgentRunFinishedEvent("thread-1", "run-1", &types.AgentRunResult{ + Content: "hello", + AgentName: "agent-a", + Model: "gpt-test", + LLMUsage: &types.LLMUsage{TotalTokens: 42}, + }) + data, err := orig.ToJSON() + if 
err != nil { + t.Fatalf("ToJSON error: %v", err) + } + + decoded, err := EventFromJSON(data) + if err != nil { + t.Fatalf("EventFromJSON error: %v", err) + } + fin, ok := decoded.(*AgentRunFinishedEvent) + if !ok || fin == nil { + t.Fatalf("decoded type: %T", decoded) + } + if fin.Result == nil { + t.Fatal("decoded Result is nil") + } + if fin.Result.Content != "hello" || fin.Result.AgentName != "agent-a" || fin.Result.Model != "gpt-test" { + t.Fatalf("unexpected result: %#v", fin.Result) + } + if fin.Result.LLMUsage == nil || fin.Result.LLMUsage.TotalTokens != 42 { + t.Fatalf("unexpected llm usage: %#v", fin.Result.LLMUsage) + } +} + func TestEventTypeFromJSON(t *testing.T) { data := []byte(`{"type":"TOOL_CALL_END","toolCallId":"x"}`) typ, err := EventTypeFromJSON(data) diff --git a/internal/events/lifecycle_events.go b/internal/events/lifecycle_events.go index cb04aaa..7a9a70d 100644 --- a/internal/events/lifecycle_events.go +++ b/internal/events/lifecycle_events.go @@ -1,6 +1,10 @@ package events -import "encoding/json" +import ( + "encoding/json" + + "github.com/agenticenv/agent-sdk-go/internal/types" +) // RUN_STARTED type AgentRunStartedEvent struct { @@ -27,12 +31,12 @@ func (e *AgentRunStartedEvent) ToJSON() ([]byte, error) { return json.Marshal(e) // RUN_FINISHED type AgentRunFinishedEvent struct { *BaseEvent - ThreadID string `json:"threadId"` - RunID string `json:"runId"` - Result any `json:"result,omitempty"` // ← any not json.RawMessage + ThreadID string `json:"threadId"` + RunID string `json:"runId"` + Result *types.AgentRunResult `json:"result,omitempty"` } -func NewAgentRunFinishedEvent(threadID, runID string, result any) *AgentRunFinishedEvent { +func NewAgentRunFinishedEvent(threadID, runID string, result *types.AgentRunResult) *AgentRunFinishedEvent { return &AgentRunFinishedEvent{ BaseEvent: NewBaseEvent(AgentEventTypeRunFinished), ThreadID: threadID, diff --git a/internal/runtime/base/runtime.go b/internal/runtime/base/runtime.go index 
1dc0792..7a9cfa1 100644 --- a/internal/runtime/base/runtime.go +++ b/internal/runtime/base/runtime.go @@ -31,6 +31,17 @@ type Runtime struct { ToolExecutionMode types.AgentToolExecutionMode } +type ExecuteLLMInput struct { + Logger logger.Logger + AgentName string + MessageID string + Messages []interfaces.Message + SkipTools bool + RetrieverContext string + Tools []interfaces.Tool + Emit func(events.AgentEvent) +} + // BuildLLMRequest constructs an LLMRequest from the given messages and options. // When retrieverContext is non-empty it is appended to the system prompt (prefetch/hybrid mode). // tools is the per-run resolved tool list from [runtime.ExecuteRequest] or activity resolve. @@ -116,6 +127,7 @@ func (rt *Runtime) llmResponseToResult(resp *interfaces.LLMResponse, tools []int ToolCallID: tc.ToolCallID, ToolName: tc.ToolName, ToolDisplayName: displayName, + ToolKind: types.KindOf(tool), Args: tc.Args, NeedsApproval: rt.RequiresApproval(tool), }) @@ -133,17 +145,8 @@ func emitEvent(fn func(events.AgentEvent), ev events.AgentEvent) { // ExecuteLLM calls the LLM in non-streaming mode, records metrics and traces, emits // TEXT_MESSAGE_START / TEXT_MESSAGE_CONTENT / TEXT_MESSAGE_END events, and returns LLMResult. // messageID and agentName are used only for event construction; emit may be nil. 
-func (rt *Runtime) ExecuteLLM( - ctx context.Context, - log logger.Logger, - agentName, messageID string, - messages []interfaces.Message, - skipTools bool, - retrieverContext string, - tools []interfaces.Tool, - emit func(events.AgentEvent), -) (*LLMResult, error) { - req := rt.BuildLLMRequest(messages, skipTools, retrieverContext, tools) +func (rt *Runtime) ExecuteLLM(ctx context.Context, input ExecuteLLMInput) (*LLMResult, error) { + req := rt.BuildLLMRequest(input.Messages, input.SkipTools, input.RetrieverContext, input.Tools) llmClient := rt.AgentConfig.LLM.Client model := llmClient.GetModel() @@ -151,14 +154,14 @@ func (rt *Runtime) ExecuteLLM( modelAttr := interfaces.Attribute{Key: types.MetricAttrModel, Value: model} providerAttr := interfaces.Attribute{Key: types.MetricAttrProvider, Value: provider} - log.Debug(ctx, "runtime: LLM generate started", slog.String("scope", "runtime"), slog.Int("messageCount", len(messages))) + input.Logger.Debug(ctx, "runtime: LLM generate started", slog.String("scope", "runtime"), slog.Int("messageCount", len(input.Messages))) rt.Metrics.IncrementCounter(ctx, types.MetricLLMCallStarted, modelAttr, providerAttr) llmStart := time.Now() ctx, sp := rt.Tracer.StartSpan(ctx, "llm.generate", - interfaces.Attribute{Key: "agent.name", Value: strings.TrimSpace(agentName)}, - interfaces.Attribute{Key: "message.count", Value: len(messages)}, + interfaces.Attribute{Key: "agent.name", Value: strings.TrimSpace(input.AgentName)}, + interfaces.Attribute{Key: "message.count", Value: len(input.Messages)}, modelAttr, providerAttr, ) @@ -180,16 +183,16 @@ func (rt *Runtime) ExecuteLLM( rt.Metrics.RecordHistogram(ctx, types.MetricLLMTokensOutput, float64(resp.Usage.CompletionTokens), modelAttr, providerAttr) } - log.Debug(ctx, "runtime: LLM generate completed", slog.String("scope", "runtime"), slog.Int("messageCount", len(messages))) + input.Logger.Debug(ctx, "runtime: LLM generate completed", slog.String("scope", "runtime"), 
slog.Int("messageCount", len(input.Messages))) - result, err := rt.llmResponseToResult(resp, tools) + result, err := rt.llmResponseToResult(resp, input.Tools) if err != nil { return nil, err } - emitEvent(emit, events.NewAgentTextMessageStartEvent(messageID, string(interfaces.MessageRoleAssistant))) - emitEvent(emit, events.NewAgentTextMessageContentEvent(messageID, result.Content)) - emitEvent(emit, events.NewAgentTextMessageEndEvent(messageID)) + emitEvent(input.Emit, events.NewAgentTextMessageStartEvent(input.MessageID, string(interfaces.MessageRoleAssistant))) + emitEvent(input.Emit, events.NewAgentTextMessageContentEvent(input.MessageID, result.Content)) + emitEvent(input.Emit, events.NewAgentTextMessageEndEvent(input.MessageID)) return result, nil } @@ -197,17 +200,8 @@ func (rt *Runtime) ExecuteLLM( // it falls back to Generate automatically. Delta events (text content, reasoning) are emitted via // emit as chunks arrive; a final TEXT_MESSAGE_START/CONTENT/END triple is emitted for non-streaming // fallback. emit may be nil. 
-func (rt *Runtime) ExecuteLLMStream( - ctx context.Context, - log logger.Logger, - agentName, messageID string, - messages []interfaces.Message, - skipTools bool, - retrieverContext string, - tools []interfaces.Tool, - emit func(events.AgentEvent), -) (*LLMResult, error) { - req := rt.BuildLLMRequest(messages, skipTools, retrieverContext, tools) +func (rt *Runtime) ExecuteLLMStream(ctx context.Context, input ExecuteLLMInput) (*LLMResult, error) { + req := rt.BuildLLMRequest(input.Messages, input.SkipTools, input.RetrieverContext, input.Tools) llmClient := rt.AgentConfig.LLM.Client model := llmClient.GetModel() @@ -220,8 +214,8 @@ func (rt *Runtime) ExecuteLLMStream( llmStart := time.Now() ctx, sp := rt.Tracer.StartSpan(ctx, "llm.stream", - interfaces.Attribute{Key: "agent.name", Value: strings.TrimSpace(agentName)}, - interfaces.Attribute{Key: "message.count", Value: len(messages)}, + interfaces.Attribute{Key: "agent.name", Value: strings.TrimSpace(input.AgentName)}, + interfaces.Attribute{Key: "message.count", Value: len(input.Messages)}, interfaces.Attribute{Key: "streaming", Value: isStreamSupported}, modelAttr, providerAttr, @@ -234,14 +228,14 @@ func (rt *Runtime) ExecuteLLMStream( if textMsgOpen { return } - emitEvent(emit, events.NewAgentTextMessageStartEvent(messageID, string(interfaces.MessageRoleAssistant))) + emitEvent(input.Emit, events.NewAgentTextMessageStartEvent(input.MessageID, string(interfaces.MessageRoleAssistant))) textMsgOpen = true } closeTextMsg := func() { if !textMsgOpen { return } - emitEvent(emit, events.NewAgentTextMessageEndEvent(messageID)) + emitEvent(input.Emit, events.NewAgentTextMessageEndEvent(input.MessageID)) textMsgOpen = false } // If the model never sent text chunks still emit one assistant turn (empty for tool-only). 
@@ -251,13 +245,13 @@ func (rt *Runtime) ExecuteLLMStream( return } openTextMsg() - emitEvent(emit, events.NewAgentTextMessageContentEvent(messageID, result.Content)) + emitEvent(input.Emit, events.NewAgentTextMessageContentEvent(input.MessageID, result.Content)) closeTextMsg() } // Non-streaming fallback: use Generate and emit a complete text message. if !isStreamSupported { - log.Debug(ctx, "runtime: LLM stream unsupported, using generate", slog.String("scope", "runtime")) + input.Logger.Debug(ctx, "runtime: LLM stream unsupported, using generate", slog.String("scope", "runtime")) resp, err := llmClient.Generate(ctx, req) llmLatency := float64(time.Since(llmStart).Milliseconds()) if err != nil { @@ -266,7 +260,7 @@ func (rt *Runtime) ExecuteLLMStream( rt.Metrics.RecordHistogram(ctx, types.MetricLLMLatencyMs, llmLatency, modelAttr, providerAttr) return nil, err } - result, err := rt.llmResponseToResult(resp, tools) + result, err := rt.llmResponseToResult(resp, input.Tools) if err != nil { sp.RecordError(err) rt.Metrics.IncrementCounter(ctx, types.MetricLLMCallFailed, modelAttr, providerAttr) @@ -298,11 +292,11 @@ func (rt *Runtime) ExecuteLLMStream( reasoningMsgOpen := false flushReasoning := func() { if reasoningMsgOpen { - emitEvent(emit, events.NewAgentReasoningMessageEndEvent(reasoningMID)) + emitEvent(input.Emit, events.NewAgentReasoningMessageEndEvent(reasoningMID)) reasoningMsgOpen = false } if reasoningPhaseOpen { - emitEvent(emit, events.NewAgentReasoningEndEvent(reasoningMID)) + emitEvent(input.Emit, events.NewAgentReasoningEndEvent(reasoningMID)) reasoningPhaseOpen = false } } @@ -311,9 +305,9 @@ func (rt *Runtime) ExecuteLLMStream( return } reasoningMID = uuid.New().String() - emitEvent(emit, events.NewAgentReasoningStartEvent(reasoningMID)) + emitEvent(input.Emit, events.NewAgentReasoningStartEvent(reasoningMID)) reasoningPhaseOpen = true - emitEvent(emit, events.NewAgentReasoningMessageStartEvent(reasoningMID, 
string(interfaces.MessageRoleReasoning))) + emitEvent(input.Emit, events.NewAgentReasoningMessageStartEvent(reasoningMID, string(interfaces.MessageRoleReasoning))) reasoningMsgOpen = true } @@ -325,11 +319,11 @@ func (rt *Runtime) ExecuteLLMStream( if chunk.ContentDelta != "" { flushReasoning() openTextMsg() - emitEvent(emit, events.NewAgentTextMessageContentEvent(messageID, chunk.ContentDelta)) + emitEvent(input.Emit, events.NewAgentTextMessageContentEvent(input.MessageID, chunk.ContentDelta)) } if chunk.ThinkingDelta != "" { openReasoning() - emitEvent(emit, events.NewAgentReasoningMessageContentEvent(reasoningMID, chunk.ThinkingDelta)) + emitEvent(input.Emit, events.NewAgentReasoningMessageContentEvent(reasoningMID, chunk.ThinkingDelta)) } } flushReasoning() @@ -351,7 +345,7 @@ func (rt *Runtime) ExecuteLLMStream( return nil, err } - result, err := rt.llmResponseToResult(resp, tools) + result, err := rt.llmResponseToResult(resp, input.Tools) if err != nil { sp.RecordError(err) rt.Metrics.IncrementCounter(ctx, types.MetricLLMCallFailed, modelAttr, providerAttr) @@ -366,7 +360,7 @@ func (rt *Runtime) ExecuteLLMStream( rt.Metrics.RecordHistogram(ctx, types.MetricLLMTokensOutput, float64(resp.Usage.CompletionTokens), modelAttr, providerAttr) } - log.Debug(ctx, "runtime: LLM stream completed", slog.String("scope", "runtime")) + input.Logger.Debug(ctx, "runtime: LLM stream completed", slog.String("scope", "runtime")) finalizeAssistantText(result) return result, nil } @@ -453,10 +447,10 @@ func (rt *Runtime) AuthorizeTool(ctx context.Context, log logger.Logger, tools [ // ExecuteRetrievers runs all configured retrievers in parallel for the given query and // returns a combined document context string for injection into the LLM system prompt. // Partial failures are logged and skipped; all retrievers failing returns an error. 
-func (rt *Runtime) ExecuteRetrievers(ctx context.Context, log logger.Logger, query string) (string, error) { +func (rt *Runtime) ExecuteRetrievers(ctx context.Context, log logger.Logger, query string) (*RetrieverResult, error) { retrievers := rt.AgentConfig.Retrievers.Retrievers if len(retrievers) == 0 { - return "", nil + return &RetrieverResult{}, nil } log.Debug(ctx, "runtime: retriever prefetch started", slog.String("scope", "runtime"), slog.Int("retrieverCount", len(retrievers)), slog.String("query", query)) @@ -517,14 +511,15 @@ func (rt *Runtime) ExecuteRetrievers(ctx context.Context, log logger.Logger, que sb.WriteString(FormatRetrieverDocs(res.docs)) } - if failedCount == len(retrievers) { - return "", fmt.Errorf("retriever prefetch: all %d retriever(s) failed", len(retrievers)) - } if failedCount > 0 { log.Warn(ctx, "runtime: some retrievers failed, continuing with partial context", slog.String("scope", "runtime"), slog.Int("failed", failedCount), slog.Int("total", len(retrievers))) } retrieverContext := strings.TrimSpace(sb.String()) log.Debug(ctx, "runtime: retriever prefetch completed", slog.String("scope", "runtime"), slog.Int("retrieverCount", len(retrievers)), slog.Bool("hasContext", retrieverContext != "")) - return retrieverContext, nil + return &RetrieverResult{ + Context: retrieverContext, + TotalSearches: int64(len(retrievers)), + FailedSearches: int64(failedCount), + }, nil } diff --git a/internal/runtime/base/runtime_test.go b/internal/runtime/base/runtime_test.go index db3a882..d01ed4b 100644 --- a/internal/runtime/base/runtime_test.go +++ b/internal/runtime/base/runtime_test.go @@ -244,7 +244,8 @@ func TestExecuteRetrievers_NoRetrievers(t *testing.T) { rt := newTestRuntime(sdkruntime.AgentConfig{}) got, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "query") require.NoError(t, err) - require.Equal(t, "", got) + require.Equal(t, "", got.Context) + require.Equal(t, int64(0), got.TotalSearches) } func 
TestExecuteRetrievers_AllFail(t *testing.T) { @@ -256,9 +257,11 @@ func TestExecuteRetrievers_AllFail(t *testing.T) { rt := newTestRuntime(sdkruntime.AgentConfig{ Retrievers: sdkruntime.AgentRetrievers{Retrievers: []interfaces.Retriever{r}}, }) - _, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "q") - require.Error(t, err) - require.Contains(t, err.Error(), "all") + got, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "q") + require.NoError(t, err) + require.Equal(t, "", got.Context) + require.Equal(t, int64(1), got.TotalSearches) + require.Equal(t, int64(1), got.FailedSearches) } func TestExecuteRetrievers_Success(t *testing.T) { @@ -274,7 +277,9 @@ func TestExecuteRetrievers_Success(t *testing.T) { }) got, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "my query") require.NoError(t, err) - require.Contains(t, got, "doc content") + require.Contains(t, got.Context, "doc content") + require.Equal(t, int64(1), got.TotalSearches) + require.Equal(t, int64(0), got.FailedSearches) } // --- ExecuteLLM --- @@ -283,7 +288,17 @@ func TestExecuteLLM_LLMError(t *testing.T) { rt := newTestRuntime(sdkruntime.AgentConfig{ LLM: sdkruntime.AgentLLM{Client: stubLLMClient{err: errors.New("llm unavailable")}}, }) - _, err := rt.ExecuteLLM(context.Background(), noopLog(), "agent", "msg-1", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg-1", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + _, err := rt.ExecuteLLM(context.Background(), input) require.Error(t, err) require.Contains(t, err.Error(), "llm unavailable") } @@ -294,7 +309,17 @@ func TestExecuteLLM_Success_NoTools(t *testing.T) { resp: &interfaces.LLMResponse{Content: "hello world"}, }}, }) - result, err := rt.ExecuteLLM(context.Background(), noopLog(), "agent", "msg-1", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + 
MessageID: "msg-1", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + result, err := rt.ExecuteLLM(context.Background(), input) require.NoError(t, err) require.Equal(t, "hello world", result.Content) require.Empty(t, result.ToolCalls) @@ -312,7 +337,17 @@ func TestExecuteLLM_EmitsTextMessageEvents(t *testing.T) { emitted = append(emitted, ev.Type()) } - _, err := rt.ExecuteLLM(context.Background(), noopLog(), "agent", "msg-1", nil, false, "", nil, emit) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg-1", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: emit, + } + _, err := rt.ExecuteLLM(context.Background(), input) require.NoError(t, err) require.Equal(t, []events.AgentEventType{ events.AgentEventTypeTextMessageStart, @@ -328,7 +363,17 @@ func TestExecuteLLM_NilEmitDoesNotPanic(t *testing.T) { }}, }) require.NotPanics(t, func() { - _, _ = rt.ExecuteLLM(context.Background(), noopLog(), "a", "m", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "a", + MessageID: "m", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + _, _ = rt.ExecuteLLM(context.Background(), input) }) } @@ -343,7 +388,17 @@ func TestExecuteLLM_UnknownToolCallReturnsError(t *testing.T) { }, }}, }) - _, err := rt.ExecuteLLM(context.Background(), noopLog(), "a", "m", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "a", + MessageID: "m", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + _, err := rt.ExecuteLLM(context.Background(), input) require.Error(t, err) require.Contains(t, err.Error(), "unknown tool") } @@ -357,7 +412,17 @@ func TestExecuteLLM_WithUsageMetrics(t *testing.T) { }, }}, }) - result, err := rt.ExecuteLLM(context.Background(), noopLog(), "a", "m", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + 
Logger: noopLog(), + AgentName: "a", + MessageID: "m", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + result, err := rt.ExecuteLLM(context.Background(), input) require.NoError(t, err) require.NotNil(t, result.Usage) require.EqualValues(t, 10, result.Usage.PromptTokens) @@ -380,7 +445,17 @@ func TestExecuteLLM_ToolCallWithEmptyDisplayName(t *testing.T) { }, }}, }) - result, err := rt.ExecuteLLM(context.Background(), noopLog(), "a", "m", nil, false, "", []interfaces.Tool{tool}, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "a", + MessageID: "m", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: []interfaces.Tool{tool}, + Emit: nil, + } + result, err := rt.ExecuteLLM(context.Background(), input) require.NoError(t, err) require.Len(t, result.ToolCalls, 1) require.Equal(t, "my-tool", result.ToolCalls[0].ToolDisplayName) @@ -395,7 +470,17 @@ func TestExecuteLLM_NilToolCallInResponse(t *testing.T) { }, }}, }) - result, err := rt.ExecuteLLM(context.Background(), noopLog(), "a", "m", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "a", + MessageID: "m", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + result, err := rt.ExecuteLLM(context.Background(), input) require.NoError(t, err) require.Empty(t, result.ToolCalls) } @@ -443,7 +528,9 @@ func TestExecuteRetrievers_PartialFailure(t *testing.T) { }) got, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "q") require.NoError(t, err) // partial is ok - require.Contains(t, got, "useful") + require.Contains(t, got.Context, "useful") + require.Equal(t, int64(2), got.TotalSearches) + require.Equal(t, int64(1), got.FailedSearches) } // --- ExecuteLLMStream --- @@ -491,7 +578,17 @@ func TestExecuteLLMStream_FallbackGenerate_Success(t *testing.T) { resp: &interfaces.LLMResponse{Content: "fallback answer"}, }}, }) - result, err := 
rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + result, err := rt.ExecuteLLMStream(context.Background(), input) require.NoError(t, err) require.Equal(t, "fallback answer", result.Content) } @@ -500,7 +597,17 @@ func TestExecuteLLMStream_FallbackGenerate_LLMError(t *testing.T) { rt := newTestRuntime(sdkruntime.AgentConfig{ LLM: sdkruntime.AgentLLM{Client: stubLLMClient{err: errors.New("llm down")}}, }) - _, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + _, err := rt.ExecuteLLMStream(context.Background(), input) require.Error(t, err) require.Contains(t, err.Error(), "llm down") } @@ -514,7 +621,17 @@ func TestExecuteLLMStream_FallbackGenerate_EmitsEvents(t *testing.T) { var emitted []events.AgentEventType emit := func(ev events.AgentEvent) { emitted = append(emitted, ev.Type()) } - _, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, emit) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: emit, + } + _, err := rt.ExecuteLLMStream(context.Background(), input) require.NoError(t, err) require.Equal(t, []events.AgentEventType{ events.AgentEventTypeTextMessageStart, @@ -529,7 +646,17 @@ func TestExecuteLLMStream_GenerateStreamError(t *testing.T) { streamErr: errors.New("stream init failed"), }}, }) - _, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: 
noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + _, err := rt.ExecuteLLMStream(context.Background(), input) require.Error(t, err) require.Contains(t, err.Error(), "stream init failed") } @@ -543,7 +670,17 @@ func TestExecuteLLMStream_StreamError_AfterChunks(t *testing.T) { rt := newTestRuntime(sdkruntime.AgentConfig{ LLM: sdkruntime.AgentLLM{Client: streamCapableLLMClient{stream: s}}, }) - _, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + _, err := rt.ExecuteLLMStream(context.Background(), input) require.Error(t, err) require.Contains(t, err.Error(), "connection reset") } @@ -554,7 +691,17 @@ func TestExecuteLLMStream_StreamNilResult(t *testing.T) { rt := newTestRuntime(sdkruntime.AgentConfig{ LLM: sdkruntime.AgentLLM{Client: streamCapableLLMClient{stream: s}}, }) - _, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + _, err := rt.ExecuteLLMStream(context.Background(), input) require.Error(t, err) require.Contains(t, err.Error(), "stream completed without result") } @@ -572,7 +719,17 @@ func TestExecuteLLMStream_TextChunks_EmitsCorrectEvents(t *testing.T) { var emitted []events.AgentEventType emit := func(ev events.AgentEvent) { emitted = append(emitted, ev.Type()) } - result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, emit) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: 
false, + RetrieverContext: "", + Tools: nil, + Emit: emit, + } + result, err := rt.ExecuteLLMStream(context.Background(), input) require.NoError(t, err) require.Equal(t, "hello world", result.Content) require.Equal(t, events.AgentEventTypeTextMessageStart, emitted[0]) @@ -594,7 +751,17 @@ func TestExecuteLLMStream_ReasoningChunks_EmitsReasoningEvents(t *testing.T) { var emitted []events.AgentEventType emit := func(ev events.AgentEvent) { emitted = append(emitted, ev.Type()) } - _, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, emit) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: emit, + } + _, err := rt.ExecuteLLMStream(context.Background(), input) require.NoError(t, err) // Reasoning events must appear before text events @@ -626,7 +793,17 @@ func TestExecuteLLMStream_ToolOnlyResponse_EmitsEmptyAssistantTurn(t *testing.T) var emitted []events.AgentEventType emit := func(ev events.AgentEvent) { emitted = append(emitted, ev.Type()) } - result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", []interfaces.Tool{tool}, emit) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: []interfaces.Tool{tool}, + Emit: emit, + } + result, err := rt.ExecuteLLMStream(context.Background(), input) require.NoError(t, err) require.Len(t, result.ToolCalls, 1) // finalizeAssistantText emits a start/content/end even when no text chunks arrived @@ -642,7 +819,17 @@ func TestExecuteLLMStream_WithUsageMetrics(t *testing.T) { rt := newTestRuntime(sdkruntime.AgentConfig{ LLM: sdkruntime.AgentLLM{Client: streamCapableLLMClient{stream: s}}, }) - result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil) + input := 
ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + result, err := rt.ExecuteLLMStream(context.Background(), input) require.NoError(t, err) require.NotNil(t, result.Usage) require.EqualValues(t, 8, result.Usage.PromptTokens) @@ -657,7 +844,17 @@ func TestExecuteLLMStream_NilChunkSkipped(t *testing.T) { rt := newTestRuntime(sdkruntime.AgentConfig{ LLM: sdkruntime.AgentLLM{Client: streamCapableLLMClient{stream: s}}, }) - result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + result, err := rt.ExecuteLLMStream(context.Background(), input) require.NoError(t, err) require.Equal(t, "text", result.Content) } @@ -671,7 +868,17 @@ func TestExecuteLLMStream_FallbackGenerate_WithUsage(t *testing.T) { }, }}, }) - result, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + result, err := rt.ExecuteLLMStream(context.Background(), input) require.NoError(t, err) require.NotNil(t, result.Usage) require.EqualValues(t, 5, result.Usage.PromptTokens) @@ -685,7 +892,17 @@ func TestExecuteLLMStream_FallbackGenerate_UnknownToolCallError(t *testing.T) { }, }}, }) - _, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + _, err := rt.ExecuteLLMStream(context.Background(), 
input) require.Error(t, err) require.Contains(t, err.Error(), "unknown tool") } @@ -697,7 +914,17 @@ func TestExecuteLLMStream_Stream_UnknownToolCallError(t *testing.T) { rt := newTestRuntime(sdkruntime.AgentConfig{ LLM: sdkruntime.AgentLLM{Client: streamCapableLLMClient{stream: s}}, }) - _, err := rt.ExecuteLLMStream(context.Background(), noopLog(), "agent", "msg", nil, false, "", nil, nil) + input := ExecuteLLMInput{ + Logger: noopLog(), + AgentName: "agent", + MessageID: "msg", + Messages: nil, + SkipTools: false, + RetrieverContext: "", + Tools: nil, + Emit: nil, + } + _, err := rt.ExecuteLLMStream(context.Background(), input) require.Error(t, err) require.Contains(t, err.Error(), "unknown tool") } @@ -713,7 +940,9 @@ func TestExecuteRetrievers_EmptyDocsSkipped(t *testing.T) { }) got, err := rt.ExecuteRetrievers(context.Background(), noopLog(), "q") require.NoError(t, err) - require.Equal(t, "", got) + require.Equal(t, "", got.Context) + require.Equal(t, int64(1), got.TotalSearches) + require.Equal(t, int64(0), got.FailedSearches) } // --- ApplyLLMSampling Reasoning field --- diff --git a/internal/runtime/base/types.go b/internal/runtime/base/types.go index c631f69..eb34534 100644 --- a/internal/runtime/base/types.go +++ b/internal/runtime/base/types.go @@ -1,6 +1,9 @@ package base -import "github.com/agenticenv/agent-sdk-go/pkg/interfaces" +import ( + "github.com/agenticenv/agent-sdk-go/internal/types" + "github.com/agenticenv/agent-sdk-go/pkg/interfaces" +) // LLMResult is the result of a successful LLM call. // Content holds the assistant text; ToolCalls holds any tool invocations resolved against @@ -18,6 +21,7 @@ type ToolCallRequest struct { ToolCallID string ToolName string ToolDisplayName string + ToolKind types.ToolKind Args map[string]any NeedsApproval bool } @@ -28,3 +32,10 @@ type AuthorizeResult struct { Allowed bool Reason string } + +// RetrieverResult is the outcome of ExecuteRetrievers (prefetch / hybrid pre-loop). 
+type RetrieverResult struct { + Context string + TotalSearches int64 + FailedSearches int64 +} diff --git a/internal/runtime/base/utils.go b/internal/runtime/base/utils.go index c331833..dc0ab36 100644 --- a/internal/runtime/base/utils.go +++ b/internal/runtime/base/utils.go @@ -3,6 +3,7 @@ package base import ( "fmt" "strings" + "time" "github.com/agenticenv/agent-sdk-go/internal/runtime" "github.com/agenticenv/agent-sdk-go/internal/types" @@ -98,3 +99,16 @@ func GetConversationID(req *runtime.ExecuteRequest) string { } return "" } + +func NewAgentTelemetry(startedAt time.Time) *types.AgentTelemetry { + return &types.AgentTelemetry{ + Run: types.RunTelemetry{ + StartedAt: startedAt, + FinishReason: types.FinishReasonComplete, + }, + Tools: types.ToolTelemetry{ + Breakdown: make(map[string]int64), + FailedBreakdown: make(map[string]int64), + }, + } +} diff --git a/internal/runtime/local/agent_loop.go b/internal/runtime/local/agent_loop.go index 693e624..888ab9f 100644 --- a/internal/runtime/local/agent_loop.go +++ b/internal/runtime/local/agent_loop.go @@ -7,6 +7,7 @@ import ( "log/slog" "strings" "sync" + "time" "github.com/agenticenv/agent-sdk-go/internal/events" "github.com/agenticenv/agent-sdk-go/internal/runtime/base" @@ -47,8 +48,14 @@ type AgentLoopInput struct { // AgentLoopResult is the outcome of a completed local agent run. type AgentLoopResult struct { - Content string - Usage *interfaces.LLMUsage + Content string + LLMUsage *interfaces.LLMUsage + Telemetry *types.AgentTelemetry +} + +type toolResult struct { + message interfaces.Message + failed bool // true: hard err, ExecuteTool err, or ctx cancel } // publishEventToChannel marshals ev and publishes it on channelName via the runtime eventbus. @@ -78,9 +85,16 @@ func (rt *LocalRuntime) publishEventToChannel(ctx context.Context, channelName s // Events are published to rt.eventbus on input.ChannelName; callers subscribe to that channel. 
func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput) (*AgentLoopResult, error) { log := rt.logger + telemetry := base.NewAgentTelemetry(time.Now()) agentName := rt.AgentSpec.Name model := rt.AgentConfig.LLM.Client.GetModel() + ctx, sp := rt.Tracer.StartSpan(ctx, "agent.loop", + interfaces.Attribute{Key: "agent.name", Value: agentName}, + interfaces.Attribute{Key: "model", Value: model}, + ) + defer sp.End() + tools := input.Tools maxIter := rt.AgentConfig.Limits.MaxIterations @@ -127,22 +141,24 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput) slog.String("scope", "loop"), slog.String("mode", string(retrieverMode)), slog.Int("retrieverCount", len(rt.AgentConfig.Retrievers.Retrievers))) - rc, err := rt.ExecuteRetrievers(ctx, log, input.UserPrompt) + res, err := rt.ExecuteRetrievers(ctx, log, input.UserPrompt) if err != nil { return nil, fmt.Errorf("retriever prefetch: %w", err) } - retrieverContext = rc + retrieverContext = res.Context + telemetry.Storage.TotalRetrieverSearches += res.TotalSearches + telemetry.Storage.FailedRetrieverSearches += res.FailedSearches + telemetry.Storage.PrefetchSearches += res.TotalSearches log.Debug(ctx, "local: retriever prefetch done", slog.String("scope", "loop"), slog.Bool("hasContext", retrieverContext != "")) } - var runUsage *interfaces.LLMUsage - lastContent := "" + var lastContent string + var llmUsage *interfaces.LLMUsage for iter := 0; iter < maxIter; iter++ { messageID := uuid.New().String() - log.Debug(ctx, "local: LLM call started", slog.String("scope", "loop"), slog.Int("iteration", iter), @@ -150,16 +166,27 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput) var llmResult *base.LLMResult var err error + executeLLMInput := base.ExecuteLLMInput{ + Logger: log, + AgentName: agentName, + MessageID: messageID, + Messages: messages, + SkipTools: false, + RetrieverContext: retrieverContext, + Tools: tools, + Emit: emit, + } if 
input.StreamingEnabled { - llmResult, err = rt.ExecuteLLMStream(ctx, log, agentName, messageID, messages, false, retrieverContext, tools, emit) + llmResult, err = rt.ExecuteLLMStream(ctx, executeLLMInput) } else { - llmResult, err = rt.ExecuteLLM(ctx, log, agentName, messageID, messages, false, retrieverContext, tools, emit) + llmResult, err = rt.ExecuteLLM(ctx, executeLLMInput) } if err != nil { return nil, fmt.Errorf("llm call (iter %d): %w", iter, err) } - runUsage = base.MergeLLMUsage(runUsage, llmResult.Usage) + telemetry.Run.TotalLLMCalls++ + llmUsage = base.MergeLLMUsage(llmUsage, llmResult.Usage) // Final response: no tool calls → done. if len(llmResult.ToolCalls) == 0 { @@ -177,20 +204,32 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput) slog.String("scope", "loop"), slog.Int("iteration", iter)) finalMessageID := uuid.New().String() + executeLLMInput := base.ExecuteLLMInput{ + Logger: log, + AgentName: agentName, + MessageID: finalMessageID, + Messages: messages, + SkipTools: true, + RetrieverContext: retrieverContext, + Tools: tools, + Emit: emit, + } if input.StreamingEnabled { - llmResult, err = rt.ExecuteLLMStream(ctx, log, agentName, finalMessageID, messages, true, retrieverContext, tools, emit) + llmResult, err = rt.ExecuteLLMStream(ctx, executeLLMInput) } else { - llmResult, err = rt.ExecuteLLM(ctx, log, agentName, finalMessageID, messages, true, retrieverContext, tools, emit) + llmResult, err = rt.ExecuteLLM(ctx, executeLLMInput) } if err != nil { return nil, fmt.Errorf("llm final call (iter %d): %w", iter, err) } - runUsage = base.MergeLLMUsage(runUsage, llmResult.Usage) + llmUsage = base.MergeLLMUsage(llmUsage, llmResult.Usage) messages = append(messages, interfaces.Message{ Role: interfaces.MessageRoleAssistant, Content: llmResult.Content, }) lastContent = llmResult.Content + telemetry.Run.TotalLLMCalls++ + telemetry.Run.FinishReason = types.FinishReasonMaxIterations break } @@ -210,7 +249,7 @@ func (rt 
*LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput) messages = append(messages, assistantMsg) // Execute tools according to the requested execution mode. - var toolResults []interfaces.Message + var toolResults []toolResult switch toolExecMode { case types.AgentToolExecutionModeParallel: toolResults, err = rt.executeToolsParallel(ctx, input, messageID, llmResult.ToolCalls, emit) @@ -226,28 +265,30 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput) return nil, err } - messages = append(messages, toolResults...) + for idx, result := range toolResults { + messages = append(messages, result.message) + tc := llmResult.ToolCalls[idx] + if tc.ToolKind.CountsTowardToolTelemetry() { + telemetry.Tools.Record(tc.ToolName, result.failed) + } + if tc.ToolKind == types.ToolKindRetriever { + telemetry.Storage.TotalRetrieverSearches++ + telemetry.Storage.AgenticSearches++ + if result.failed { + telemetry.Storage.FailedRetrieverSearches++ + } + } + } if rt.conversationMemoryEnabled(input) && rt.AgentConfig.Session.ConversationSaveOnIteration && len(messages) > persistedMessageCount { - if err := persistConversationMessages(ctx, rt, input.ConversationID, messages[persistedMessageCount:]); err != nil { - log.Warn(ctx, "local: persist conversation failed", - slog.String("scope", "loop"), - slog.String("conversationID", input.ConversationID), - slog.Any("error", err)) - } else { - persistedMessageCount = len(messages) - } + rt.persistConversationMessages(ctx, input.ConversationID, messages[persistedMessageCount:]) + persistedMessageCount = len(messages) } } // Persist unsaved messages: full run when ConversationSaveOnIteration is false; final assistant only when true. 
if rt.conversationMemoryEnabled(input) && len(messages) > persistedMessageCount { - if err := persistConversationMessages(ctx, rt, input.ConversationID, messages[persistedMessageCount:]); err != nil { - log.Warn(ctx, "local: persist conversation failed", - slog.String("scope", "loop"), - slog.String("conversationID", input.ConversationID), - slog.Any("error", err)) - } + rt.persistConversationMessages(ctx, input.ConversationID, messages[persistedMessageCount:]) } log.Info(ctx, "local: agent run completed", @@ -256,7 +297,13 @@ func (rt *LocalRuntime) RunAgentLoop(ctx context.Context, input AgentLoopInput) slog.String("model", model), slog.Int("contentLen", len(lastContent))) - return &AgentLoopResult{Content: lastContent, Usage: runUsage}, nil + telemetry.Run.CompletedAt = time.Now() + + return &AgentLoopResult{ + Content: lastContent, + LLMUsage: llmUsage, + Telemetry: telemetry, + }, nil } func (rt *LocalRuntime) conversationMemoryEnabled(input AgentLoopInput) bool { @@ -272,29 +319,34 @@ func (rt *LocalRuntime) executeToolsParallel( messageID string, toolCalls []base.ToolCallRequest, emit func(events.AgentEvent), -) ([]interfaces.Message, error) { +) ([]toolResult, error) { rt.logger.Info(ctx, "local: tool execution (parallel)", slog.String("scope", "loop"), slog.Int("toolCount", len(toolCalls))) - results := make([]interfaces.Message, len(toolCalls)) + results := make([]toolResult, len(toolCalls)) var wg sync.WaitGroup for i := range toolCalls { wg.Add(1) go func(idx int, tc base.ToolCallRequest) { defer wg.Done() - msg, err := rt.executeSingleTool(ctx, input, messageID, tc, emit) + result, err := rt.executeSingleTool(ctx, input, messageID, tc, emit) if err != nil { - results[idx] = interfaces.Message{ + rt.logger.Info(ctx, "local: parallel tool failed", + slog.String("scope", "loop"), + slog.Int("toolIndex", idx), + slog.String("toolName", tc.ToolName), + slog.Any("error", err)) + result.message = interfaces.Message{ Role: interfaces.MessageRoleTool, Content: 
"Tool execution failed: " + err.Error(), ToolName: tc.ToolName, ToolCallID: tc.ToolCallID, } - return + result.failed = true } - results[idx] = msg + results[idx] = result }(i, toolCalls[i]) } wg.Wait() @@ -302,30 +354,38 @@ func (rt *LocalRuntime) executeToolsParallel( return results, nil } -// executeToolsSequential runs tool calls one at a time and returns on the first hard error. +// executeToolsSequential runs tool calls one at a time. +// Hard errors from individual tools become synthetic tool messages; the batch always continues. func (rt *LocalRuntime) executeToolsSequential( ctx context.Context, input AgentLoopInput, messageID string, toolCalls []base.ToolCallRequest, emit func(events.AgentEvent), -) ([]interfaces.Message, error) { +) ([]toolResult, error) { rt.logger.Info(ctx, "local: tool execution (sequential)", slog.String("scope", "loop"), slog.Int("toolCount", len(toolCalls))) - results := make([]interfaces.Message, 0, len(toolCalls)) - for i, tc := range toolCalls { - msg, err := rt.executeSingleTool(ctx, input, messageID, tc, emit) + results := make([]toolResult, len(toolCalls)) + for idx, tc := range toolCalls { + result, err := rt.executeSingleTool(ctx, input, messageID, tc, emit) if err != nil { rt.logger.Info(ctx, "local: sequential tool failed", slog.String("scope", "loop"), - slog.Int("toolIndex", i), + slog.Int("toolIndex", idx), slog.String("toolName", tc.ToolName), slog.Any("error", err)) - return nil, err + + result.message = interfaces.Message{ + Role: interfaces.MessageRoleTool, + Content: "Tool execution failed: " + err.Error(), + ToolName: tc.ToolName, + ToolCallID: tc.ToolCallID, + } + result.failed = true } - results = append(results, msg) + results[idx] = result } return results, nil } @@ -338,7 +398,7 @@ func (rt *LocalRuntime) executeSingleTool( messageID string, tc base.ToolCallRequest, emit func(events.AgentEvent), -) (interfaces.Message, error) { +) (toolResult, error) { log := rt.logger tools := input.Tools @@ -361,7 +421,10 
@@ func (rt *LocalRuntime) executeSingleTool( // Authorization check. authResult, err := rt.AuthorizeTool(ctx, log, tools, tc.ToolName, tc.Args) if err != nil { - return interfaces.Message{}, fmt.Errorf("tool authorization error for %q: %w", tc.ToolName, err) + return toolResult{ + message: interfaces.Message{}, + failed: true, + }, fmt.Errorf("tool authorization error for %q: %w", tc.ToolName, err) } if !authResult.Allowed { content := msgToolUnauthorized @@ -373,11 +436,14 @@ func (rt *LocalRuntime) executeSingleTool( slog.String("toolName", tc.ToolName), slog.String("reason", authResult.Reason)) emitToolEndThenResult(tc.ToolCallID, content) - return interfaces.Message{ - Role: interfaces.MessageRoleTool, - Content: content, - ToolName: tc.ToolName, - ToolCallID: tc.ToolCallID, + return toolResult{ + message: interfaces.Message{ + Role: interfaces.MessageRoleTool, + Content: content, + ToolName: tc.ToolName, + ToolCallID: tc.ToolCallID, + }, + failed: false, }, nil } @@ -452,12 +518,16 @@ func (rt *LocalRuntime) executeSingleTool( case status := <-resultCh: approvalStatus = status case <-ctx.Done(): - return interfaces.Message{}, ctx.Err() + return toolResult{ + message: interfaces.Message{}, + failed: true, + }, ctx.Err() } } } var content string + failed := false switch approvalStatus { case types.ApprovalStatusApproved: if isSubAgent { @@ -507,6 +577,7 @@ func (rt *LocalRuntime) executeSingleTool( result, execErr := rt.ExecuteTool(ctx, log, tools, tc.ToolName, tc.Args) if execErr != nil { content = "Tool execution failed: " + execErr.Error() + failed = true } else { content = result } @@ -516,31 +587,49 @@ func (rt *LocalRuntime) executeSingleTool( case types.ApprovalStatusUnavailable: content = msgToolApprovalUnavailable default: - return interfaces.Message{}, fmt.Errorf("unexpected approval status %q for tool %q", approvalStatus, tc.ToolName) + return toolResult{ + message: interfaces.Message{}, + failed: true, + }, fmt.Errorf("unexpected approval status %q 
for tool %q", approvalStatus, tc.ToolName) } emitToolEndThenResult(tc.ToolCallID, content) - return interfaces.Message{ - Role: interfaces.MessageRoleTool, - Content: content, - ToolName: tc.ToolName, - ToolCallID: tc.ToolCallID, + return toolResult{ + message: interfaces.Message{ + Role: interfaces.MessageRoleTool, + Content: content, + ToolName: tc.ToolName, + ToolCallID: tc.ToolCallID, + }, + failed: failed, }, nil } -// persistConversationMessages stores all accumulated messages from the run into the conversation store. -func persistConversationMessages(ctx context.Context, rt *LocalRuntime, conversationID string, messages []interfaces.Message) error { +// persistConversationMessages stores messages in the conversation store. +// Logs per-message failures and continues; does not fail the run. +func (rt *LocalRuntime) persistConversationMessages(ctx context.Context, conversationID string, messages []interfaces.Message) { conv := rt.AgentConfig.Session.Conversation - if conv == nil { - return nil + if conv == nil || len(messages) == 0 { + return } + + ctx, sp := rt.Tracer.StartSpan(ctx, "conversation.add_messages", + interfaces.Attribute{Key: "conversation.id", Value: conversationID}, + interfaces.Attribute{Key: "message.count", Value: len(messages)}, + ) + defer sp.End() + + failCount := 0 for _, msg := range messages { if err := conv.AddMessage(ctx, conversationID, msg); err != nil { + failCount++ rt.logger.Warn(ctx, "local: add conversation message failed", slog.String("scope", "loop"), slog.String("conversationID", conversationID), slog.Any("error", err)) } } - return nil + if failCount > 0 { + sp.SetAttribute("failed.count", failCount) + } } diff --git a/internal/runtime/local/agent_loop_test.go b/internal/runtime/local/agent_loop_test.go index 9559ee4..98bb350 100644 --- a/internal/runtime/local/agent_loop_test.go +++ b/internal/runtime/local/agent_loop_test.go @@ -45,6 +45,29 @@ func loopToolsInput(tools []interfaces.Tool) AgentLoopInput { return 
AgentLoopInput{Tools: tools} } +// stubKindTool is a stubTool with an explicit ToolKind for telemetry tests. +type stubKindTool struct { + stubTool + kind types.ToolKind +} + +func (t stubKindTool) ToolKind() types.ToolKind { return t.kind } + +// testToolCall builds a ToolCallRequest with ToolKind set (matches stubTool → native). +func testToolCall(toolCallID, toolName string) base.ToolCallRequest { + return base.ToolCallRequest{ + ToolCallID: toolCallID, + ToolName: toolName, + ToolKind: types.ToolKindNative, + } +} + +func testToolCallNeedsApproval(toolCallID, toolName string) base.ToolCallRequest { + tc := testToolCall(toolCallID, toolName) + tc.NeedsApproval = true + return tc +} + // noopEmit discards all events. func noopEmit(_ events.AgentEvent) {} @@ -113,6 +136,58 @@ func TestRunAgentLoop_ToolCallThenFinalAnswer(t *testing.T) { require.Equal(t, "sum is 7", result.Content) } +func TestRunAgentLoop_ToolTelemetry_Success(t *testing.T) { + client := &seqLLMClient{ + responses: []*interfaces.LLMResponse{ + {ToolCalls: []*interfaces.ToolCall{{ToolCallID: "c1", ToolName: "ok"}}}, + {Content: "done"}, + }, + } + okTool := stubTool{name: "ok", result: "1"} + rt, tools := newLoopRT(t, 5, client, okTool) + + result, err := runLoop(context.Background(), rt, tools, AgentLoopInput{UserPrompt: "go"}) + require.NoError(t, err) + require.NotNil(t, result.Telemetry) + require.Equal(t, int64(1), result.Telemetry.Tools.TotalCalls) + require.Equal(t, int64(0), result.Telemetry.Tools.FailedCalls) + require.Equal(t, int64(1), result.Telemetry.Tools.Breakdown["ok"]) +} + +func TestRunAgentLoop_ToolTelemetry_ExecError(t *testing.T) { + client := &seqLLMClient{ + responses: []*interfaces.LLMResponse{ + {ToolCalls: []*interfaces.ToolCall{{ToolCallID: "c1", ToolName: "bad"}}}, + {Content: "done"}, + }, + } + badTool := stubTool{name: "bad", execErr: errors.New("boom")} + rt, tools := newLoopRT(t, 5, client, badTool) + + result, err := runLoop(context.Background(), rt, tools, 
AgentLoopInput{UserPrompt: "go"}) + require.NoError(t, err) + require.NotNil(t, result.Telemetry) + require.Equal(t, int64(1), result.Telemetry.Tools.TotalCalls) + require.Equal(t, int64(1), result.Telemetry.Tools.FailedCalls) + require.Equal(t, int64(1), result.Telemetry.Tools.FailedBreakdown["bad"]) +} + +func TestRunAgentLoop_ToolTelemetry_SkipsNonCountableKind(t *testing.T) { + client := &seqLLMClient{ + responses: []*interfaces.LLMResponse{ + {ToolCalls: []*interfaces.ToolCall{{ToolCallID: "c1", ToolName: "delegate"}}}, + {Content: "done"}, + }, + } + tool := stubKindTool{stubTool: stubTool{name: "delegate", result: "ok"}, kind: types.ToolKindSubAgent} + rt, tools := newLoopRT(t, 5, client, tool) + + result, err := runLoop(context.Background(), rt, tools, AgentLoopInput{UserPrompt: "go"}) + require.NoError(t, err) + require.NotNil(t, result.Telemetry) + require.Equal(t, int64(0), result.Telemetry.Tools.TotalCalls) +} + func TestRunAgentLoop_MaxIterationsForcesFinalCall(t *testing.T) { // With maxIter=1 and the only LLM response returning a tool call, the loop // must fire a second "forced final" LLM call (skipTools=true) and return its content. @@ -179,7 +254,7 @@ func TestRunAgentLoop_WithConversationID(t *testing.T) { history := []interfaces.Message{{Role: interfaces.MessageRoleUser, Content: "old message"}} conv.EXPECT().ListMessages(gomock.Any(), "conv-x", gomock.Any()).Return(history, nil) - // user + assistant = 2 messages persisted (history messages re-saved too). + // user + assistant persisted in one batch at run end. 
conv.EXPECT().AddMessage(gomock.Any(), "conv-x", gomock.Any()).Return(nil).AnyTimes() client := &seqLLMClient{ @@ -263,15 +338,21 @@ func TestRunAgentLoop_RetrieverPrefetch(t *testing.T) { result, err := runLoop(context.Background(), rt, nil, AgentLoopInput{UserPrompt: "fetch me"}) require.NoError(t, err) require.Equal(t, "answer with context", result.Content) + require.Equal(t, int64(1), result.Telemetry.Storage.TotalRetrieverSearches) + require.Equal(t, int64(0), result.Telemetry.Storage.FailedRetrieverSearches) + require.Equal(t, int64(1), result.Telemetry.Storage.PrefetchSearches) + require.Equal(t, int64(0), result.Telemetry.Storage.AgenticSearches) } -func TestRunAgentLoop_RetrieverPrefetchError(t *testing.T) { +func TestRunAgentLoop_RetrieverAllFailContinues(t *testing.T) { ctrl := gomock.NewController(t) ret := ifmocks.NewMockRetriever(ctrl) ret.EXPECT().Name().Return("kb").AnyTimes() ret.EXPECT().Search(gomock.Any(), gomock.Any()).Return(nil, errors.New("kb down")) - client := &seqLLMClient{} + client := &seqLLMClient{ + responses: []*interfaces.LLMResponse{{Content: "answer without context"}}, + } rt, err := NewLocalRuntime( WithLogger(logger.NoopLogger()), WithAgentConfig(sdkruntime.AgentConfig{ @@ -285,9 +366,11 @@ func TestRunAgentLoop_RetrieverPrefetchError(t *testing.T) { ) require.NoError(t, err) - _, err = runLoop(context.Background(), rt, nil, AgentLoopInput{UserPrompt: "fetch"}) - require.Error(t, err) - require.Contains(t, err.Error(), "retriever prefetch") + result, err := runLoop(context.Background(), rt, nil, AgentLoopInput{UserPrompt: "fetch"}) + require.NoError(t, err) + require.Equal(t, "answer without context", result.Content) + require.Equal(t, int64(1), result.Telemetry.Storage.TotalRetrieverSearches) + require.Equal(t, int64(1), result.Telemetry.Storage.FailedRetrieverSearches) } // --------------------------------------------------------------------------- @@ -355,16 +438,16 @@ func TestExecuteToolsParallel_AllSucceed(t *testing.T) { 
rt, tools := newLoopRT(t, 5, &seqLLMClient{}, t1, t2) calls := []base.ToolCallRequest{ - {ToolCallID: "c1", ToolName: "t1"}, - {ToolCallID: "c2", ToolName: "t2"}, + testToolCall("c1", "t1"), + testToolCall("c2", "t2"), } msgs, err := rt.executeToolsParallel(context.Background(), loopToolsInput(tools), "msg-1", calls, noopEmit) require.NoError(t, err) require.Len(t, msgs, 2) // Order must match submission order (parallel but results are indexed). - require.Equal(t, "r1", msgs[0].Content) - require.Equal(t, "r2", msgs[1].Content) + require.Equal(t, "r1", msgs[0].message.Content) + require.Equal(t, "r2", msgs[1].message.Content) } func TestExecuteToolsParallel_ToolErrorInMessage(t *testing.T) { @@ -372,11 +455,12 @@ func TestExecuteToolsParallel_ToolErrorInMessage(t *testing.T) { failing := stubTool{name: "bad", execErr: errors.New("boom")} rt, tools := newLoopRT(t, 5, &seqLLMClient{}, failing) - calls := []base.ToolCallRequest{{ToolCallID: "c1", ToolName: "bad"}} + calls := []base.ToolCallRequest{testToolCall("c1", "bad")} msgs, err := rt.executeToolsParallel(context.Background(), loopToolsInput(tools), "msg", calls, noopEmit) require.NoError(t, err) // parallel swallows into message require.Len(t, msgs, 1) - require.Contains(t, msgs[0].Content, "boom") + require.Contains(t, msgs[0].message.Content, "boom") + require.True(t, msgs[0].failed) } func TestExecuteToolsParallel_ResultsOrderPreserved(t *testing.T) { @@ -389,13 +473,13 @@ func TestExecuteToolsParallel_ResultsOrderPreserved(t *testing.T) { rt, tools := newLoopRT(t, 5, &seqLLMClient{}, toolSet...) 
calls := []base.ToolCallRequest{ - {ToolCallID: "1", ToolName: "a"}, - {ToolCallID: "2", ToolName: "b"}, - {ToolCallID: "3", ToolName: "c"}, + testToolCall("1", "a"), + testToolCall("2", "b"), + testToolCall("3", "c"), } msgs, err := rt.executeToolsParallel(context.Background(), loopToolsInput(tools), "m", calls, noopEmit) require.NoError(t, err) - require.Equal(t, []string{"A", "B", "C"}, []string{msgs[0].Content, msgs[1].Content, msgs[2].Content}) + require.Equal(t, []string{"A", "B", "C"}, []string{msgs[0].message.Content, msgs[1].message.Content, msgs[2].message.Content}) } // --------------------------------------------------------------------------- @@ -408,30 +492,27 @@ func TestExecuteToolsSequential_AllSucceed(t *testing.T) { rt, tools := newLoopRT(t, 5, &seqLLMClient{}, t1, t2) calls := []base.ToolCallRequest{ - {ToolCallID: "c1", ToolName: "s1"}, - {ToolCallID: "c2", ToolName: "s2"}, + testToolCall("c1", "s1"), + testToolCall("c2", "s2"), } msgs, err := rt.executeToolsSequential(context.Background(), loopToolsInput(tools), "msg", calls, noopEmit) require.NoError(t, err) require.Len(t, msgs, 2) - require.Equal(t, "v1", msgs[0].Content) - require.Equal(t, "v2", msgs[1].Content) + require.Equal(t, "v1", msgs[0].message.Content) + require.Equal(t, "v2", msgs[1].message.Content) } func TestExecuteToolsSequential_HardErrorOnContextCancel(t *testing.T) { - // A tool that blocks until ctx is cancelled → executeSingleTool returns ctx.Err(). - // Sequential should propagate that error. rt, _ := newLoopRT(t, 5, &seqLLMClient{}) - // Add a fake tool that needs approval with no channel or handler → unavailable (not an error). - // Instead: use a blocking LLM as a proxy — but we need a tool-level error. - // We'll cancel the context before calling. 
ctx, cancel := context.WithCancel(context.Background()) cancel() // pre-cancelled - calls := []base.ToolCallRequest{{ToolCallID: "c1", ToolName: "missing-tool"}} - _, err := rt.executeToolsSequential(ctx, AgentLoopInput{}, "msg", calls, noopEmit) - // AuthorizeTool returns error for unknown tool. - require.Error(t, err) + calls := []base.ToolCallRequest{testToolCall("c1", "missing-tool")} + results, err := rt.executeToolsSequential(ctx, AgentLoopInput{}, "msg", calls, noopEmit) + require.NoError(t, err) + require.Len(t, results, 1) + require.True(t, results[0].failed) + require.Contains(t, results[0].message.Content, "missing-tool") } // --------------------------------------------------------------------------- @@ -444,12 +525,12 @@ func TestExecuteSingleTool_Approved(t *testing.T) { emit, evs := captureEmit() msg, err := rt.executeSingleTool(context.Background(), loopToolsInput(tools), "msg-1", - base.ToolCallRequest{ToolCallID: "c1", ToolName: "my-tool"}, emit) + testToolCall("c1", "my-tool"), emit) require.NoError(t, err) - require.Equal(t, "hello", msg.Content) - require.Equal(t, interfaces.MessageRoleTool, msg.Role) - require.Equal(t, "my-tool", msg.ToolName) + require.Equal(t, "hello", msg.message.Content) + require.Equal(t, interfaces.MessageRoleTool, msg.message.Role) + require.Equal(t, "my-tool", msg.message.ToolName) etypes := eventTypes(*evs) require.Contains(t, etypes, events.AgentEventTypeToolCallStart) @@ -462,16 +543,17 @@ func TestExecuteSingleTool_ToolExecError(t *testing.T) { rt, tools := newLoopRT(t, 5, &seqLLMClient{}, tool) msg, err := rt.executeSingleTool(context.Background(), loopToolsInput(tools), "msg", - base.ToolCallRequest{ToolCallID: "c1", ToolName: "boom"}, noopEmit) + testToolCall("c1", "boom"), noopEmit) require.NoError(t, err) // tool errors become a content message, not a hard error - require.Contains(t, msg.Content, "exec failed") + require.Contains(t, msg.message.Content, "exec failed") + require.True(t, msg.failed) } func 
TestExecuteSingleTool_UnknownToolErrors(t *testing.T) { rt, _ := newLoopRT(t, 5, &seqLLMClient{}) // no tools registered _, err := rt.executeSingleTool(context.Background(), AgentLoopInput{}, "msg", - base.ToolCallRequest{ToolCallID: "c1", ToolName: "ghost"}, noopEmit) + testToolCall("c1", "ghost"), noopEmit) require.Error(t, err) require.Contains(t, err.Error(), "ghost") } @@ -492,9 +574,10 @@ func TestExecuteSingleTool_AuthorizationDenied(t *testing.T) { rt, tools := newLoopRT(t, 5, &seqLLMClient{}, authTool) msg, err := rt.executeSingleTool(context.Background(), loopToolsInput(tools), "msg", - base.ToolCallRequest{ToolCallID: "c1", ToolName: "restricted"}, noopEmit) + testToolCall("c1", "restricted"), noopEmit) require.NoError(t, err) - require.Contains(t, msg.Content, msgToolUnauthorized) + require.Contains(t, msg.message.Content, msgToolUnauthorized) + require.False(t, msg.failed) _ = tool } @@ -503,7 +586,7 @@ func TestExecuteSingleTool_AuthorizationError(t *testing.T) { rt, tools := newLoopRT(t, 5, &seqLLMClient{}, authTool) _, err := rt.executeSingleTool(context.Background(), loopToolsInput(tools), "msg", - base.ToolCallRequest{ToolCallID: "c1", ToolName: "err-tool"}, noopEmit) + testToolCall("c1", "err-tool"), noopEmit) require.Error(t, err) require.Contains(t, err.Error(), "auth backend down") } @@ -515,9 +598,9 @@ func TestExecuteSingleTool_ApprovalUnavailable(t *testing.T) { msg, err := rt.executeSingleTool(context.Background(), AgentLoopInput{ChannelName: "", ApprovalHandler: nil, Tools: tools}, "msg", - base.ToolCallRequest{ToolCallID: "c1", ToolName: "guarded", NeedsApproval: true}, noopEmit) + testToolCallNeedsApproval("c1", "guarded"), noopEmit) require.NoError(t, err) - require.Contains(t, msg.Content, msgToolApprovalUnavailable) + require.Contains(t, msg.message.Content, msgToolApprovalUnavailable) } func TestExecuteSingleTool_ApprovalHandlerApproves(t *testing.T) { @@ -530,9 +613,9 @@ func TestExecuteSingleTool_ApprovalHandlerApproves(t 
*testing.T) { msg, err := rt.executeSingleTool(context.Background(), AgentLoopInput{ApprovalHandler: handler, Tools: tools}, "msg", - base.ToolCallRequest{ToolCallID: "c1", ToolName: "guarded", NeedsApproval: true}, noopEmit) + testToolCallNeedsApproval("c1", "guarded"), noopEmit) require.NoError(t, err) - require.Equal(t, "ok", msg.Content) + require.Equal(t, "ok", msg.message.Content) } func TestExecuteSingleTool_ApprovalHandlerRejects(t *testing.T) { @@ -545,9 +628,9 @@ func TestExecuteSingleTool_ApprovalHandlerRejects(t *testing.T) { msg, err := rt.executeSingleTool(context.Background(), AgentLoopInput{ApprovalHandler: handler, Tools: tools}, "msg", - base.ToolCallRequest{ToolCallID: "c1", ToolName: "guarded", NeedsApproval: true}, noopEmit) + testToolCallNeedsApproval("c1", "guarded"), noopEmit) require.NoError(t, err) - require.Equal(t, msgToolRejected, msg.Content) + require.Equal(t, msgToolRejected, msg.message.Content) } func TestExecuteSingleTool_StreamingApproveUnblocks(t *testing.T) { @@ -585,16 +668,16 @@ func TestExecuteSingleTool_StreamingApproveUnblocks(t *testing.T) { done := make(chan struct{}) var ( - resultMsg interfaces.Message + result toolResult resultErr error ) go func() { defer close(done) - resultMsg, resultErr = rt.executeSingleTool( + result, resultErr = rt.executeSingleTool( context.Background(), AgentLoopInput{ChannelName: "some-channel", Tools: tools}, // streaming path "msg", - base.ToolCallRequest{ToolCallID: "c1", ToolName: "guarded", NeedsApproval: true}, + testToolCallNeedsApproval("c1", "guarded"), emit, ) }() @@ -613,7 +696,7 @@ func TestExecuteSingleTool_StreamingApproveUnblocks(t *testing.T) { <-done require.NoError(t, resultErr) - require.Equal(t, "stream-ok", resultMsg.Content) + require.Equal(t, "stream-ok", result.message.Content) } func TestExecuteSingleTool_ApprovalContextCancel(t *testing.T) { @@ -631,7 +714,7 @@ func TestExecuteSingleTool_ApprovalContextCancel(t *testing.T) { _, err := rt.executeSingleTool(ctx, 
AgentLoopInput{ChannelName: "some-channel", Tools: tools}, "msg", - base.ToolCallRequest{ToolCallID: "c1", ToolName: "guarded", NeedsApproval: true}, noopEmit) + testToolCallNeedsApproval("c1", "guarded"), noopEmit) <-done require.Error(t, err) @@ -675,11 +758,9 @@ func TestPublishEventToChannel_NoOpWhenNilEventbus(t *testing.T) { func TestPersistConversationMessages_NilConversation(t *testing.T) { rt, _ := newLoopRT(t, 5, &seqLLMClient{}) - // No conversation configured — must not panic or error. - err := persistConversationMessages(context.Background(), rt, "c", []interfaces.Message{ + rt.persistConversationMessages(context.Background(), "c", []interfaces.Message{ {Role: interfaces.MessageRoleUser, Content: "hi"}, }) - require.NoError(t, err) } func TestPersistConversationMessages_StoresAllMessages(t *testing.T) { @@ -702,14 +783,13 @@ func TestPersistConversationMessages_StoresAllMessages(t *testing.T) { {Role: interfaces.MessageRoleAssistant, Content: "2"}, {Role: interfaces.MessageRoleTool, Content: "3"}, } - err = persistConversationMessages(context.Background(), rt, "conv-1", msgs) - require.NoError(t, err) + rt.persistConversationMessages(context.Background(), "conv-1", msgs) } -func TestPersistConversationMessages_AddMessageErrorWarnsOnly(t *testing.T) { +func TestPersistConversationMessages_AddMessageErrorContinues(t *testing.T) { ctrl := gomock.NewController(t) conv := ifmocks.NewMockConversation(ctrl) - conv.EXPECT().AddMessage(gomock.Any(), "c", gomock.Any()).Return(errors.New("store err")).AnyTimes() + conv.EXPECT().AddMessage(gomock.Any(), "c", gomock.Any()).Return(errors.New("store err")) rt, err := NewLocalRuntime( WithLogger(logger.NoopLogger()), @@ -721,11 +801,33 @@ func TestPersistConversationMessages_AddMessageErrorWarnsOnly(t *testing.T) { ) require.NoError(t, err) - // persistConversationMessages returns nil even when AddMessage fails (warns only). 
- err = persistConversationMessages(context.Background(), rt, "c", []interfaces.Message{ + rt.persistConversationMessages(context.Background(), "c", []interfaces.Message{ {Role: interfaces.MessageRoleUser, Content: "hi"}, }) +} + +func TestPersistConversationMessages_ContinuesAfterFailure(t *testing.T) { + ctrl := gomock.NewController(t) + conv := ifmocks.NewMockConversation(ctrl) + gomock.InOrder( + conv.EXPECT().AddMessage(gomock.Any(), "c", gomock.Any()).Return(errors.New("first fail")), + conv.EXPECT().AddMessage(gomock.Any(), "c", gomock.Any()).Return(nil), + ) + + rt, err := NewLocalRuntime( + WithLogger(logger.NoopLogger()), + WithAgentConfig(sdkruntime.AgentConfig{ + LLM: sdkruntime.AgentLLM{Client: &seqLLMClient{}}, + Session: sdkruntime.AgentSession{Conversation: conv}, + Limits: sdkruntime.AgentLimits{Timeout: 5 * time.Second}, + }), + ) require.NoError(t, err) + + rt.persistConversationMessages(context.Background(), "c", []interfaces.Message{ + {Role: interfaces.MessageRoleUser, Content: "1"}, + {Role: interfaces.MessageRoleAssistant, Content: "2"}, + }) } // --------------------------------------------------------------------------- diff --git a/internal/runtime/local/runtime.go b/internal/runtime/local/runtime.go index 0b5d3dc..96208c5 100644 --- a/internal/runtime/local/runtime.go +++ b/internal/runtime/local/runtime.go @@ -143,7 +143,8 @@ func (rt *LocalRuntime) Execute(ctx context.Context, req *sdkruntime.ExecuteRequ AgentName: strings.TrimSpace(agentName), Model: rt.AgentConfig.LLM.Client.GetModel(), Metadata: map[string]any{}, - Usage: loopResult.Usage, + LLMUsage: loopResult.LLMUsage, + Telemetry: loopResult.Telemetry, }, nil } @@ -236,7 +237,8 @@ func (rt *LocalRuntime) ExecuteStream(ctx context.Context, req *sdkruntime.Execu AgentName: strings.TrimSpace(agentName), Model: rt.AgentConfig.LLM.Client.GetModel(), Metadata: map[string]any{}, - Usage: result.Usage, + LLMUsage: result.LLMUsage, + Telemetry: result.Telemetry, } 
rt.publishLifecycleEvent(channel, events.NewAgentRunFinishedEvent(threadID, runID, agentRunResult)) }() diff --git a/internal/runtime/temporal/agent_workflow.go b/internal/runtime/temporal/agent_workflow.go index 393a779..0077e9f 100644 --- a/internal/runtime/temporal/agent_workflow.go +++ b/internal/runtime/temporal/agent_workflow.go @@ -134,8 +134,10 @@ type AgentWorkflowInput struct { // AgentWorkflowState is the state of the agent workflow. // It is used to store the state of the agent workflow on continue-as-new. type AgentWorkflowState struct { - Iteration int `json:"iteration"` - Messages []interfaces.Message `json:"messages"` + Iteration int `json:"iteration"` + Messages []interfaces.Message `json:"messages"` + LLMUsage *interfaces.LLMUsage `json:"llm_usage,omitempty"` + Telemetry *types.AgentTelemetry `json:"telemetry,omitempty"` } // AgentRetrieverInput is the input to AgentRetrieverActivity. @@ -149,6 +151,8 @@ type AgentRetrieverInput struct { // documents were found. It is injected into the system prompt by AgentLLMActivity and AgentLLMStreamActivity. type AgentRetrieverResult struct { RetrieverContext string `json:"retriever_context,omitempty"` + TotalSearches int64 `json:"total_searches,omitempty"` + FailedSearches int64 `json:"failed_searches,omitempty"` } // AgentLLMInput is the input to AgentLLMActivity and AgentLLMStreamActivity. 
@@ -188,6 +192,7 @@ func baseLLMResultToActivity(r *base.LLMResult) *AgentLLMResult { ToolCallID: tc.ToolCallID, ToolName: tc.ToolName, ToolDisplayName: tc.ToolDisplayName, + ToolKind: tc.ToolKind, Args: tc.Args, NeedsApproval: tc.NeedsApproval, }) @@ -200,6 +205,7 @@ type ToolCallRequest struct { ToolCallID string `json:"tool_call_id"` // from LLM; used to match tool results ToolName string `json:"tool_name"` ToolDisplayName string `json:"tool_display_name,omitempty"` + ToolKind types.ToolKind `json:"tool_kind"` Args map[string]any `json:"args"` NeedsApproval bool `json:"needs_approval"` } @@ -219,9 +225,16 @@ type agentToolCallInput struct { // agentToolCallOutput is the output of executeAgentToolCall. type agentToolCallOutput struct { msg interfaces.Message + failed bool // true: hard err or ExecuteTool err streamingUnavailable bool } +// agentToolResult is one tool outcome collected for the conversation and telemetry. +type agentToolResult struct { + message interfaces.Message + failed bool +} + // AgentToolExecuteInput is the input to AgentToolExecuteActivity. type AgentToolExecuteInput struct { ToolName string `json:"tool_name"` @@ -381,13 +394,19 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl useStreaming := input.StreamingEnabled && rt.AgentConfig.LLM.Client.IsStreamSupported() - // State restored after ContinueAsNew (iteration + conversation messages). + // State restored after ContinueAsNew (iteration, messages, run telemetry). 
if input.State == nil { input.State = &AgentWorkflowState{ Iteration: 0, Messages: []interfaces.Message{{Role: interfaces.MessageRoleUser, Content: input.UserPrompt}}, } } + if input.State.Telemetry == nil { + input.State.Telemetry = base.NewAgentTelemetry(workflow.Now(ctx)) + } + telemetry := input.State.Telemetry + + llmUsage := input.State.LLMUsage messages := input.State.Messages @@ -411,11 +430,13 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl return nil, err } retrieverContext = retrieverResult.RetrieverContext + telemetry.Storage.TotalRetrieverSearches += retrieverResult.TotalSearches + telemetry.Storage.FailedRetrieverSearches += retrieverResult.FailedSearches + telemetry.Storage.PrefetchSearches += retrieverResult.TotalSearches logger.Debug("workflow: retriever prefetch done", "scope", "workflow", "hasContext", retrieverContext != "") } lastContent := "" - var runUsage *interfaces.LLMUsage var llmResult AgentLLMResult for iter := input.State.Iteration; iter < maxIter; iter++ { @@ -445,7 +466,8 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl return nil, err } - runUsage = base.MergeLLMUsage(runUsage, llmResult.Usage) + telemetry.Run.TotalLLMCalls++ + llmUsage = base.MergeLLMUsage(llmUsage, llmResult.Usage) if len(llmResult.ToolCalls) == 0 { // Final response: accumulate assistant message for conversation @@ -469,9 +491,11 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl } return nil, err } - runUsage = base.MergeLLMUsage(runUsage, llmResult.Usage) + llmUsage = base.MergeLLMUsage(llmUsage, llmResult.Usage) messages = append(messages, interfaces.Message{Role: interfaces.MessageRoleAssistant, Content: llmResult.Content}) lastContent = llmResult.Content + telemetry.Run.TotalLLMCalls++ + telemetry.Run.FinishReason = types.FinishReasonMaxIterations break } @@ -490,7 +514,7 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl } 
messages = append(messages, assistantMsg) - var toolResults []interfaces.Message + var toolResults []agentToolResult toolExecMode := rt.ToolExecutionMode if toolExecMode == "" { @@ -546,7 +570,7 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl }) } - toolResults = make([]interfaces.Message, len(futures)) + toolResults = make([]agentToolResult, len(futures)) for i, fut := range futures { tc := llmResult.ToolCalls[i] var v *agentToolCallOutput @@ -559,22 +583,26 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl "toolName", tc.ToolName, "toolCallID", tc.ToolCallID, "error", err) - // Tool failed — send error as tool result so LLM can handle it - toolResults[i] = interfaces.Message{ - Role: interfaces.MessageRoleTool, - Content: "Tool execution failed: " + err.Error(), - ToolName: tc.ToolName, - ToolCallID: tc.ToolCallID, + toolResults[i] = agentToolResult{ + message: interfaces.Message{ + Role: interfaces.MessageRoleTool, + Content: "Tool execution failed: " + err.Error(), + ToolName: tc.ToolName, + ToolCallID: tc.ToolCallID, + }, + failed: true, } } else { - // Success: branch always Set(non-nil *agentToolCallOutput, nil). logger.Debug("workflow: parallel tool future collected (ok)", "scope", "workflow", "toolIndex", i, "toolName", tc.ToolName, "toolCallID", tc.ToolCallID, "streamingUnavailable", v.streamingUnavailable) - toolResults[i] = v.msg + toolResults[i] = agentToolResult{ + message: v.msg, + failed: v.failed, + } if v.streamingUnavailable { streamingUnavailable = true } @@ -588,7 +616,7 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl "executionMode", string(types.AgentToolExecutionModeSequential), "toolCount", len(llmResult.ToolCalls)) toolInput := rt.newAgentToolCallInput(ctx, input, activityIDSuffix, messageID, emitAgentEvent, "") - // authorize / approve / execute, then TOOL_CALL_END + TOOL_CALL_RESULT. 
+ toolResults = make([]agentToolResult, len(llmResult.ToolCalls)) for i, tc := range llmResult.ToolCalls { logger.Debug("workflow: sequential tool executing", "scope", "workflow", @@ -604,7 +632,16 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl "toolName", tc.ToolName, "toolCallID", tc.ToolCallID, "error", runErr) - return nil, runErr + toolResults[i] = agentToolResult{ + message: interfaces.Message{ + Role: interfaces.MessageRoleTool, + Content: "Tool execution failed: " + runErr.Error(), + ToolName: tc.ToolName, + ToolCallID: tc.ToolCallID, + }, + failed: true, + } + continue } if toolOutput.streamingUnavailable { streamingUnavailable = true @@ -615,14 +652,30 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl "toolName", tc.ToolName, "toolCallID", tc.ToolCallID, "streamingUnavailable", toolOutput.streamingUnavailable) - toolResults = append(toolResults, toolOutput.msg) + toolResults[i] = agentToolResult{ + message: toolOutput.msg, + failed: toolOutput.failed, + } } } default: return nil, fmt.Errorf("invalid tool execution mode %q: use %q or %q", toolExecMode, types.AgentToolExecutionModeParallel, types.AgentToolExecutionModeSequential) } - messages = append(messages, toolResults...) 
+ for i, result := range toolResults { + tc := llmResult.ToolCalls[i] + if tc.ToolKind.CountsTowardToolTelemetry() { + telemetry.Tools.Record(tc.ToolName, result.failed) + } + if tc.ToolKind == types.ToolKindRetriever { + telemetry.Storage.TotalRetrieverSearches++ + telemetry.Storage.AgenticSearches++ + if result.failed { + telemetry.Storage.FailedRetrieverSearches++ + } + } + messages = append(messages, result.message) + } if rt.conversationMemoryEnabled(input.ConversationID) && rt.AgentConfig.Session.ConversationSaveOnIteration && len(messages) > 0 { if err := workflow.ExecuteActivity(convCtx, rt.AddConversationMessagesActivity, AddConversationMessagesInput{ @@ -648,9 +701,12 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl "historySizeBytes", info.GetCurrentHistorySize(), "historySizeBytesLimit", agentWorkflowHistorySizeBytes, ) + input.State = &AgentWorkflowState{ Iteration: iter + 1, Messages: messages, + LLMUsage: llmUsage, + Telemetry: telemetry, } return nil, workflow.NewContinueAsNewError(ctx, rt.AgentWorkflow, input) } @@ -672,9 +728,19 @@ func (rt *TemporalRuntime) AgentWorkflow(ctx workflow.Context, input AgentWorkfl // Log summary only; avoid full content to prevent leaking sensitive data logger.Info("workflow: agent run completed", "scope", "workflow", "contentLen", len(lastContent)) - return &types.AgentRunResult{ - Content: lastContent, AgentName: agentName, Model: model, Metadata: map[string]any{}, Usage: runUsage, - }, nil + + telemetry.Run.CompletedAt = workflow.Now(ctx) + + runResult := &types.AgentRunResult{ + Content: lastContent, + AgentName: agentName, + Model: model, + Metadata: map[string]any{}, + LLMUsage: llmUsage, + Telemetry: telemetry, + } + + return runResult, nil } func (rt *TemporalRuntime) conversationMemoryEnabled(conversationID string) bool { @@ -781,6 +847,7 @@ func (rt *TemporalRuntime) executeAgentToolCall(input agentToolCallInput, tc Too ToolName: tc.ToolName, ToolCallID: tc.ToolCallID, }, 
+ failed: false, streamingUnavailable: false, }, nil } @@ -818,6 +885,7 @@ func (rt *TemporalRuntime) executeAgentToolCall(input agentToolCallInput, tc Too } var content string + failed := false switch approvalStatus { case types.ApprovalStatusApproved: if route, ok := input.input.SubAgentRoutes[tc.ToolName]; ok { @@ -848,6 +916,7 @@ func (rt *TemporalRuntime) executeAgentToolCall(input agentToolCallInput, tc Too errExec := workflow.ExecuteActivity(input.execCtx, rt.AgentToolExecuteActivity, execInput).Get(input.execCtx, &result) if errExec != nil { content = "Tool execution failed: " + errExec.Error() + failed = true } else { content = result } @@ -869,6 +938,7 @@ func (rt *TemporalRuntime) executeAgentToolCall(input agentToolCallInput, tc Too ToolName: tc.ToolName, ToolCallID: tc.ToolCallID, }, + failed: failed, streamingUnavailable: markStreamingUnavailable, }, nil } @@ -947,7 +1017,18 @@ func (rt *TemporalRuntime) AgentLLMStreamActivity(ctx context.Context, input Age rt.publishAgentEventToStream(ctx, agentName, input.LocalChannelName, input.EventWorkflowID, input.EventTaskQueue, ev) } - result, err := rt.ExecuteLLMStream(ctx, actLog, agentName, input.MessageID, messages, input.SkipTools, input.RetrieverContext, tools, emit) + executeLLMInput := base.ExecuteLLMInput{ + Logger: actLog, + AgentName: agentName, + MessageID: input.MessageID, + Messages: messages, + SkipTools: input.SkipTools, + RetrieverContext: input.RetrieverContext, + Tools: tools, + Emit: emit, + } + + result, err := rt.ExecuteLLMStream(ctx, executeLLMInput) if err != nil { return nil, err } @@ -964,11 +1045,15 @@ func (rt *TemporalRuntime) AgentRetrieverActivity(ctx context.Context, input Age return nil, err } actLog := newActivityLogger(activity.GetLogger(ctx)) - retrieverContext, err := rt.ExecuteRetrievers(ctx, actLog, input.UserPrompt) + res, err := rt.ExecuteRetrievers(ctx, actLog, input.UserPrompt) if err != nil { return nil, err } - return &AgentRetrieverResult{RetrieverContext: 
retrieverContext}, nil + return &AgentRetrieverResult{ + RetrieverContext: res.Context, + TotalSearches: res.TotalSearches, + FailedSearches: res.FailedSearches, + }, nil } // AgentLLMActivity calls the LLM and returns content plus any tool calls. @@ -997,7 +1082,18 @@ func (rt *TemporalRuntime) AgentLLMActivity(ctx context.Context, input AgentLLMI rt.publishAgentEventToStream(ctx, agentName, input.LocalChannelName, input.EventWorkflowID, input.EventTaskQueue, ev) } - result, err := rt.ExecuteLLM(ctx, actLog, agentName, input.MessageID, messages, input.SkipTools, input.RetrieverContext, tools, emit) + executeLLMInput := base.ExecuteLLMInput{ + Logger: actLog, + AgentName: agentName, + MessageID: input.MessageID, + Messages: messages, + SkipTools: input.SkipTools, + RetrieverContext: input.RetrieverContext, + Tools: tools, + Emit: emit, + } + + result, err := rt.ExecuteLLM(ctx, executeLLMInput) if err != nil { return nil, err } diff --git a/internal/runtime/temporal/agent_workflow_test.go b/internal/runtime/temporal/agent_workflow_test.go index 137fd4c..4ec5611 100644 --- a/internal/runtime/temporal/agent_workflow_test.go +++ b/internal/runtime/temporal/agent_workflow_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "testing" + "time" "github.com/golang/mock/gomock" "github.com/stretchr/testify/mock" @@ -49,6 +50,19 @@ func wireTestToolsResolver(rt *TemporalRuntime, tools []interfaces.Tool) { } } +func testWorkflowToolCall(toolCallID, toolName string, kind types.ToolKind, args map[string]any) ToolCallRequest { + if kind == "" { + kind = types.ToolKindNative + } + return ToolCallRequest{ + ToolCallID: toolCallID, + ToolName: toolName, + ToolDisplayName: toolName, + ToolKind: kind, + Args: args, + } +} + // newActivityTestEnv returns a [testsuite.TestActivityEnvironment] for isolated activity tests. 
func newActivityTestEnv(t *testing.T) *testsuite.TestActivityEnvironment { t.Helper() @@ -120,13 +134,8 @@ func TestAgentWorkflow_OneToolThenFinal(t *testing.T) { llmCalls++ if llmCalls == 1 { return &AgentLLMResult{ - Content: "using tool", - ToolCalls: []ToolCallRequest{{ - ToolCallID: "tc1", - ToolName: "echo", - ToolDisplayName: "Echo", - Args: map[string]any{"x": 1}, - }}, + Content: "using tool", + ToolCalls: []ToolCallRequest{testWorkflowToolCall("tc1", "echo", types.ToolKindNative, map[string]any{"x": 1})}, }, nil } return &AgentLLMResult{Content: "after tool", ToolCalls: nil}, nil @@ -150,6 +159,111 @@ func TestAgentWorkflow_OneToolThenFinal(t *testing.T) { require.NoError(t, env.GetWorkflowResult(&result)) require.Equal(t, "after tool", result.Content) require.Equal(t, 2, llmCalls) + require.NotNil(t, result.Telemetry) + require.Equal(t, int64(1), result.Telemetry.Tools.TotalCalls) + require.Equal(t, int64(0), result.Telemetry.Tools.FailedCalls) +} + +func TestAgentWorkflow_ToolTelemetry_ExecError(t *testing.T) { + var suite testsuite.WorkflowTestSuite + env := suite.NewTestWorkflowEnvironment() + rt := testRuntimeForWorkflow(t) + + var llmCalls int + env.RegisterWorkflow(rt.AgentWorkflow) + env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return(func(ctx context.Context, in AgentLLMInput) (*AgentLLMResult, error) { + llmCalls++ + if llmCalls == 1 { + return &AgentLLMResult{ + Content: "using tool", + ToolCalls: []ToolCallRequest{testWorkflowToolCall("tc1", "bad", types.ToolKindNative, nil)}, + }, nil + } + return &AgentLLMResult{Content: "after tool", ToolCalls: nil}, nil + }) + env.OnActivity(rt.AgentToolExecuteActivity, mock.Anything, mock.Anything).Return("", fmt.Errorf("boom")) + env.OnActivity(rt.AgentToolAuthorizeActivity, mock.Anything, mock.Anything).Return(AgentToolAuthorizeResult{Allowed: true}, nil) + + env.ExecuteWorkflow(rt.AgentWorkflow, AgentWorkflowInput{UserPrompt: "run"}) + + require.True(t, env.IsWorkflowCompleted()) + 
var result types.AgentRunResult + require.NoError(t, env.GetWorkflowResult(&result)) + require.Equal(t, int64(1), result.Telemetry.Tools.TotalCalls) + require.Equal(t, int64(1), result.Telemetry.Tools.FailedCalls) +} + +func TestAgentWorkflow_ToolTelemetry_SkipsNonCountableKind(t *testing.T) { + var suite testsuite.WorkflowTestSuite + env := suite.NewTestWorkflowEnvironment() + rt := testRuntimeForWorkflow(t) + + var llmCalls int + env.RegisterWorkflow(rt.AgentWorkflow) + env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return(func(ctx context.Context, in AgentLLMInput) (*AgentLLMResult, error) { + llmCalls++ + if llmCalls == 1 { + return &AgentLLMResult{ + Content: "using tool", + ToolCalls: []ToolCallRequest{testWorkflowToolCall("tc1", "delegate", types.ToolKindSubAgent, nil)}, + }, nil + } + return &AgentLLMResult{Content: "done", ToolCalls: nil}, nil + }) + env.OnActivity(rt.AgentToolExecuteActivity, mock.Anything, mock.Anything).Return("should not run", nil) + env.OnActivity(rt.AgentToolAuthorizeActivity, mock.Anything, mock.Anything).Return(AgentToolAuthorizeResult{Allowed: true}, nil) + + env.ExecuteWorkflow(rt.AgentWorkflow, AgentWorkflowInput{UserPrompt: "run"}) + + require.True(t, env.IsWorkflowCompleted()) + var result types.AgentRunResult + require.NoError(t, env.GetWorkflowResult(&result)) + require.Equal(t, int64(0), result.Telemetry.Tools.TotalCalls) +} + +func TestAgentWorkflow_SequentialMode_ContinuesOnToolError(t *testing.T) { + var suite testsuite.WorkflowTestSuite + env := suite.NewTestWorkflowEnvironment() + rt := testRuntimeForWorkflow(t) + rt.ToolExecutionMode = types.AgentToolExecutionModeSequential + + var llmCalls int + env.RegisterWorkflow(rt.AgentWorkflow) + env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return(func(ctx context.Context, in AgentLLMInput) (*AgentLLMResult, error) { + llmCalls++ + if llmCalls == 1 { + return &AgentLLMResult{ + Content: "using tools", + ToolCalls: []ToolCallRequest{ + 
testWorkflowToolCall("tc1", "bad", types.ToolKindNative, nil), + testWorkflowToolCall("tc2", "ok", types.ToolKindNative, nil), + }, + }, nil + } + return &AgentLLMResult{Content: "after tools", ToolCalls: nil}, nil + }) + env.OnActivity(rt.AgentToolAuthorizeActivity, mock.Anything, mock.Anything).Return(func(ctx context.Context, in AgentToolAuthorizeInput) (AgentToolAuthorizeResult, error) { + if in.ToolName == "bad" { + return AgentToolAuthorizeResult{}, fmt.Errorf("auth backend down") + } + return AgentToolAuthorizeResult{Allowed: true}, nil + }) + env.OnActivity(rt.AgentToolExecuteActivity, mock.Anything, mock.Anything).Return(func(ctx context.Context, in AgentToolExecuteInput) (string, error) { + if in.ToolName != "ok" { + t.Errorf("unexpected execute for %q", in.ToolName) + } + return "ok result", nil + }) + + env.ExecuteWorkflow(rt.AgentWorkflow, AgentWorkflowInput{UserPrompt: "run"}) + + require.True(t, env.IsWorkflowCompleted()) + var result types.AgentRunResult + require.NoError(t, env.GetWorkflowResult(&result)) + require.Equal(t, "after tools", result.Content) + require.Equal(t, 2, llmCalls) + require.Equal(t, int64(2), result.Telemetry.Tools.TotalCalls) + require.Equal(t, int64(1), result.Telemetry.Tools.FailedCalls) } func TestAgentWorkflow_ToolAuthorizationDenied_SkipsExecute(t *testing.T) { @@ -163,13 +277,8 @@ func TestAgentWorkflow_ToolAuthorizationDenied_SkipsExecute(t *testing.T) { llmCalls++ if llmCalls == 1 { return &AgentLLMResult{ - Content: "using tool", - ToolCalls: []ToolCallRequest{{ - ToolCallID: "tc-auth-deny", - ToolName: "echo", - ToolDisplayName: "Echo", - Args: map[string]any{"x": 1}, - }}, + Content: "using tool", + ToolCalls: []ToolCallRequest{testWorkflowToolCall("tc-auth-deny", "echo", types.ToolKindNative, map[string]any{"x": 1})}, }, nil } return &AgentLLMResult{Content: "after deny", ToolCalls: nil}, nil @@ -277,6 +386,7 @@ func TestAgentLLMActivity_MockLLM_ToolCalls(t *testing.T) { require.Len(t, got.ToolCalls, 1) 
require.Equal(t, "echo", got.ToolCalls[0].ToolName) require.Equal(t, "tc1", got.ToolCalls[0].ToolCallID) + require.Equal(t, types.ToolKindNative, got.ToolCalls[0].ToolKind) require.False(t, got.ToolCalls[0].NeedsApproval) } @@ -443,13 +553,8 @@ func TestAgentWorkflow_ContinueAsNewOnHistoryLengthAfterTools(t *testing.T) { env.RegisterWorkflow(rt.AgentWorkflow) env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return(func(ctx context.Context, in AgentLLMInput) (*AgentLLMResult, error) { return &AgentLLMResult{ - Content: "using tool", - ToolCalls: []ToolCallRequest{{ - ToolCallID: "tc-can", - ToolName: "echo", - ToolDisplayName: "Echo", - Args: map[string]any{"x": 1}, - }}, + Content: "using tool", + ToolCalls: []ToolCallRequest{testWorkflowToolCall("tc-can", "echo", types.ToolKindNative, map[string]any{"x": 1})}, }, nil }) env.OnActivity(rt.AgentToolExecuteActivity, mock.Anything, mock.Anything).Return("echo ok", nil) @@ -467,6 +572,57 @@ func TestAgentWorkflow_ContinueAsNewOnHistoryLengthAfterTools(t *testing.T) { require.True(t, workflow.IsContinueAsNewError(wfErr), "expected continue-as-new, got: %v", wfErr) } +func TestAgentWorkflow_ResumesTelemetryFromState(t *testing.T) { + var suite testsuite.WorkflowTestSuite + env := suite.NewTestWorkflowEnvironment() + rt := testRuntimeForWorkflow(t) + + priorStarted := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC) + + env.RegisterWorkflow(rt.AgentWorkflow) + env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return( + &AgentLLMResult{ + Content: "done", + Usage: &interfaces.LLMUsage{TotalTokens: 50, PromptTokens: 30, CompletionTokens: 20}, + }, nil) + + env.ExecuteWorkflow(rt.AgentWorkflow, AgentWorkflowInput{ + UserPrompt: "run", + State: &AgentWorkflowState{ + Iteration: 0, + Messages: []interfaces.Message{ + {Role: interfaces.MessageRoleUser, Content: "run"}, + }, + LLMUsage: &types.LLMUsage{TotalTokens: 100}, + Telemetry: &types.AgentTelemetry{ + Run: types.RunTelemetry{ + StartedAt: 
priorStarted, + TotalLLMCalls: 2, + FinishReason: types.FinishReasonComplete, + }, + Tools: types.ToolTelemetry{ + TotalCalls: 4, + FailedCalls: 1, + Breakdown: map[string]int64{"prior": 4}, + }, + }, + }, + }) + + require.True(t, env.IsWorkflowCompleted()) + var result types.AgentRunResult + require.NoError(t, env.GetWorkflowResult(&result)) + require.NotNil(t, result.Telemetry) + require.NotNil(t, result.Telemetry.Run) + require.Equal(t, priorStarted, result.Telemetry.Run.StartedAt) + require.Equal(t, int64(3), result.Telemetry.Run.TotalLLMCalls) + require.Equal(t, int64(4), result.Telemetry.Tools.TotalCalls) + require.Equal(t, int64(1), result.Telemetry.Tools.FailedCalls) + require.NotNil(t, result.LLMUsage) + require.Equal(t, int64(150), result.LLMUsage.TotalTokens) + require.False(t, result.Telemetry.Run.CompletedAt.IsZero()) +} + func TestAgentWorkflow_ContinueAsNewOnHistorySizeAfterTools(t *testing.T) { var suite testsuite.WorkflowTestSuite env := suite.NewTestWorkflowEnvironment() @@ -478,13 +634,8 @@ func TestAgentWorkflow_ContinueAsNewOnHistorySizeAfterTools(t *testing.T) { env.RegisterWorkflow(rt.AgentWorkflow) env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return(func(ctx context.Context, in AgentLLMInput) (*AgentLLMResult, error) { return &AgentLLMResult{ - Content: "using tool", - ToolCalls: []ToolCallRequest{{ - ToolCallID: "tc-can-size", - ToolName: "echo", - ToolDisplayName: "Echo", - Args: map[string]any{"x": 1}, - }}, + Content: "using tool", + ToolCalls: []ToolCallRequest{testWorkflowToolCall("tc-can-size", "echo", types.ToolKindNative, map[string]any{"x": 1})}, }, nil }) env.OnActivity(rt.AgentToolExecuteActivity, mock.Anything, mock.Anything).Return("echo ok", nil) @@ -565,6 +716,8 @@ func TestAgentRetrieverActivity_SingleRetriever(t *testing.T) { require.Contains(t, got.RetrieverContext, "Go is a language") require.Contains(t, got.RetrieverContext, "docs.go.dev") require.Contains(t, got.RetrieverContext, "0.95") + 
require.Equal(t, int64(1), got.TotalSearches) + require.Equal(t, int64(0), got.FailedSearches) // Single retriever: no section header require.NotContains(t, got.RetrieverContext, "## kb") } @@ -598,6 +751,8 @@ func TestAgentRetrieverActivity_MultipleRetrievers_SectionHeaders(t *testing.T) require.Contains(t, got.RetrieverContext, "doc from r1") require.Contains(t, got.RetrieverContext, "## r2") require.Contains(t, got.RetrieverContext, "doc from r2") + require.Equal(t, int64(2), got.TotalSearches) + require.Equal(t, int64(0), got.FailedSearches) } func TestAgentRetrieverActivity_PartialFailure_ContinuesWithPartialContext(t *testing.T) { @@ -625,9 +780,11 @@ func TestAgentRetrieverActivity_PartialFailure_ContinuesWithPartialContext(t *te require.NoError(t, val.Get(&got)) require.Contains(t, got.RetrieverContext, "good doc") require.NotContains(t, got.RetrieverContext, "bad") + require.Equal(t, int64(2), got.TotalSearches) + require.Equal(t, int64(1), got.FailedSearches) } -func TestAgentRetrieverActivity_AllFail_ReturnsError(t *testing.T) { +func TestAgentRetrieverActivity_AllFail_ContinuesWithEmptyContext(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() @@ -639,9 +796,13 @@ func TestAgentRetrieverActivity_AllFail_ReturnsError(t *testing.T) { actEnv := newActivityTestEnv(t) actEnv.RegisterActivity(rt.AgentRetrieverActivity) - _, err := actEnv.ExecuteActivity(rt.AgentRetrieverActivity, AgentRetrieverInput{UserPrompt: "q"}) - require.Error(t, err) - require.Contains(t, err.Error(), "all") + val, err := actEnv.ExecuteActivity(rt.AgentRetrieverActivity, AgentRetrieverInput{UserPrompt: "q"}) + require.NoError(t, err) + var got AgentRetrieverResult + require.NoError(t, val.Get(&got)) + require.Equal(t, "", got.RetrieverContext) + require.Equal(t, int64(1), got.TotalSearches) + require.Equal(t, int64(1), got.FailedSearches) } func TestAgentRetrieverActivity_EmptyDocs_EmptyContext(t *testing.T) { @@ -706,7 +867,7 @@ func 
TestAgentWorkflow_PrefetchMode_CallsRetrieverActivityFirst(t *testing.T) { func(ctx context.Context, in AgentRetrieverInput) (*AgentRetrieverResult, error) { retrieverCalled = true require.Equal(t, "user query", in.UserPrompt) - return &AgentRetrieverResult{RetrieverContext: "[1] prefetched doc"}, nil + return &AgentRetrieverResult{RetrieverContext: "[1] prefetched doc", TotalSearches: 1, FailedSearches: 0}, nil }) env.OnActivity(rt.AgentLLMActivity, mock.Anything, mock.Anything).Return( @@ -724,6 +885,11 @@ func TestAgentWorkflow_PrefetchMode_CallsRetrieverActivityFirst(t *testing.T) { var result types.AgentRunResult require.NoError(t, env.GetWorkflowResult(&result)) require.Equal(t, "answer", result.Content) + require.NotNil(t, result.Telemetry) + require.Equal(t, int64(1), result.Telemetry.Storage.TotalRetrieverSearches) + require.Equal(t, int64(0), result.Telemetry.Storage.FailedRetrieverSearches) + require.Equal(t, int64(1), result.Telemetry.Storage.PrefetchSearches) + require.Equal(t, int64(0), result.Telemetry.Storage.AgenticSearches) } func TestAgentWorkflow_AgenticMode_SkipsRetrieverActivity(t *testing.T) { diff --git a/internal/runtime/temporal/approval.go b/internal/runtime/temporal/approval.go new file mode 100644 index 0000000..216ad2e --- /dev/null +++ b/internal/runtime/temporal/approval.go @@ -0,0 +1,82 @@ +package temporal + +import ( + "errors" + "fmt" + + "github.com/agenticenv/agent-sdk-go/internal/events" + "github.com/agenticenv/agent-sdk-go/internal/types" +) + +// ErrNotApprovalCustomEvent means the CUSTOM event name is not tool or delegation approval. +var ErrNotApprovalCustomEvent = errors.New("temporal: custom event is not a recognized approval kind") + +// toolApprovalFromEventValue copies the CUSTOM approval payload into an SDK approval value. 
+func toolApprovalFromEventValue(ev events.AgentCustomEventApprovalValue) types.ToolApprovalRequestValue { + return types.ToolApprovalRequestValue{ + AgentName: ev.AgentName, + ToolCallID: ev.ToolCallID, + ToolName: ev.ToolName, + ToolDisplayName: ev.ToolDisplayName, + Args: cloneArgsMap(ev.Args), + ApprovalToken: ev.ApprovalToken, + } +} + +// delegationApprovalFromEventValue copies the CUSTOM delegation payload into an SDK approval value. +func delegationApprovalFromEventValue(ev events.AgentCustomEventDelegationValue) types.SubAgentDelegationApprovalRequestValue { + return types.SubAgentDelegationApprovalRequestValue{ + AgentName: ev.AgentName, + SubAgentName: ev.SubAgentName, + Args: cloneArgsMap(ev.Args), + ApprovalToken: ev.ApprovalToken, + } +} + +// prepareApprovalFromCustomEvent parses a CUSTOM event and returns Name + Value as SDK types and the approval token for Temporal CompleteActivity. +// Respond is nil; the caller must set it before calling types.ApprovalHandler. +// Returns ErrNotApprovalCustomEvent when ev.Name is not tool or delegation approval. 
+func prepareApprovalFromCustomEvent(ev *events.AgentCustomEvent) (req *types.ApprovalRequest, approvalToken string, err error) { + if ev == nil { + return nil, "", fmt.Errorf("temporal: nil custom event") + } + switch events.AgentCustomEventName(ev.Name) { + case events.AgentCustomEventNameToolApproval: + raw, err := events.ParseCustomEventApproval(ev) + if err != nil { + return nil, "", err + } + v := toolApprovalFromEventValue(raw) + return &types.ApprovalRequest{ + Name: types.ApprovalRequestNameTool, + Value: v, + }, + v.ApprovalToken, + nil + case events.AgentCustomEventNameSubAgentDelegation: + raw, err := events.ParseCustomEventDelegation(ev) + if err != nil { + return nil, "", err + } + v := delegationApprovalFromEventValue(raw) + return &types.ApprovalRequest{ + Name: types.ApprovalRequestNameSubAgent, + Value: v, + }, + v.ApprovalToken, + nil + default: + return nil, "", ErrNotApprovalCustomEvent + } +} + +func cloneArgsMap(m map[string]any) map[string]any { + if m == nil { + return nil + } + out := make(map[string]any, len(m)) + for k, v := range m { + out[k] = v + } + return out +} diff --git a/internal/runtime/temporal/approval_test.go b/internal/runtime/temporal/approval_test.go new file mode 100644 index 0000000..f6c2c60 --- /dev/null +++ b/internal/runtime/temporal/approval_test.go @@ -0,0 +1,146 @@ +package temporal + +import ( + "errors" + "strings" + "testing" + + "github.com/agenticenv/agent-sdk-go/internal/events" + "github.com/agenticenv/agent-sdk-go/internal/types" +) + +func TestPrepareApprovalFromCustomEvent_NilEvent(t *testing.T) { + req, token, err := prepareApprovalFromCustomEvent(nil) + if req != nil || token != "" || err == nil { + t.Fatalf("expected nil req/token and error, got req=%v token=%q err=%v", req, token, err) + } + if !strings.Contains(err.Error(), "nil custom event") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestPrepareApprovalFromCustomEvent_UnknownName(t *testing.T) { + ev := 
events.NewAgentCustomEvent("other_custom", map[string]any{"x": 1}) + req, token, err := prepareApprovalFromCustomEvent(ev) + if req != nil || token != "" { + t.Fatalf("expected nil req/token, got req=%v token=%q", req, token) + } + if !errors.Is(err, ErrNotApprovalCustomEvent) { + t.Fatalf("expected ErrNotApprovalCustomEvent, got %v", err) + } +} + +func TestPrepareApprovalFromCustomEvent_ToolApprovalTypedValue(t *testing.T) { + ev := events.NewAgentCustomEvent(string(events.AgentCustomEventNameToolApproval), &events.AgentCustomEventApprovalValue{ + AgentName: "agent-a", + ToolCallID: "call-1", + ToolName: "calculator", + ToolDisplayName: "Calculator", + Args: map[string]any{"x": 1}, + ApprovalToken: "tok-tool", + }) + + req, token, err := prepareApprovalFromCustomEvent(ev) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if token != "tok-tool" { + t.Fatalf("token: got %q want tok-tool", token) + } + if req == nil || req.Name != types.ApprovalRequestNameTool { + t.Fatalf("unexpected req: %#v", req) + } + + parsed, err := types.ParseToolApproval(req) + if err != nil { + t.Fatalf("ParseToolApproval: %v", err) + } + if parsed.AgentName != "agent-a" || parsed.ToolCallID != "call-1" || parsed.ToolName != "calculator" || + parsed.ToolDisplayName != "Calculator" || parsed.ApprovalToken != "tok-tool" { + t.Fatalf("unexpected parsed tool approval: %#v", parsed) + } + if parsed.Args["x"] != float64(1) && parsed.Args["x"] != 1 { + t.Fatalf("unexpected args: %#v", parsed.Args) + } +} + +func TestPrepareApprovalFromCustomEvent_ToolApprovalMapValue(t *testing.T) { + ev := events.NewAgentCustomEvent(string(events.AgentCustomEventNameToolApproval), map[string]any{ + "toolName": "search", + "approvalToken": "tok-map", + "args": map[string]any{"q": "hello"}, + }) + + req, token, err := prepareApprovalFromCustomEvent(ev) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if token != "tok-map" { + t.Fatalf("token: got %q want tok-map", token) + } + parsed, 
err := types.ParseToolApproval(req) + if err != nil { + t.Fatalf("ParseToolApproval: %v", err) + } + if parsed.ToolName != "search" || parsed.ApprovalToken != "tok-map" { + t.Fatalf("unexpected parsed: %#v", parsed) + } +} + +func TestPrepareApprovalFromCustomEvent_Delegation(t *testing.T) { + ev := events.NewAgentCustomEvent(string(events.AgentCustomEventNameSubAgentDelegation), map[string]any{ + "agentName": "parent", + "subAgentName": "child", + "approvalToken": "tok-delegate", + "args": map[string]any{"task": "summarize"}, + }) + + req, token, err := prepareApprovalFromCustomEvent(ev) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if token != "tok-delegate" { + t.Fatalf("token: got %q want tok-delegate", token) + } + if req == nil || req.Name != types.ApprovalRequestNameSubAgent { + t.Fatalf("unexpected req: %#v", req) + } + + parsed, err := types.ParseDelegationApproval(req) + if err != nil { + t.Fatalf("ParseDelegationApproval: %v", err) + } + if parsed.AgentName != "parent" || parsed.SubAgentName != "child" || parsed.ApprovalToken != "tok-delegate" { + t.Fatalf("unexpected parsed delegation: %#v", parsed) + } +} + +func TestPrepareApprovalFromCustomEvent_ClonesArgs(t *testing.T) { + args := map[string]any{"k": "v"} + ev := events.NewAgentCustomEvent(string(events.AgentCustomEventNameToolApproval), &events.AgentCustomEventApprovalValue{ + ToolName: "t", + ApprovalToken: "tok", + Args: args, + }) + + req, _, err := prepareApprovalFromCustomEvent(ev) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + parsed, err := types.ParseToolApproval(req) + if err != nil { + t.Fatalf("ParseToolApproval: %v", err) + } + parsed.Args["k"] = "mutated" + if args["k"] != "v" { + t.Fatal("expected args map to be cloned, source was mutated") + } +} + +func TestPrepareApprovalFromCustomEvent_ParseError(t *testing.T) { + ev := events.NewAgentCustomEvent(string(events.AgentCustomEventNameToolApproval), 123) + _, _, err := 
prepareApprovalFromCustomEvent(ev) + if err == nil { + t.Fatal("expected parse error for invalid value shape") + } +} diff --git a/internal/runtime/temporal/runtime.go b/internal/runtime/temporal/runtime.go index 2bca17b..743a282 100644 --- a/internal/runtime/temporal/runtime.go +++ b/internal/runtime/temporal/runtime.go @@ -429,9 +429,9 @@ func (rt *TemporalRuntime) Execute(ctx context.Context, req *runtime.ExecuteRequ if !ok { continue } - apprReq, token, err := types.PrepareApprovalFromCustomEvent(approvalEv) + apprReq, token, err := prepareApprovalFromCustomEvent(approvalEv) if err != nil { - if errors.Is(err, types.ErrNotApprovalCustomEvent) { + if errors.Is(err, ErrNotApprovalCustomEvent) { continue } rt.logger.Error(runCtx, "runtime approval custom event decode failed", slog.String("scope", "runtime"), slog.Any("error", err)) diff --git a/internal/runtime/temporal/runtime_test.go b/internal/runtime/temporal/runtime_test.go index be46b02..15f276c 100644 --- a/internal/runtime/temporal/runtime_test.go +++ b/internal/runtime/temporal/runtime_test.go @@ -68,37 +68,44 @@ func TestAgentNameFromRuntime(t *testing.T) { func TestSyntheticStreamCompleteEvent(t *testing.T) { ev := syntheticStreamCompleteEvent(nil, "threadID", "runID", "root") - if ev == nil || ev.Type() != events.AgentEventTypeRunFinished || ev.(*events.AgentRunFinishedEvent).Result.(*types.AgentRunResult).AgentName != "root" { + fin, _ := ev.(*events.AgentRunFinishedEvent) + if ev == nil || ev.Type() != events.AgentEventTypeRunFinished || fin.Result == nil || fin.Result.AgentName != "root" { t.Fatalf("nil resp: %+v", ev) } ev2 := syntheticStreamCompleteEvent(&types.AgentRunResult{ Content: "body", AgentName: "from-result", - Usage: &types.LLMUsage{TotalTokens: 9}, + LLMUsage: &types.LLMUsage{TotalTokens: 9}, }, "threadID", "runID", "root") - result, ok := ev2.(*events.AgentRunFinishedEvent).Result.(*types.AgentRunResult) - if !ok { - t.Fatalf("expected AgentRunResult, got %T", 
ev2.(*events.AgentRunFinishedEvent).Result) + fin2, _ := ev2.(*events.AgentRunFinishedEvent) + result := fin2.Result + if result == nil { + t.Fatalf("expected AgentRunResult, got nil") } - if result.Content != "body" || result.AgentName != "from-result" || result.Usage.TotalTokens != 9 { + if result.LLMUsage == nil { + t.Fatal("llm usage should be set") + } + if result.Content != "body" || result.AgentName != "from-result" || result.LLMUsage.TotalTokens != 9 { t.Fatalf("with AgentName: %+v", ev2) } ev3 := syntheticStreamCompleteEvent(&types.AgentRunResult{Content: "c", AgentName: ""}, "threadID", "runID", "fallback") - result, ok = ev3.(*events.AgentRunFinishedEvent).Result.(*types.AgentRunResult) - if !ok { - t.Fatalf("expected AgentRunResult, got %T", ev3.(*events.AgentRunFinishedEvent).Result) + fin3, _ := ev3.(*events.AgentRunFinishedEvent) + result = fin3.Result + if result == nil { + t.Fatalf("expected AgentRunResult, got nil") } if result.AgentName != "fallback" { t.Fatalf("fallback name: got %q", result.AgentName) } ev4 := syntheticStreamCompleteEvent(&types.AgentRunResult{Content: "only"}, "threadID", "runID", "") - result, ok = ev4.(*events.AgentRunFinishedEvent).Result.(*types.AgentRunResult) - if !ok { - t.Fatalf("expected AgentRunResult, got %T", ev4.(*events.AgentRunFinishedEvent).Result) + fin4, _ := ev4.(*events.AgentRunFinishedEvent) + result = fin4.Result + if result == nil { + t.Fatalf("expected AgentRunResult, got nil") } if result.AgentName != "" { t.Fatalf("empty rootName with empty resp.AgentName: got %q", result.AgentName) diff --git a/internal/types/agent.go b/internal/types/agent.go index 384df0a..815bb70 100644 --- a/internal/types/agent.go +++ b/internal/types/agent.go @@ -24,7 +24,11 @@ type AgentRunResult struct { Model string `json:"model"` Metadata map[string]any `json:"metadata"` // Usage is the sum of token usage across all LLM calls in this run (when reported by the provider). 
- Usage *LLMUsage `json:"usage,omitempty"` + // LLMUsage acts as the historical root for aggregated token counters. + LLMUsage *LLMUsage `json:"llm_usage,omitempty"` + + // Telemetry contains the strongly typed nested metrics domain payload. + Telemetry *AgentTelemetry `json:"telemetry,omitempty"` } // AgentRunAsyncResult is the single outcome from AgentRunAsync. After the channel closes, Err is non-nil diff --git a/internal/types/approval.go b/internal/types/approval.go index 0f93db4..e61a22c 100644 --- a/internal/types/approval.go +++ b/internal/types/approval.go @@ -6,16 +6,11 @@ import ( "errors" "fmt" "time" - - "github.com/agenticenv/agent-sdk-go/internal/events" ) // maxApprovalTimeout caps how long a single approval wait may last in the run. const MaxApprovalTimeout = 31 * 24 * time.Hour -// ErrNotApprovalCustomEvent means the CUSTOM event name is not tool or delegation approval. -var ErrNotApprovalCustomEvent = errors.New("types: custom event is not a recognized approval kind") - type ApprovalStatus string const ( @@ -45,7 +40,7 @@ const ( // ApprovalRequest is one pending approval callback. Name + Value match CUSTOM semantics; // Value is a [ToolApprovalRequestValue] or [SubAgentDelegationApprovalRequestValue]. -// Set Respond before invoking the handler (see [PrepareApprovalFromCustomEvent]). +// Set Respond before invoking the handler. type ApprovalRequest struct { Name ApprovalRequestName `json:"name,omitempty"` Value any `json:"value,omitempty"` @@ -70,76 +65,6 @@ type SubAgentDelegationApprovalRequestValue struct { ApprovalToken string `json:"approvalToken,omitempty"` } -// ToolApprovalFromEventValue copies the CUSTOM approval payload into an SDK approval value. 
-func ToolApprovalFromEventValue(ev events.AgentCustomEventApprovalValue) ToolApprovalRequestValue { - return ToolApprovalRequestValue{ - AgentName: ev.AgentName, - ToolCallID: ev.ToolCallID, - ToolName: ev.ToolName, - ToolDisplayName: ev.ToolDisplayName, - Args: cloneArgsMap(ev.Args), - ApprovalToken: ev.ApprovalToken, - } -} - -// DelegationApprovalFromEventValue copies the CUSTOM delegation payload into an SDK approval value. -func DelegationApprovalFromEventValue(ev events.AgentCustomEventDelegationValue) SubAgentDelegationApprovalRequestValue { - return SubAgentDelegationApprovalRequestValue{ - AgentName: ev.AgentName, - SubAgentName: ev.SubAgentName, - Args: cloneArgsMap(ev.Args), - ApprovalToken: ev.ApprovalToken, - } -} - -func cloneArgsMap(m map[string]any) map[string]any { - if m == nil { - return nil - } - out := make(map[string]any, len(m)) - for k, v := range m { - out[k] = v - } - return out -} - -// PrepareApprovalFromCustomEvent parses a CUSTOM event and returns Name + Value as SDK types and the approval token for Temporal CompleteActivity. -// Respond is nil; the caller must set it before calling [ApprovalHandler]. -// Returns [ErrNotApprovalCustomEvent] when ev.Name is not tool or delegation approval. 
-func PrepareApprovalFromCustomEvent(ev *events.AgentCustomEvent) (req *ApprovalRequest, approvalToken string, err error) { - if ev == nil { - return nil, "", fmt.Errorf("types: nil custom event") - } - switch events.AgentCustomEventName(ev.Name) { - case events.AgentCustomEventNameToolApproval: - raw, err := events.ParseCustomEventApproval(ev) - if err != nil { - return nil, "", err - } - v := ToolApprovalFromEventValue(raw) - return &ApprovalRequest{ - Name: ApprovalRequestNameTool, - Value: v, - }, - v.ApprovalToken, - nil - case events.AgentCustomEventNameSubAgentDelegation: - raw, err := events.ParseCustomEventDelegation(ev) - if err != nil { - return nil, "", err - } - v := DelegationApprovalFromEventValue(raw) - return &ApprovalRequest{ - Name: ApprovalRequestNameSubAgent, - Value: v, - }, - v.ApprovalToken, - nil - default: - return nil, "", ErrNotApprovalCustomEvent - } -} - func parseApprovalPayload[V any](v any) (out V, err error) { if v == nil { return out, fmt.Errorf("types: nil approval value") diff --git a/internal/types/telemetry.go b/internal/types/telemetry.go new file mode 100644 index 0000000..69c4cc1 --- /dev/null +++ b/internal/types/telemetry.go @@ -0,0 +1,96 @@ +package types + +import "time" + +// AgentTelemetry is the unified container for operational insights across +// a single agent run, covering run lifecycle, tool calls, and storage operations. +type AgentTelemetry struct { + // Run captures the orchestration lifecycle metrics for the run. + Run RunTelemetry `json:"run"` + + // Tools tracks tool invocation counts and breakdowns for the run. + Tools ToolTelemetry `json:"tools"` + + // Storage tracks data storage and retrieval operations for the run. + Storage StorageTelemetry `json:"storage"` +} + +type FinishReason string + +const ( + // FinishReasonComplete indicates that the agent run completed normally. 
+ FinishReasonComplete FinishReason = "complete" + // FinishReasonMaxIterations indicates that the agent run completed because the maximum number of iterations was reached. + FinishReasonMaxIterations FinishReason = "max_iterations" +) + +// RunTelemetry captures the orchestration lifecycle metrics for a single agent run. +type RunTelemetry struct { + // StartedAt tracks the start time of the agent run. + StartedAt time.Time `json:"started_at"` + + // CompletedAt tracks the completion time of the agent run. + CompletedAt time.Time `json:"completed_at"` + + // TotalLLMCalls counts how many LLM calls were made during the run. + TotalLLMCalls int64 `json:"total_llm_calls"` + + // FinishReason explains how the run concluded. See FinishReason for possible values. + FinishReason FinishReason `json:"finish_reason"` +} + +// ToolTelemetry tracks tool invocation counts and per-tool breakdowns across a single agent run. +type ToolTelemetry struct { + // TotalCalls is the total number of tool invocations made by the agent. + TotalCalls int64 `json:"total_calls"` + + // FailedCalls is the number of tool invocations that returned an error. + // Excludes approval-denied and unauthorized cases. + FailedCalls int64 `json:"failed_calls"` + + // Breakdown tracks invocation counts per tool name. + // Key: tool name (e.g. "palo_alto_fw_lookup"), Value: invocation count + Breakdown map[string]int64 `json:"breakdown,omitempty"` + + // FailedBreakdown tracks failed invocation counts per tool name. + // Excludes approval-denied and unauthorized cases. + // Key: tool name (e.g. "palo_alto_fw_lookup"), Value: failed invocation count + FailedBreakdown map[string]int64 `json:"failed_breakdown,omitempty"` +} + +// Record increments tool invocation counters for name. When failed is true, failed counters are updated too. +// Breakdown keys are omitted when name is empty. Caller must serialize concurrent Record calls (e.g. mutex in parallel tool execution). 
+func (t *ToolTelemetry) Record(name string, failed bool) { + if t == nil { + return + } + t.TotalCalls++ + if t.Breakdown == nil { + t.Breakdown = make(map[string]int64) + } + if name != "" { + t.Breakdown[name]++ + } + if !failed { + return + } + t.FailedCalls++ + if t.FailedBreakdown == nil { + t.FailedBreakdown = make(map[string]int64) + } + if name != "" { + t.FailedBreakdown[name]++ + } +} + +// StorageTelemetry tracks RAG retrieval operations across prefetch, agentic, and hybrid modes. +// All fields are zero when no retriever is configured. +type StorageTelemetry struct { + // Retriever — RAG searches (prefetch/agentic/hybrid), zero if not configured. + TotalRetrieverSearches int64 `json:"total_retriever_searches"` + FailedRetrieverSearches int64 `json:"failed_retriever_searches"` + + // Breakdown by mode — zero if mode not used. + PrefetchSearches int64 `json:"prefetch_searches,omitempty"` + AgenticSearches int64 `json:"agentic_searches,omitempty"` +} diff --git a/internal/types/telemetry_test.go b/internal/types/telemetry_test.go new file mode 100644 index 0000000..3eadf68 --- /dev/null +++ b/internal/types/telemetry_test.go @@ -0,0 +1,61 @@ +package types + +import "testing" + +func TestToolTelemetry_Record_nilReceiver(t *testing.T) { + var tels *ToolTelemetry + tels.Record("calc", false) +} + +func TestToolTelemetry_Record_success(t *testing.T) { + var tools ToolTelemetry + tools.Record("calculator", false) + if tools.TotalCalls != 1 || tools.FailedCalls != 0 { + t.Fatalf("calls: total=%d failed=%d", tools.TotalCalls, tools.FailedCalls) + } + if tools.Breakdown["calculator"] != 1 { + t.Fatalf("breakdown: %#v", tools.Breakdown) + } + if len(tools.FailedBreakdown) != 0 { + t.Fatalf("unexpected failed breakdown: %#v", tools.FailedBreakdown) + } +} + +func TestToolTelemetry_Record_failed(t *testing.T) { + var tools ToolTelemetry + tools.Record("weather", true) + if tools.TotalCalls != 1 || tools.FailedCalls != 1 { + t.Fatalf("calls: total=%d failed=%d", 
tools.TotalCalls, tools.FailedCalls) + } + if tools.Breakdown["weather"] != 1 || tools.FailedBreakdown["weather"] != 1 { + t.Fatalf("breakdown=%#v failed=%#v", tools.Breakdown, tools.FailedBreakdown) + } +} + +func TestToolTelemetry_Record_multiple(t *testing.T) { + var tools ToolTelemetry + tools.Record("calculator", false) + tools.Record("calculator", true) + tools.Record("mcp_srv_search", false) + + if tools.TotalCalls != 3 || tools.FailedCalls != 1 { + t.Fatalf("calls: total=%d failed=%d", tools.TotalCalls, tools.FailedCalls) + } + if tools.Breakdown["calculator"] != 2 || tools.Breakdown["mcp_srv_search"] != 1 { + t.Fatalf("breakdown: %#v", tools.Breakdown) + } + if tools.FailedBreakdown["calculator"] != 1 { + t.Fatalf("failed breakdown: %#v", tools.FailedBreakdown) + } +} + +func TestToolTelemetry_Record_emptyName(t *testing.T) { + var tools ToolTelemetry + tools.Record("", true) + if tools.TotalCalls != 1 || tools.FailedCalls != 1 { + t.Fatalf("calls: total=%d failed=%d", tools.TotalCalls, tools.FailedCalls) + } + if len(tools.Breakdown) != 0 || len(tools.FailedBreakdown) != 0 { + t.Fatalf("expected no breakdown keys for empty name, got breakdown=%#v failed=%#v", tools.Breakdown, tools.FailedBreakdown) + } +} diff --git a/internal/types/tool.go b/internal/types/tool.go index f1e15bb..42d5ee8 100644 --- a/internal/types/tool.go +++ b/internal/types/tool.go @@ -14,3 +14,42 @@ type JSONSchema map[string]any func (s JSONSchema) MarshalJSON() ([]byte, error) { return json.Marshal(map[string]any(s)) } + +// ToolKind classifies SDK-built tool wrappers. User-registered tools default to [ToolKindNative]. +type ToolKind string + +const ( + ToolKindNative ToolKind = "native" + ToolKindMCP ToolKind = "mcp" + ToolKindA2A ToolKind = "a2a" + ToolKindSubAgent ToolKind = "sub_agent" + ToolKindRetriever ToolKind = "retriever" +) + +// ToolKindProvider is implemented by SDK tool wrappers (MCP, A2A, sub-agent, retriever). 
+type ToolKindProvider interface { + ToolKind() ToolKind +} + +// KindOf returns the tool kind when t implements [ToolKindProvider], otherwise [ToolKindNative]. +func KindOf(t any) ToolKind { + if t == nil { + return ToolKindNative + } + if k, ok := t.(ToolKindProvider); ok { + if kind := k.ToolKind(); kind != "" { + return kind + } + } + return ToolKindNative +} + +// CountsTowardToolTelemetry reports whether invocations of this kind belong in [ToolTelemetry]. +func (k ToolKind) CountsTowardToolTelemetry() bool { + switch k { + case ToolKindSubAgent, ToolKindA2A, ToolKindRetriever: + return false + default: + return true + } +} diff --git a/internal/types/tool_test.go b/internal/types/tool_test.go new file mode 100644 index 0000000..43f3f9e --- /dev/null +++ b/internal/types/tool_test.go @@ -0,0 +1,35 @@ +package types + +import "testing" + +type stubKindTool struct{ kind ToolKind } + +func (s stubKindTool) ToolKind() ToolKind { return s.kind } + +type stubNativeTool struct{} + +func TestKindOf(t *testing.T) { + if KindOf(nil) != ToolKindNative { + t.Fatalf("nil = %q", KindOf(nil)) + } + if KindOf(stubNativeTool{}) != ToolKindNative { + t.Fatal("native tool without provider") + } + if KindOf(stubKindTool{kind: ToolKindMCP}) != ToolKindMCP { + t.Fatal("mcp kind") + } + if KindOf(stubKindTool{kind: ""}) != ToolKindNative { + t.Fatal("empty kind falls back to native") + } +} + +func TestToolKind_CountsTowardToolTelemetry(t *testing.T) { + if !ToolKindNative.CountsTowardToolTelemetry() || !ToolKindMCP.CountsTowardToolTelemetry() { + t.Fatal("native and mcp count toward tool telemetry") + } + for _, k := range []ToolKind{ToolKindSubAgent, ToolKindA2A, ToolKindRetriever} { + if k.CountsTowardToolTelemetry() { + t.Fatalf("%q should not count toward tool telemetry", k) + } + } +} diff --git a/pkg/agent/a2a.go b/pkg/agent/a2a.go index 99f12dc..d12da38 100644 --- a/pkg/agent/a2a.go +++ b/pkg/agent/a2a.go @@ -23,7 +23,7 @@ var ( const defaultA2AToolTimeout = 
types.DefaultA2ATimeout var _ interfaces.Tool = (*A2ATool)(nil) -var _ interfaces.ToolKindProvider = (*A2ATool)(nil) +var _ types.ToolKindProvider = (*A2ATool)(nil) // NOTE: A2ATools for the same server share one A2AClient. The default pkg/a2a/client is safe // for concurrent use; custom A2AClient implementations should document concurrency behaviour. @@ -76,8 +76,8 @@ func NewA2ATool(serverName string, spec interfaces.ToolSpec, skillSpec interface return &A2ATool{ServerName: serverName, Spec: spec, SkillSpec: skillSpec, Client: client} } -// ToolKind implements [interfaces.ToolKindProvider]. -func (t *A2ATool) ToolKind() string { return "a2a" } +// ToolKind implements [types.ToolKindProvider]. +func (t *A2ATool) ToolKind() types.ToolKind { return types.ToolKindA2A } // Name implements [interfaces.Tool]. func (t *A2ATool) Name() string { diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index b1c5d15..a71aaa6 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -41,6 +41,23 @@ type AgentRunResult = types.AgentRunResult // on failure; otherwise Result is non-nil. type AgentRunAsyncResult = types.AgentRunAsyncResult +// AgentTelemetry is the unified container for operational insights across +// a single agent run, covering run lifecycle, tool calls, and storage operations. +// Token usage is reported separately on AgentRunResult.LLMUsage. +type AgentTelemetry = types.AgentTelemetry + +// LLMUsage holds token usage counts; on AgentRunResult.LLMUsage it is the sum across all LLM calls in the run. +type LLMUsage = types.LLMUsage + +// RunTelemetry captures the orchestration lifecycle metrics for a single agent run. +type RunTelemetry = types.RunTelemetry + +// ToolTelemetry tracks tool invocation counts and per-tool breakdowns across a single agent run. +type ToolTelemetry = types.ToolTelemetry + +// StorageTelemetry tracks RAG retrieval operations (prefetch, agentic, and hybrid searches). +type StorageTelemetry = types.StorageTelemetry + // buildAgent builds an Agent from options. 
Validates approval handler when tools require approval. func buildAgent(opts []Option) (*Agent, error) { cfg, err := buildAgentConfig(opts) diff --git a/pkg/agent/agent_test.go b/pkg/agent/agent_test.go index 6593a28..69bd15c 100644 --- a/pkg/agent/agent_test.go +++ b/pkg/agent/agent_test.go @@ -111,9 +111,9 @@ func TestAgent_Stream_SetsStreamingEnabled(t *testing.T) { if !ok || fin == nil { t.Fatalf("event not *AgentRunFinishedEvent: %+v", ev) } - result, ok := fin.Result.(*types.AgentRunResult) - if !ok || result == nil { - t.Fatalf("Result not *AgentRunResult: %+v", fin.Result) + result := fin.Result + if result == nil { + t.Fatalf("Result is nil") } if result.Content != "done" { t.Fatalf("result.Content = %q", result.Content) diff --git a/pkg/agent/config.go b/pkg/agent/config.go index f25dd7a..3df6850 100644 --- a/pkg/agent/config.go +++ b/pkg/agent/config.go @@ -1140,11 +1140,11 @@ func validateToolNames(tools []interfaces.Tool) error { return fmt.Errorf("tool must not be nil") } name := tool.Name() - kind := interfaces.KindOf(tool) + kind := types.KindOf(tool) if prev, ok := seen[name]; ok { return fmt.Errorf("duplicate tool name %q: %s tool conflicts with an existing %s tool", name, kind, prev) } - seen[name] = kind + seen[name] = string(kind) } return nil } diff --git a/pkg/agent/mcp.go b/pkg/agent/mcp.go index 8db4daa..7b6d76e 100644 --- a/pkg/agent/mcp.go +++ b/pkg/agent/mcp.go @@ -19,7 +19,7 @@ var ( ) var _ interfaces.Tool = (*MCPTool)(nil) -var _ interfaces.ToolKindProvider = (*MCPTool)(nil) +var _ types.ToolKindProvider = (*MCPTool)(nil) // NOTE: MCPTools for the same server share one MCPClient. The default pkg/mcp/client serializes // RPCs on that client with a mutex; custom MCPClient implementations should document concurrency behavior. @@ -63,8 +63,8 @@ func NewMCPTool(serverName string, spec interfaces.ToolSpec, client interfaces.M } } -// ToolKind implements [interfaces.ToolKindProvider]. 
-func (t *MCPTool) ToolKind() string { return "mcp" } +// ToolKind implements [types.ToolKindProvider]. +func (t *MCPTool) ToolKind() types.ToolKind { return types.ToolKindMCP } // Name implements interfaces.Tool. func (t *MCPTool) Name() string { diff --git a/pkg/agent/retriever.go b/pkg/agent/retriever.go index 057642c..cbaf906 100644 --- a/pkg/agent/retriever.go +++ b/pkg/agent/retriever.go @@ -20,7 +20,7 @@ var ( ) var _ interfaces.Tool = (*RetrieverTool)(nil) -var _ interfaces.ToolKindProvider = (*RetrieverTool)(nil) +var _ types.ToolKindProvider = (*RetrieverTool)(nil) // RetrieverTool implements [interfaces.Tool] for [RetrieverModeAgentic] and [RetrieverModeHybrid]. type RetrieverTool struct { @@ -61,8 +61,8 @@ func NewRetrieverTool(retriever interfaces.Retriever) interfaces.Tool { return &RetrieverTool{RetrieverName: rn, Retriever: retriever} } -// ToolKind implements [interfaces.ToolKindProvider]. -func (t *RetrieverTool) ToolKind() string { return "retriever" } +// ToolKind implements [types.ToolKindProvider]. +func (t *RetrieverTool) ToolKind() types.ToolKind { return types.ToolKindRetriever } // Name implements [interfaces.Tool]. func (t *RetrieverTool) Name() string { diff --git a/pkg/agent/subagent.go b/pkg/agent/subagent.go index 6be6237..3e79eb2 100644 --- a/pkg/agent/subagent.go +++ b/pkg/agent/subagent.go @@ -8,13 +8,14 @@ import ( "strings" "github.com/agenticenv/agent-sdk-go/internal/runtime" + "github.com/agenticenv/agent-sdk-go/internal/types" "github.com/agenticenv/agent-sdk-go/pkg/interfaces" "github.com/agenticenv/agent-sdk-go/pkg/tools" ) var _ AgentTool = (*subAgentTool)(nil) var _ interfaces.Tool = (*subAgentTool)(nil) -var _ interfaces.ToolKindProvider = (*subAgentTool)(nil) +var _ types.ToolKindProvider = (*subAgentTool)(nil) // Sub-agent tool names must be identifier-like for LLM tool APIs; normalize display names accordingly. 
var subAgentToolNameNonIdent = regexp.MustCompile(`[^a-zA-Z0-9]+`) @@ -119,5 +120,5 @@ func (t *subAgentTool) Execute(_ context.Context, _ map[string]any) (any, error) func (t *subAgentTool) SubAgent() *Agent { return t.agent } -// ToolKind implements [interfaces.ToolKindProvider]. -func (t *subAgentTool) ToolKind() string { return "sub-agent" } +// ToolKind implements [types.ToolKindProvider]. +func (t *subAgentTool) ToolKind() types.ToolKind { return types.ToolKindSubAgent } diff --git a/pkg/interfaces/mocks/mock_tool.go b/pkg/interfaces/mocks/mock_tool.go index 068c279..f519ff4 100644 --- a/pkg/interfaces/mocks/mock_tool.go +++ b/pkg/interfaces/mocks/mock_tool.go @@ -1,5 +1,5 @@ // Code generated by MockGen. DO NOT EDIT. -// Source: github.com/agenticenv/agent-sdk-go/pkg/interfaces (interfaces: Tool,ToolApproval,ToolAuthorizer,ToolKindProvider) +// Source: github.com/agenticenv/agent-sdk-go/pkg/interfaces (interfaces: Tool,ToolApproval,ToolAuthorizer) // Package mocks is a generated GoMock package. package mocks @@ -181,40 +181,3 @@ func (mr *MockToolAuthorizerMockRecorder) Authorize(arg0, arg1 interface{}) *gom mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Authorize", reflect.TypeOf((*MockToolAuthorizer)(nil).Authorize), arg0, arg1) } - -// MockToolKindProvider is a mock of ToolKindProvider interface. -type MockToolKindProvider struct { - ctrl *gomock.Controller - recorder *MockToolKindProviderMockRecorder -} - -// MockToolKindProviderMockRecorder is the mock recorder for MockToolKindProvider. -type MockToolKindProviderMockRecorder struct { - mock *MockToolKindProvider -} - -// NewMockToolKindProvider creates a new mock instance. -func NewMockToolKindProvider(ctrl *gomock.Controller) *MockToolKindProvider { - mock := &MockToolKindProvider{ctrl: ctrl} - mock.recorder = &MockToolKindProviderMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use. 
-func (m *MockToolKindProvider) EXPECT() *MockToolKindProviderMockRecorder { - return m.recorder -} - -// ToolKind mocks base method. -func (m *MockToolKindProvider) ToolKind() string { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ToolKind") - ret0, _ := ret[0].(string) - return ret0 -} - -// ToolKind indicates an expected call of ToolKind. -func (mr *MockToolKindProviderMockRecorder) ToolKind() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ToolKind", reflect.TypeOf((*MockToolKindProvider)(nil).ToolKind)) -} diff --git a/pkg/interfaces/tool.go b/pkg/interfaces/tool.go index b8aa00d..011ffb9 100644 --- a/pkg/interfaces/tool.go +++ b/pkg/interfaces/tool.go @@ -6,7 +6,7 @@ import ( "github.com/agenticenv/agent-sdk-go/internal/types" ) -//go:generate mockgen -destination=./mocks/mock_tool.go -package=mocks github.com/agenticenv/agent-sdk-go/pkg/interfaces Tool,ToolApproval,ToolAuthorizer,ToolKindProvider +//go:generate mockgen -destination=./mocks/mock_tool.go -package=mocks github.com/agenticenv/agent-sdk-go/pkg/interfaces Tool,ToolApproval,ToolAuthorizer // ToolApproval is an optional interface for tools that require interactive human approval before execution. // When implemented, the agent honors ApprovalRequired() when no agent-level approval policy is set. @@ -73,18 +73,3 @@ func ToolsToSpecs(tools []Tool) []ToolSpec { } return specs } - -// ToolKindProvider is an optional interface for tools that report their origin. -type ToolKindProvider interface { - ToolKind() string -} - -// KindOf returns ToolKind() from t when implemented, or "native". 
-func KindOf(t Tool) string { - if k, ok := t.(ToolKindProvider); ok { - if s := k.ToolKind(); s != "" { - return s - } - } - return "native" -} diff --git a/pkg/interfaces/tool_test.go b/pkg/interfaces/tool_test.go deleted file mode 100644 index 73eda79..0000000 --- a/pkg/interfaces/tool_test.go +++ /dev/null @@ -1,40 +0,0 @@ -package interfaces - -import ( - "context" - "testing" -) - -type stubKindTool struct{ kind string } - -func (s stubKindTool) ToolKind() string { return s.kind } -func (stubKindTool) Name() string { return "x" } -func (stubKindTool) DisplayName() string { return "x" } -func (stubKindTool) Description() string { return "" } -func (stubKindTool) Parameters() JSONSchema { return JSONSchema{"type": "object"} } -func (stubKindTool) Execute(_ context.Context, _ map[string]any) (any, error) { return nil, nil } - -type stubNativeTool struct{} - -func (stubNativeTool) Name() string { return "n" } -func (stubNativeTool) DisplayName() string { return "n" } -func (stubNativeTool) Description() string { return "" } -func (stubNativeTool) Parameters() JSONSchema { return JSONSchema{"type": "object"} } -func (stubNativeTool) Execute(_ context.Context, _ map[string]any) (any, error) { - return nil, nil -} - -func TestKindOf(t *testing.T) { - if KindOf(nil) != "native" { - t.Fatalf("nil = %q", KindOf(nil)) - } - if KindOf(stubNativeTool{}) != "native" { - t.Fatal("native tool without provider") - } - if KindOf(stubKindTool{kind: "mcp"}) != "mcp" { - t.Fatal("mcp kind") - } - if KindOf(stubKindTool{kind: ""}) != "native" { - t.Fatal("empty kind falls back to native") - } -}