From 312060cc5cf2cd59bea8d4454af35e52994a4b68 Mon Sep 17 00:00:00 2001 From: Ashlynn Antrobus Date: Thu, 2 Apr 2026 16:20:51 -0600 Subject: [PATCH 1/2] feat(observability): emit METRIC event with token usage after each agent run Captures input_tokens, output_tokens, total_tokens, and model from the pydantic-ai RunResult after every successful LLM call in run_agent_async, emitting them via the existing on_observe callback as ObservationType.METRIC. The package remains ignorant of user and run identity. --- src/lorebinders/agent/factory.py | 13 ++++++++ tests/unit/agents/test_factory.py | 49 +++++++++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/lorebinders/agent/factory.py b/src/lorebinders/agent/factory.py index 9948a59..b8ebb4d 100644 --- a/src/lorebinders/agent/factory.py +++ b/src/lorebinders/agent/factory.py @@ -125,6 +125,19 @@ async def run_agent_async( f"Agent run completed with model {model}", meta, ) + usage = res.usage() + emit_observation( + on_observe, + ObservationType.METRIC, + "agent", + f"Token usage for model {model}", + { + "model": model, + "input_tokens": usage.input_tokens, + "output_tokens": usage.output_tokens, + "total_tokens": usage.input_tokens + usage.output_tokens, + }, + ) return res.output except Exception as e: logger.error(f"Agent run failed: {e}") diff --git a/tests/unit/agents/test_factory.py b/tests/unit/agents/test_factory.py index e4b5473..d515c71 100644 --- a/tests/unit/agents/test_factory.py +++ b/tests/unit/agents/test_factory.py @@ -1,9 +1,22 @@ +import pytest from pydantic_ai.exceptions import ModelHTTPError from pydantic_ai.models.fallback import FallbackModel from pydantic_ai.models.test import TestModel -from lorebinders.agent.factory import _is_moderation_error, create_agent -from lorebinders.models import AgentDeps, ExtractionResult +from lorebinders.agent.factory import ( + _is_moderation_error, + create_agent, + create_extraction_agent, + load_prompt_from_assets, + run_agent_async, +) +from lorebinders.models import ( + AgentDeps, + ExtractionResult, + ObservationEvent, + ObservationType, +) +from lorebinders.settings import get_settings def test_is_moderation_error_true() -> None: @@ -40,3 +53,35 @@ def test_create_agent_with_fallback_wraps_in_fallback_model() -> None: fallback=fallback, ) assert isinstance(agent.model, FallbackModel) + + +@pytest.mark.anyio +async def test_run_agent_async_emits_metric_event() -> None: + """Test that run_agent_async emits a METRIC event with token counts.""" + observations: list[ObservationEvent] = [] + + def on_observe(event: ObservationEvent) -> None: + observations.append(event) + + agent = create_extraction_agent() + agent.model = TestModel() + + deps = AgentDeps( + settings=get_settings(), + prompt_loader=load_prompt_from_assets, + ) + + await run_agent_async( + agent, "test prompt", deps=deps, on_observe=on_observe + ) + + metric_events = [ + o for o in observations if o.type == ObservationType.METRIC + ] + assert len(metric_events) == 1 + meta = metric_events[0].metadata + assert isinstance(meta["input_tokens"], int) + assert isinstance(meta["output_tokens"], int) + assert isinstance(meta["total_tokens"], int) + assert meta["total_tokens"] == meta["input_tokens"] + meta["output_tokens"] + assert "model" in meta From 12ad6b60d5f7ee6279ef372115bc6333b8f3ffaf Mon Sep 17 00:00:00 2001 From: Ashlynn Antrobus Date: Fri, 3 Apr 2026 20:05:03 -0600 Subject: [PATCH 2/2] fix: handle None --- src/lorebinders/agent/factory.py | 29 ++++++++++++++++------------- tests/unit/agents/test_factory.py | 3 +++ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/lorebinders/agent/factory.py b/src/lorebinders/agent/factory.py index b8ebb4d..13df9b7 100644 --- a/src/lorebinders/agent/factory.py +++ b/src/lorebinders/agent/factory.py @@ -125,19 +125,22 @@ async def run_agent_async( f"Agent run completed with model {model}", meta, ) - usage = res.usage() - emit_observation( - on_observe, - ObservationType.METRIC, - "agent", - f"Token usage for model {model}", - { - "model": model, - "input_tokens": usage.input_tokens, - "output_tokens": usage.output_tokens, - "total_tokens": usage.input_tokens + usage.output_tokens, - }, - ) + try: + usage = res.usage() + emit_observation( + on_observe, + ObservationType.METRIC, + "agent", + f"Token usage for model {model}", + { + "model": model, + "input_tokens": usage.input_tokens, + "output_tokens": usage.output_tokens, + "total_tokens": usage.input_tokens + usage.output_tokens, + }, + ) + except Exception as e: + logger.warning(f"Failed to collect token usage metrics: {e}") return res.output except Exception as e: logger.error(f"Agent run failed: {e}") diff --git a/tests/unit/agents/test_factory.py b/tests/unit/agents/test_factory.py index d515c71..2622fd7 100644 --- a/tests/unit/agents/test_factory.py +++ b/tests/unit/agents/test_factory.py @@ -83,5 +83,8 @@ def on_observe(event: ObservationEvent) -> None: assert isinstance(meta["input_tokens"], int) assert isinstance(meta["output_tokens"], int) assert isinstance(meta["total_tokens"], int) + assert meta["input_tokens"] >= 0 + assert meta["output_tokens"] >= 0 + assert meta["total_tokens"] >= 0 assert meta["total_tokens"] == meta["input_tokens"] + meta["output_tokens"] assert "model" in meta