From 897c63cec9b4913617128341859555c337c658f9 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 19 Jun 2026 11:00:59 +0000 Subject: [PATCH 1/3] Add endpoint probe, rename cost_probe, add PR/probe interval throttling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename scripts/sources/probe.py → cost_probe.py; class ProbeSource → CostProbeSource; config key free_tier.probe → free_tier.cost_probe; state file probe_state.json → cost_probe_state.json (with auto-migration). Back-compat aliases preserved in config.py and admin.py so existing configs and admin API keep working. - Add scripts/sources/endpoint_probe.py (EndpointProbeSource): calls each configured provider's GET /v1/models, emits positive Evidence for new :free-suffixed models not yet in believed_free and negative Evidence for :free models that disappeared. Gated by sync_on_startup or update_on_startup (no separate enabled flag needed). Throttled by free_tier.endpoint_probe.frequency_minutes (default 30). - Add providers_pr.frequency_days throttle: _maybe_open_providers_pr() checks pr_state.json before opening a PR; saves last_pr_at on success. Accumulated sidecar changes are bundled into one PR per interval. - Add _maybe_fire_interval_probes() in server.py: debounced (60 s gate) check called on startup and every request. Fires endpoint probe (sync_on_startup / update_on_startup), cost probe (update_on_startup + cost_probe.enabled), and PR creation (frequency_days elapsed) as single-flight daemon threads when their intervals have elapsed. - New config keys: free_tier.endpoint_probe.{frequency_minutes,timeout_sec}, free_tier.cost_probe (renamed), providers_pr.frequency_days. State files: cost_probe_state.json, endpoint_probe_state.json, pr_state.json. Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01RkRhX4Znv73oPcZy41bnyS --- llmproxy/admin.py | 6 +- llmproxy/config.py | 118 ++++++++--- llmproxy/server.py | 212 +++++++++++++++++++- llmproxy/usage.py | 2 +- scripts/sources/__init__.py | 16 +- scripts/sources/{probe.py => cost_probe.py} | 12 +- scripts/sources/endpoint_probe.py | 131 ++++++++++++ scripts/update_free_models.py | 151 ++++++++------ 8 files changed, 551 insertions(+), 97 deletions(-) rename scripts/sources/{probe.py => cost_probe.py} (95%) create mode 100644 scripts/sources/endpoint_probe.py diff --git a/llmproxy/admin.py b/llmproxy/admin.py index f647d81..8f899ca 100644 --- a/llmproxy/admin.py +++ b/llmproxy/admin.py @@ -389,12 +389,12 @@ def admin_index(): # the single-page admin UI unchanged while the on-disk config uses the grouped # objects (and the config loader's migration shim accepts either form on input). _MAINTENANCE_PATHS: dict[str, tuple[str, ...]] = { - "probe_cost": ("free_tier", "probe", "enabled"), - "autoremove_believed_free": ("free_tier", "probe", "autoremove"), + "probe_cost": ("free_tier", "cost_probe", "enabled"), + "autoremove_believed_free": ("free_tier", "cost_probe", "autoremove"), "update_believed_free_on_startup": ("free_tier", "update_on_startup"), "pr_providers_list": ("providers_pr", "enabled"), "sync_believed_free_on_startup": ("free_tier", "sync_on_startup"), - "probe_frequency_days": ("free_tier", "probe", "frequency_days"), + "probe_frequency_days": ("free_tier", "cost_probe", "frequency_days"), "pr_providers_repo": ("providers_pr", "repo"), "pr_providers_base": ("providers_pr", "base"), "pr_providers_branch": ("providers_pr", "branch"), diff --git a/llmproxy/config.py b/llmproxy/config.py index 4650ea1..68f275f 100755 --- a/llmproxy/config.py +++ b/llmproxy/config.py @@ -135,7 +135,11 @@ def get_config_path(override: str | None = None) -> Path: DEFAULT_FREE_TIER_CONFIG = { "sync_on_startup": True, "update_on_startup": False, - "probe": { + "endpoint_probe": { + "frequency_minutes": 30, + "timeout_sec": 10, + }, + "cost_probe": { "enabled": False, "autoremove": False, "frequency_days": 0, @@ -193,9 +197,9 @@ def get_config_path(override: str | None = None) -> Path: _LEGACY_KEY_MIGRATIONS: dict[str, tuple[str, ...]] = { "sync_believed_free_on_startup": ("free_tier", "sync_on_startup"), "update_believed_free_on_startup": ("free_tier", "update_on_startup"), - "probe_cost": ("free_tier", "probe", "enabled"), - "autoremove_believed_free": ("free_tier", "probe", "autoremove"), - "probe_frequency_days": ("free_tier", "probe", "frequency_days"), + "probe_cost": ("free_tier", "cost_probe", "enabled"), + "autoremove_believed_free": ("free_tier", "cost_probe", "autoremove"), + "probe_frequency_days": ("free_tier", "cost_probe", "frequency_days"), "pr_providers_list": ("providers_pr", "enabled"), "pr_providers_repo": ("providers_pr", "repo"), "pr_providers_base": ("providers_pr", "base"), @@ -317,22 +321,14 @@ def save_config(config: dict, config_path: str | None = None) -> bool: # --------------------------------------------------------------------------- -# Probe state (machine-managed cache, kept out of the user-edited config) +# Probe state (machine-managed caches, kept out of the user-edited config) # --------------------------------------------------------------------------- # -# The cost probe (scripts/sources/probe.py) can be throttled to at most once -# every ``probe_frequency_days``. The last-run timestamp is stored in a small -# sibling cache file rather than in config.json so we don't churn the -# hand-edited config (or sweep the timestamp into the providers-PR / sync flow). - -def get_probe_state_path(config_path: str | None = None) -> Path: - """Return the path to the probe-state cache file, a sibling of config.json.""" - return get_config_path(config_path).parent / "probe_state.json" +# Each probe (cost_probe, endpoint_probe) and PR creation is throttled via its +# own frequency setting. The last-run timestamps live in small sibling cache +# files rather than in config.json so we don't churn the hand-edited config. - -def load_probe_state(config_path: str | None = None) -> dict: - """Load the probe-state cache, returning {} when absent or unreadable.""" - path = get_probe_state_path(config_path) +def _load_state_file(path: Path, label: str) -> dict: if not path.exists(): return {} try: @@ -340,13 +336,11 @@ def load_probe_state(config_path: str | None = None) -> dict: data = json.load(fh) return data if isinstance(data, dict) else {} except Exception as e: # noqa: BLE001 — a corrupt cache must never break a run - print(f"[config:load_probe_state] Failed to load {path}: {e}") + print(f"[config:{label}] Failed to load {path}: {e}") return {} -def save_probe_state(state: dict, config_path: str | None = None) -> bool: - """Persist the probe-state cache atomically (tempfile + rename).""" - path = get_probe_state_path(config_path) +def _save_state_file(state: dict, path: Path, label: str) -> bool: try: path.parent.mkdir(parents=True, exist_ok=True) fd, tmp_name = tempfile.mkstemp( @@ -366,10 +360,80 @@ def save_probe_state(state: dict, config_path: str | None = None) -> bool: raise return True except Exception as e: # noqa: BLE001 - print(f"[config:save_probe_state] Failed to write {path}: {e}") + print(f"[config:{label}] Failed to write {path}: {e}") return False +# --- Cost probe state (cost_probe_state.json) --- + +def get_cost_probe_state_path(config_path: str | None = None) -> Path: + return get_config_path(config_path).parent / "cost_probe_state.json" + + +def load_cost_probe_state(config_path: str | None = None) -> dict: + """Load cost probe state, migrating from the old probe_state.json if needed.""" + path = get_cost_probe_state_path(config_path) + if not path.exists(): + old = path.parent / "probe_state.json" + if old.exists(): + data = _load_state_file(old, "load_cost_probe_state") + if data: + _save_state_file(data, path, "load_cost_probe_state") + return data + return _load_state_file(path, "load_cost_probe_state") + + +def save_cost_probe_state(state: dict, config_path: str | None = None) -> bool: + return _save_state_file( + state, get_cost_probe_state_path(config_path), "save_cost_probe_state" + ) + + +# Back-compat aliases so any external callers of the old names still work. +def get_probe_state_path(config_path: str | None = None) -> Path: + return get_cost_probe_state_path(config_path) + + +def load_probe_state(config_path: str | None = None) -> dict: + return load_cost_probe_state(config_path) + + +def save_probe_state(state: dict, config_path: str | None = None) -> bool: + return save_cost_probe_state(state, config_path) + + +# --- Endpoint probe state (endpoint_probe_state.json) --- + +def get_endpoint_probe_state_path(config_path: str | None = None) -> Path: + return get_config_path(config_path).parent / "endpoint_probe_state.json" + + +def load_endpoint_probe_state(config_path: str | None = None) -> dict: + return _load_state_file(get_endpoint_probe_state_path(config_path), "load_endpoint_probe_state") + + +def save_endpoint_probe_state(state: dict, config_path: str | None = None) -> bool: + return _save_state_file( + state, get_endpoint_probe_state_path(config_path), "save_endpoint_probe_state" + ) + + +# --- PR state (pr_state.json) --- + +def get_pr_state_path(config_path: str | None = None) -> Path: + return get_config_path(config_path).parent / "pr_state.json" + + +def load_pr_state(config_path: str | None = None) -> dict: + return _load_state_file(get_pr_state_path(config_path), "load_pr_state") + + +def save_pr_state(state: dict, config_path: str | None = None) -> bool: + return _save_state_file( + state, get_pr_state_path(config_path), "save_pr_state" + ) + + # --------------------------------------------------------------------------- # Provider helpers # --------------------------------------------------------------------------- @@ -658,10 +722,16 @@ def _normalize_config(raw: dict) -> dict: """ if not isinstance(raw, dict): return raw - if not any(k in raw for k in _LEGACY_KEY_MIGRATIONS): - return raw normalized = copy.deepcopy(raw) + + # Migrate free_tier.probe → free_tier.cost_probe (renamed in this release). + ft = normalized.get("free_tier") + if isinstance(ft, dict) and "probe" in ft and "cost_probe" not in ft: + ft["cost_probe"] = ft.pop("probe") + + if not any(k in normalized for k in _LEGACY_KEY_MIGRATIONS): + return normalized for legacy_key, path in _LEGACY_KEY_MIGRATIONS.items(): if legacy_key not in normalized: continue diff --git a/llmproxy/server.py b/llmproxy/server.py index ba8b965..ca98ddc 100755 --- a/llmproxy/server.py +++ b/llmproxy/server.py @@ -470,6 +470,22 @@ def _reset_usage() -> None: _startup_update_done: bool = False _startup_update_lock = threading.Lock() +# --------------------------------------------------------------------------- +# Periodic interval probe checks (endpoint probe, cost probe, PR creation) +# --------------------------------------------------------------------------- +# State files are re-read at most once per _PROBE_INTERVAL_GATE_SEC so +# concurrent requests don't all hit disk simultaneously. The actual probe +# frequency is controlled by the per-probe frequency_minutes / frequency_days +# settings in config.json. +_PROBE_INTERVAL_GATE_SEC = 60 # check state files at most once per minute +_last_probe_interval_check: float = 0.0 +_probe_interval_check_lock = threading.Lock() + +_endpoint_probe_inflight: bool = False +_endpoint_probe_lock = threading.Lock() +_cost_probe_inflight: bool = False +_cost_probe_lock = threading.Lock() + # --------------------------------------------------------------------------- # Short-lived response cache (non-streaming only) # --------------------------------------------------------------------------- @@ -520,6 +536,9 @@ def _log_request() -> None: # deployments where the eager per-worker trigger in __main__ did not fire; it # is a no-op after the first invocation. _run_startup_tasks_once() + # Check probe / PR frequency intervals on every request (debounced by + # _PROBE_INTERVAL_GATE_SEC so state files are read at most once per minute). + _maybe_fire_interval_probes() logger.info("→ %s %s", request.method, request.path) @@ -1281,8 +1300,8 @@ def _run_free_models_update(config: dict, config_path: str | None) -> bool: regenerates config.example.json, and syncs the user config; its changes are picked up by the normal mtime-based config reload. - When config['probe_cost'] is true the updater also actively probes - believed_free models for cost (see scripts/sources/probe.py). + When free_tier.cost_probe.enabled is true the updater also actively probes + believed_free models for cost (see scripts/sources/cost_probe.py). """ # The scraper lives in the repo-root `scripts/` package, which sits next to # the installed `llmproxy/` package but may not be on sys.path (e.g. under @@ -1367,6 +1386,160 @@ def _run_free_models_update(config: dict, config_path: str | None) -> bool: return True +def _maybe_fire_interval_probes(config_path: str | None = None) -> None: + """Check frequency intervals for endpoint probe, cost probe, and PR creation. + + Fires each as a background daemon thread if its interval has elapsed. + Gated by _PROBE_INTERVAL_GATE_SEC so state files are not read on every + single request — the actual probe frequency is set in config.json. + + Endpoint probe: gated by sync_on_startup OR update_on_startup. + Cost probe: gated by update_on_startup AND cost_probe.enabled. + PR creation: checked independently of startup flags. + """ + global _last_probe_interval_check + now = time.monotonic() + with _probe_interval_check_lock: + if now - _last_probe_interval_check < _PROBE_INTERVAL_GATE_SEC: + return + _last_probe_interval_check = now + + try: + config = load_config() + except Exception: # noqa: BLE001 + return + free_tier = config.get("free_tier", {}) if isinstance(config.get("free_tier"), dict) else {} + + # Endpoint probe — gated by sync_on_startup OR update_on_startup. + if free_tier.get("sync_on_startup") or free_tier.get("update_on_startup"): + _maybe_fire_endpoint_probe(config, free_tier, config_path) + + # Cost probe — gated by update_on_startup + cost_probe.enabled. + if free_tier.get("update_on_startup") and free_tier.get("cost_probe", {}).get("enabled"): + _maybe_fire_cost_probe(config, free_tier, config_path) + + # PR creation interval — independent of startup flags. + _maybe_fire_pr_if_due(config, config_path) + + +def _maybe_fire_endpoint_probe( + config: dict, free_tier: dict, config_path: str | None +) -> None: + ep_cfg = free_tier.get("endpoint_probe", {}) + freq_min = ep_cfg.get("frequency_minutes", 30) + freq_days = freq_min / 1440.0 + try: + import sys, os as _os + repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__))) + if repo_root not in sys.path: + sys.path.insert(0, repo_root) + from scripts.update_free_models import _probe_due + from llmproxy.config import load_endpoint_probe_state + except Exception: # noqa: BLE001 — scripts/ may not be available + return + state = load_endpoint_probe_state(config_path) + due, _ = _probe_due(state.get("last_probe_at"), freq_days) + if not due: + return + + global _endpoint_probe_inflight + with _endpoint_probe_lock: + if _endpoint_probe_inflight: + return + _endpoint_probe_inflight = True + + def _run() -> None: + global _endpoint_probe_inflight + try: + logger.info("[endpoint-probe] interval due — running endpoint probe") + _run_free_models_update(load_config(), config_path) + with _models_list_cache_lock: + global _models_list_cache + _models_list_cache = None + except Exception as exc: # noqa: BLE001 + logger.warning("[endpoint-probe] failed: %s", exc) + finally: + with _endpoint_probe_lock: + _endpoint_probe_inflight = False + + threading.Thread(target=_run, daemon=True, name="endpoint-probe-interval").start() + + +def _maybe_fire_cost_probe( + config: dict, free_tier: dict, config_path: str | None +) -> None: + cost_probe_cfg = free_tier.get("cost_probe", {}) + freq_days = cost_probe_cfg.get("frequency_days", 0) + try: + import sys, os as _os + repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__))) + if repo_root not in sys.path: + sys.path.insert(0, repo_root) + from scripts.update_free_models import _probe_due + from llmproxy.config import load_cost_probe_state + except Exception: # noqa: BLE001 + return + state = load_cost_probe_state(config_path) + due, _ = _probe_due(state.get("last_probe_at"), freq_days) + if not due: + return + + global _cost_probe_inflight + with _cost_probe_lock: + if _cost_probe_inflight: + return + _cost_probe_inflight = True + + def _run() -> None: + global _cost_probe_inflight + try: + logger.info("[cost-probe] interval due — running cost probe") + _run_free_models_update(load_config(), config_path) + with _models_list_cache_lock: + global _models_list_cache + _models_list_cache = None + except Exception as exc: # noqa: BLE001 + logger.warning("[cost-probe] failed: %s", exc) + finally: + with _cost_probe_lock: + _cost_probe_inflight = False + + threading.Thread(target=_run, daemon=True, name="cost-probe-interval").start() + + +def _maybe_fire_pr_if_due(config: dict, config_path: str | None) -> None: + """Open a providers PR if providers_pr.frequency_days has elapsed since last PR.""" + pr_cfg = config.get("providers_pr", {}) + if pr_cfg.get("enabled") is not True: + return + freq_days = pr_cfg.get("frequency_days", 0) + if not freq_days or freq_days <= 0: + return # no throttle configured — PR is opened immediately after updates + try: + import sys, os as _os + repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__))) + if repo_root not in sys.path: + sys.path.insert(0, repo_root) + from scripts.update_free_models import _probe_due + from llmproxy.config import load_pr_state + from llmproxy.providers import DATA_PATH as _DATA_PATH + except Exception: # noqa: BLE001 + return + pr_state = load_pr_state(config_path) + due, days_since = _probe_due(pr_state.get("last_pr_at"), freq_days) + if not due: + return + # Read current sidecar to pass to _maybe_open_providers_pr. + try: + providers_text = _DATA_PATH.read_text(encoding="utf-8") if _DATA_PATH.exists() else None + except Exception: # noqa: BLE001 + providers_text = None + if providers_text is None: + return + logger.info("[providers-pr] frequency_days interval elapsed — checking for PR") + _maybe_open_providers_pr(config, providers_text) + + def _run_startup_tasks_once(config_path: str | None = None) -> None: """Run the one-time per-worker startup tasks in a background daemon thread. @@ -1444,6 +1617,10 @@ def _run() -> None: except Exception as exc: # noqa: BLE001 — warming must never crash the worker logger.warning("[startup] /v1/models cache warm failed: %s", exc) + # 6. Check frequency intervals for endpoint probe, cost probe, and PR + # creation. Fires background threads for any that are due. + _maybe_fire_interval_probes(config_path) + threading.Thread(target=_run, daemon=True, name="startup-tasks").start() @@ -1465,6 +1642,31 @@ def _maybe_open_providers_pr(config: dict, providers_text: str, example_text: st """ if config.get("providers_pr", {}).get("enabled") is not True: return + + # Throttle PR creation to at most once every providers_pr.frequency_days. + pr_cfg = config.get("providers_pr", {}) + freq_days = pr_cfg.get("frequency_days", 0) + if freq_days and freq_days > 0: + try: + import sys as _sys, os as _os2 + repo_root = _os2.path.dirname(_os2.path.dirname(_os2.path.abspath(__file__))) + if repo_root not in _sys.path: + _sys.path.insert(0, repo_root) + from scripts.update_free_models import _probe_due + from .config import load_pr_state, save_pr_state as _save_pr_state + except Exception: # noqa: BLE001 + _probe_due = None # type: ignore[assignment] + if _probe_due is not None: + pr_state = load_pr_state() + due, days_since = _probe_due(pr_state.get("last_pr_at"), freq_days) + if not due: + logger.info( + "[providers-pr] throttled — %.1f day(s) since last PR " + "(frequency_days=%s); skipping.", + days_since, freq_days, + ) + return + import os from .github_pr import create_or_update_pr @@ -1514,6 +1716,12 @@ def _maybe_open_providers_pr(config: dict, providers_text: str, example_text: st ) if url: logger.info("[providers-pr] %s", url) + try: + from datetime import UTC as _UTC, datetime as _datetime + from .config import save_pr_state as _save_pr_state2 + _save_pr_state2({"last_pr_at": _datetime.now(_UTC).isoformat()}) + except Exception as _exc: # noqa: BLE001 + logger.warning("[providers-pr] could not save pr_state: %s", _exc) except Exception as exc: # noqa: BLE001 — PR creation is best-effort logger.warning("[providers-pr] failed to open PR: %s", exc) diff --git a/llmproxy/usage.py b/llmproxy/usage.py index 914a9be..4864e92 100644 --- a/llmproxy/usage.py +++ b/llmproxy/usage.py @@ -2,7 +2,7 @@ This module holds the *pure* (web-framework-free) pieces of usage tracking so they can be unit-tested in isolation and reused by the offline scraper probe -(scripts/sources/probe.py): +(scripts/sources/cost_probe.py): * ``ModelUsage`` — a thread-safe per-model counter that tracks both requests (sliding 60s window + per-day) and tokens (same windows) plus lifetime diff --git a/scripts/sources/__init__.py b/scripts/sources/__init__.py index d96eea5..d266e02 100644 --- a/scripts/sources/__init__.py +++ b/scripts/sources/__init__.py @@ -10,7 +10,8 @@ from .fireworks import FireworksSource from .litellm_cost_map import LiteLLMCostMapSource from .openrouter import OpenRouterSource -from .probe import ProbeSource +from .cost_probe import CostProbeSource +from .endpoint_probe import EndpointProbeSource from .together import TogetherSource ALL_SOURCES: dict[str, type[Source]] = { @@ -21,12 +22,17 @@ "together": TogetherSource, "fireworks": FireworksSource, # Active cost probe. Excluded from the default source set because it sends - # real requests; opt in via config probe_cost: true or --probe. - "probe": ProbeSource, + # real requests; opt in via free_tier.cost_probe.enabled or --cost-probe. + "cost_probe": CostProbeSource, + # Endpoint discovery probe. Excluded from the default source set because it + # makes authenticated GET /models requests; opt in via sync_on_startup or + # update_on_startup, or pass --source endpoint_probe. + "endpoint_probe": EndpointProbeSource, } -# Sources that must NOT run unless explicitly opted into (they spend real quota). -OPT_IN_SOURCES: frozenset[str] = frozenset({"probe"}) +# Sources that must NOT run unless explicitly opted into (they spend real quota +# or make authenticated requests beyond the normal scrape set). +OPT_IN_SOURCES: frozenset[str] = frozenset({"cost_probe", "endpoint_probe"}) # Docs scrapers are registered separately so we can list them under --source docs. from .docs import DOCS_SOURCES # noqa: E402 diff --git a/scripts/sources/probe.py b/scripts/sources/cost_probe.py similarity index 95% rename from scripts/sources/probe.py rename to scripts/sources/cost_probe.py index 74428fb..da747cf 100644 --- a/scripts/sources/probe.py +++ b/scripts/sources/cost_probe.py @@ -1,4 +1,4 @@ -"""probe.py — active cost probe for believed-free models. +"""cost_probe.py — active cost probe for believed-free models. Unlike every other source (which reads docs / pricing pages / /models catalogs), this source sends a *real*, minimal chat completion to each model in @@ -8,9 +8,9 @@ high-confidence ``is_free=False`` so the aggregator can flag/remove it. This source spends real quota (and possibly real money), so it is OFF by -default: it only runs when the user sets ``probe_cost: true`` in config.json or -passes ``--probe`` on the command line, and it skips any provider that has no -configured API key. +default: it only runs when the user sets ``cost_probe.enabled: true`` in +config.json or passes ``--cost-probe`` on the command line, and it skips any +provider that has no configured API key. Probes run with bounded *per-provider* concurrency: requests to different providers overlap, but each provider is capped (``concurrency``, default 3) so @@ -62,8 +62,8 @@ def _make_progress(total: int, desc: str): } -class ProbeSource(Source): - name = "probe" +class CostProbeSource(Source): + name = "cost_probe" def __init__( self, diff --git a/scripts/sources/endpoint_probe.py b/scripts/sources/endpoint_probe.py new file mode 100644 index 0000000..57a3882 --- /dev/null +++ b/scripts/sources/endpoint_probe.py @@ -0,0 +1,131 @@ +"""endpoint_probe.py — discovers :free-suffixed models via GET /v1/models. + +Unlike cost_probe.py (which sends real chat completions to cost-verify models), +this source only makes authenticated GET requests to each configured provider's +/v1/models endpoint. It emits: + + * positive Evidence for model IDs ending in ':free' that are returned by the + endpoint but are not yet in believed_free — new free models to consider adding. + * negative Evidence for model IDs ending in ':free' that *are* in believed_free + but are no longer returned by the endpoint — models that may have been removed. + +Non-:free believed_free models are ignored by this source; their presence/absence +in the /v1/models listing is handled by other sources (e.g. api_models). + +This source makes authenticated network calls but does not spend quota, so it +is controlled by the existing sync_on_startup / update_on_startup startup flags +rather than requiring its own 'enabled' toggle. +""" + +from __future__ import annotations + +import requests + +from llmproxy.config import ( + get_provider, + load_config, + provider_api_key, + provider_base_url, +) +from llmproxy.providers import load_data + +from .base import Evidence, Source + +DEFAULT_TIMEOUT = 10 + + +class EndpointProbeSource(Source): + name = "endpoint_probe" + + def __init__( + self, + config_path: str | None = None, + provider_filter: str | None = None, + timeout: int = DEFAULT_TIMEOUT, + ) -> None: + self.config_path = config_path + self.provider_filter = provider_filter + self.timeout = timeout + + def fetch(self) -> list[Evidence]: + config = load_config(self.config_path, force_reload=True) + sidecar = load_data() + + # Build set of believed_free :free-suffixed models per provider. + believed_free_per_provider: dict[str, set[str]] = {} + for prov_name, prov_data in sidecar.get("providers", {}).items(): + for qualified in prov_data.get("believed_free", []): + if "/" not in qualified: + continue + p, m = qualified.split("/", 1) + if m.endswith(":free"): + believed_free_per_provider.setdefault(p, set()).add(m) + + evidence: list[Evidence] = [] + for provider_name, provider_cfg in config.get("providers", {}).items(): + if self.provider_filter and provider_name != self.provider_filter: + continue + if not get_provider(config, provider_name): + continue + api_key = provider_api_key(provider_cfg) + if not api_key: + continue + base_url = provider_base_url(provider_cfg) + if not base_url: + continue + + endpoint_ids = self._fetch_model_ids(base_url, api_key) + if endpoint_ids is None: + continue # network failure — emit no opinion (fail-soft) + + endpoint_free = {m for m in endpoint_ids if m.endswith(":free")} + known_free = believed_free_per_provider.get(provider_name, set()) + models_url = f"{base_url}/models" + + # Positive: new :free models not yet in believed_free. + for model_id in sorted(endpoint_free - known_free): + evidence.append(Evidence( + provider=provider_name, + model_id=f"{provider_name}/{model_id}", + is_free=True, + source=self.name, + confidence="high", + url=models_url, + notes="endpoint lists model with :free suffix; not yet in believed_free", + )) + + # Negative: believed_free :free models absent from endpoint. + for model_id in sorted(known_free - endpoint_free): + evidence.append(Evidence( + provider=provider_name, + model_id=f"{provider_name}/{model_id}", + is_free=False, + source=self.name, + confidence="high", + url=models_url, + notes="endpoint no longer lists this :free model", + )) + + return evidence + + def _fetch_model_ids(self, base_url: str, api_key: str) -> list[str] | None: + """GET {base_url}/models and return the list of model id strings, or None on error.""" + try: + resp = requests.get( + f"{base_url}/models", + headers={"Authorization": f"Bearer {api_key}"}, + timeout=self.timeout, + ) + if resp.status_code >= 400: + return None + data = resp.json() + models = data.get("data") if isinstance(data, dict) else data + if not isinstance(models, list): + return None + return [ + m.get("id", "") + for m in models + if isinstance(m, dict) and m.get("id") + ] + except Exception: # noqa: BLE001 — any failure means no opinion + return None diff --git a/scripts/update_free_models.py b/scripts/update_free_models.py index 152b44f..ab79ef1 100644 --- a/scripts/update_free_models.py +++ b/scripts/update_free_models.py @@ -9,7 +9,8 @@ python scripts/update_free_models.py --source openrouter,docs python scripts/update_free_models.py --regen-config-only python scripts/update_free_models.py --config ~/.config/llmproxy/config.json - python scripts/update_free_models.py --probe --probe-concurrency 2 + python scripts/update_free_models.py --cost-probe --cost-probe-concurrency 2 + python scripts/update_free_models.py --endpoint-probe Behavior -------- @@ -48,9 +49,11 @@ load_config as load_user_config, ) from llmproxy.config import ( # noqa: E402 - load_probe_state, + load_cost_probe_state, + load_endpoint_probe_state, save_config, - save_probe_state, + save_cost_probe_state, + save_endpoint_probe_state, ) from llmproxy.providers import ( # noqa: E402 DATA_PATH, @@ -59,8 +62,9 @@ load_data, ) from scripts.sources import ALL_SOURCES, OPT_IN_SOURCES, Evidence # noqa: E402 +from scripts.sources.cost_probe import CostProbeSource # noqa: E402 +from scripts.sources.endpoint_probe import EndpointProbeSource # noqa: E402 from scripts.sources.litellm_cost_map import fetch_pricing_map # noqa: E402 -from scripts.sources.probe import ProbeSource # noqa: E402 CONFIG_EXAMPLE_PATH = REPO_ROOT / "config.example.json" @@ -566,7 +570,11 @@ def regenerate_config_example(sidecar: dict, server_block: dict | None = None, "free_tier": { "sync_on_startup": True, "update_on_startup": False, - "probe": { + "endpoint_probe": { + "frequency_minutes": 30, + "timeout_sec": 10, + }, + "cost_probe": { "enabled": False, "autoremove": False, "frequency_days": 0, @@ -574,6 +582,7 @@ def regenerate_config_example(sidecar: dict, server_block: dict | None = None, }, "providers_pr": { "enabled": False, + "frequency_days": 0, "repo": None, "base": "main", "branch": "llmproxy-auto/providers", @@ -835,12 +844,16 @@ def _run_source( probe_max: int | None = None, probe_provider: str | None = None, probe_concurrency: int | None = None, + endpoint_probe_timeout: int = 10, ) -> tuple[str, bool, list[Evidence], str | None]: try: - if source_name == "probe": - src = ProbeSource(config_path=config_path, max_models=probe_max, - provider_filter=probe_provider, - concurrency=probe_concurrency) + if source_name == "cost_probe": + src = CostProbeSource(config_path=config_path, max_models=probe_max, + provider_filter=probe_provider, + concurrency=probe_concurrency) + elif source_name == "endpoint_probe": + src = EndpointProbeSource(config_path=config_path, + timeout=endpoint_probe_timeout) else: cls = ALL_SOURCES[source_name] src = cls() @@ -852,7 +865,7 @@ def _run_source( def _probe_due(last_probe_at: str | None, frequency_days, now: datetime | None = None ) -> tuple[bool, float | None]: - """Decide whether the cost probe is due to run. + """Decide whether a probe/PR is due to run. Returns ``(due, days_since_last)``. The probe is due when: * ``frequency_days`` is missing or <= 0 (no throttle — run every time), or @@ -889,22 +902,26 @@ def main(argv: list[str] | None = None) -> int: help="Limit updates to a single provider (e.g. 'google').") ap.add_argument("--source", default=",".join(default_sources), help="Comma-separated source names (default: all except opt-in probes).") - ap.add_argument("--probe", action="store_true", + ap.add_argument("--cost-probe", action="store_true", help="Actively probe believed_free models for cost (sends real " "requests; requires configured API keys). Also enabled by " - "setting probe_cost: true in config.json.") - ap.add_argument("--probe-max", type=int, metavar="N", - help="Probe at most N models (bounds spend).") - ap.add_argument("--probe-provider", metavar="NAME", - help="Only probe models from this provider.") - ap.add_argument("--probe-concurrency", type=int, metavar="N", - help="Max concurrent probe requests per provider (default 3). " + "setting free_tier.cost_probe.enabled: true in config.json.") + ap.add_argument("--cost-probe-max", type=int, metavar="N", + help="Cost-probe at most N models (bounds spend).") + ap.add_argument("--cost-probe-provider", metavar="NAME", + help="Only cost-probe models from this provider.") + ap.add_argument("--cost-probe-concurrency", type=int, metavar="N", + help="Max concurrent cost-probe requests per provider (default 3). " "Different providers always run in parallel; this bounds " "in-flight requests to any single provider to avoid " "tripping its rate limit.") + ap.add_argument("--endpoint-probe", action="store_true", + help="Probe each provider's /v1/models endpoint to discover new " + ":free-suffixed models. Also enabled by sync_on_startup or " + "update_on_startup in config.json.") ap.add_argument("--ignore-throttle", action="store_true", - help="Probe even if probe_frequency_days says it is too soon " - "since the last probe (bypasses the throttle).") + help="Run probes even if their frequency setting says it is too " + "soon since the last run (bypasses all throttles).") ap.add_argument("--regen-config-only", action="store_true", help="Skip scraping; regenerate config.example.json from the current sidecar.") ap.add_argument("--sync-config-only", action="store_true", @@ -927,31 +944,50 @@ def main(argv: list[str] | None = None) -> int: return 2 return _sync_user_config(load_data(), args.config, dry_run=args.dry_run) - # Read opt-in flags from the user config (probe_cost / autoremove_believed_free). + # Read opt-in flags from the user config (cost_probe / endpoint_probe / autoremove). try: user_cfg = load_user_config(args.config, force_reload=True) except Exception: # noqa: BLE001 — a missing/broken config must not break scraping user_cfg = {} - probe_cfg = user_cfg.get("free_tier", {}).get("probe", {}) - probe_cost = bool(probe_cfg.get("enabled", False)) or args.probe - autoremove = bool(probe_cfg.get("autoremove", False)) - - # Throttle the probe to at most once every probe_frequency_days. The last-run - # timestamp lives in a sibling cache file (probe_state.json), not config.json. + free_tier_cfg = user_cfg.get("free_tier", {}) + cost_probe_cfg = free_tier_cfg.get("cost_probe", {}) + cost_probe_enabled = bool(cost_probe_cfg.get("enabled", False)) or args.cost_probe + autoremove = bool(cost_probe_cfg.get("autoremove", False)) + ep_cfg = free_tier_cfg.get("endpoint_probe", {}) + ep_enabled = bool( + free_tier_cfg.get("sync_on_startup") or free_tier_cfg.get("update_on_startup") + ) or args.endpoint_probe + + # Throttle the cost probe to at most once every frequency_days. The last-run + # timestamp lives in cost_probe_state.json (sibling of config.json). # --ignore-throttle bypasses this; frequency 0 means "probe every time". - if probe_cost and not args.ignore_throttle: - state = load_probe_state(args.config) + if cost_probe_enabled and not args.ignore_throttle: + cp_state = load_cost_probe_state(args.config) due, days_since = _probe_due( - state.get("last_probe_at"), probe_cfg.get("frequency_days", 0) + cp_state.get("last_probe_at"), cost_probe_cfg.get("frequency_days", 0) ) if not due: - freq = probe_cfg.get("frequency_days", 0) + freq = cost_probe_cfg.get("frequency_days", 0) since = f"{days_since:.1f}" if days_since is not None else "?" print(_warn( - f" ⚠ probe throttled — last run was {since} day(s) ago, " - f"probe_frequency_days={freq}. Use --ignore-throttle to override." + f" ⚠ cost_probe throttled — last run was {since} day(s) ago, " + f"frequency_days={freq}. Use --ignore-throttle to override." )) - probe_cost = False + cost_probe_enabled = False + + # Throttle the endpoint probe to at most once every frequency_minutes. + if ep_enabled and not args.ignore_throttle: + ep_state = load_endpoint_probe_state(args.config) + freq_days = ep_cfg.get("frequency_minutes", 30) / 1440.0 + ep_due, ep_days_since = _probe_due(ep_state.get("last_probe_at"), freq_days) + if not ep_due: + freq_min = ep_cfg.get("frequency_minutes", 30) + ep_since_min = f"{ep_days_since * 1440:.1f}" if ep_days_since is not None else "?" + print(_warn( + f" ⚠ endpoint_probe throttled — last run was {ep_since_min} min ago, " + f"frequency_minutes={freq_min}. Use --ignore-throttle to override." + )) + ep_enabled = False if args.regen_config_only: write_config_example() @@ -961,24 +997,27 @@ def main(argv: list[str] | None = None) -> int: return 0 requested = [s.strip() for s in args.source.split(",") if s.strip()] - if probe_cost and "probe" not in requested: - requested.append("probe") + if cost_probe_enabled and "cost_probe" not in requested: + requested.append("cost_probe") + if ep_enabled and "endpoint_probe" not in requested: + requested.append("endpoint_probe") unknown = [s for s in requested if s not in ALL_SOURCES] if unknown: print(_err(f"Unknown source(s): {unknown}. Known: {sorted(ALL_SOURCES.keys())}")) return 2 print(_h(f"\nFetching evidence from sources: {requested}")) - if "probe" in requested: - print(_warn(" ⚠ probe enabled — sending real requests to believed_free models " + if "cost_probe" in requested: + print(_warn(" ⚠ cost_probe enabled — sending real requests to believed_free models " "(uses configured API keys / quota).")) all_evidence: list[Evidence] = [] source_status: dict[str, bool] = {} with ThreadPoolExecutor(max_workers=min(5, len(requested))) as ex: futures = { ex.submit(_run_source, s, config_path=args.config, - probe_max=args.probe_max, probe_provider=args.probe_provider, - probe_concurrency=args.probe_concurrency): s + probe_max=args.cost_probe_max, probe_provider=args.cost_probe_provider, + probe_concurrency=args.cost_probe_concurrency, + endpoint_probe_timeout=ep_cfg.get("timeout_sec", 10)): s for s in requested } for fut in as_completed(futures): @@ -990,16 +1029,16 @@ def main(argv: list[str] | None = None) -> int: else: print(_err(f" {name}: FAILED — {err}")) - # Probe-confirmed paid models. When autoremove_believed_free is off (the - # default), we report these but do NOT remove them from believed_free. - probe_paid = { + # Cost-probe-confirmed paid models. When autoremove is off (the default), + # we report these but do NOT remove them from believed_free. + cost_probe_paid = { (ev.provider, ev.model_id) for ev in all_evidence - if ev.source == "probe" and ev.is_free is False + if ev.source == "cost_probe" and ev.is_free is False } - if probe_paid: - print(_h("\n=== Probe flagged believed_free models reporting a cost ===")) - for _provider_name, model_id in sorted(probe_paid): + if cost_probe_paid: + print(_h("\n=== cost_probe flagged believed_free models reporting a cost ===")) + for _provider_name, model_id in sorted(cost_probe_paid): print(f" {_warn('⚠')} {model_id}") if autoremove: print(_warn(" autoremove_believed_free=true → these will be removed.")) @@ -1009,7 +1048,7 @@ def main(argv: list[str] | None = None) -> int: if not autoremove: all_evidence = [ ev for ev in all_evidence - if not (ev.source == "probe" and ev.is_free is False) + if not (ev.source == "cost_probe" and ev.is_free is False) ] # If only "api" succeeded for a provider, we trust /models presence as @@ -1070,14 +1109,14 @@ def main(argv: list[str] | None = None) -> int: else: print(_dim("\nNo changes to apply.")) - # Record when the probe last ran so probe_frequency_days can throttle the - # next invocation. Only on a real run where the probe was actually included. - # Done after (and independent of) the sidecar write above so a read-only - # providers.json can't prevent the throttle from advancing. - if "probe" in requested: - save_probe_state( - {"last_probe_at": datetime.now(UTC).isoformat()}, args.config - ) + # Record when each probe last ran so frequency settings can throttle the + # next invocation. Done after (and independent of) the sidecar write so a + # read-only providers.json can't prevent the throttle from advancing. + now_iso = datetime.now(UTC).isoformat() + if "cost_probe" in requested: + save_cost_probe_state({"last_probe_at": now_iso}, args.config) + if "endpoint_probe" in requested: + save_endpoint_probe_state({"last_probe_at": now_iso}, args.config) # Sync the user config even when the sidecar was unchanged — a stale config # should still be reconciled against the current sidecar. From 45d5bf1f7dccf3dc5593d3b4821184aeb9740c25 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 19 Jun 2026 11:19:42 +0000 Subject: [PATCH 2/3] =?UTF-8?q?fix:=20resolve=20CI=20failures=20after=20pr?= =?UTF-8?q?obe=E2=86=92cost=5Fprobe=20rename?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename ProbeSource→CostProbeSource throughout test_probe_source.py (module path, monkeypatch targets, class usage, source name assertion) - Sort cost_probe/endpoint_probe imports to correct alphabetical position in scripts/sources/__init__.py (ruff I001) - Rename unused loop variable prov_name→_prov_name in endpoint_probe.py (ruff B007) - Fix server.py: split `import os, sys` into separate lines (ruff E401), separate import groups with blank lines per isort (ruff I001), and remove unused `save_pr_state as _save_pr_state` alias (ruff F401) - Regenerate config.example.json with cost_probe/endpoint_probe sections Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01RkRhX4Znv73oPcZy41bnyS --- config.example.json | 7 +++++- llmproxy/server.py | 25 +++++++++++++-------- scripts/sources/__init__.py | 4 ++-- scripts/sources/endpoint_probe.py | 2 +- tests/test_scraper/test_probe_source.py | 30 ++++++++++++------------- 5 files changed, 40 insertions(+), 28 deletions(-) diff --git a/config.example.json b/config.example.json index cec67d6..bbdd146 100644 --- a/config.example.json +++ b/config.example.json @@ -1282,7 +1282,11 @@ "free_tier": { "sync_on_startup": true, "update_on_startup": false, - "probe": { + "endpoint_probe": { + "frequency_minutes": 30, + "timeout_sec": 10 + }, + "cost_probe": { "enabled": false, "autoremove": false, "frequency_days": 0 @@ -1290,6 +1294,7 @@ }, "providers_pr": { "enabled": false, + "frequency_days": 0, "repo": null, "base": "main", "branch": "llmproxy-auto/providers", diff --git a/llmproxy/server.py b/llmproxy/server.py index ca98ddc..f05c79e 100755 --- a/llmproxy/server.py +++ b/llmproxy/server.py @@ -1429,12 +1429,13 @@ def _maybe_fire_endpoint_probe( freq_min = ep_cfg.get("frequency_minutes", 30) freq_days = freq_min / 1440.0 try: - import sys, os as _os + import os as _os + import sys repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__))) if repo_root not in sys.path: sys.path.insert(0, repo_root) - from scripts.update_free_models import _probe_due from llmproxy.config import load_endpoint_probe_state + from scripts.update_free_models import _probe_due except Exception: # noqa: BLE001 — scripts/ may not be available return state = load_endpoint_probe_state(config_path) @@ -1471,12 +1472,13 @@ def _maybe_fire_cost_probe( cost_probe_cfg = free_tier.get("cost_probe", {}) freq_days = cost_probe_cfg.get("frequency_days", 0) try: - import sys, os as _os + import os as _os + import sys repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__))) if repo_root not in sys.path: sys.path.insert(0, repo_root) - from scripts.update_free_models import _probe_due from llmproxy.config import load_cost_probe_state + from scripts.update_free_models import _probe_due except Exception: # noqa: BLE001 return state = load_cost_probe_state(config_path) @@ -1516,13 +1518,14 @@ def _maybe_fire_pr_if_due(config: dict, config_path: str | None) -> None: if not freq_days or freq_days <= 0: return # no throttle configured — PR is opened immediately after updates try: - import sys, os as _os + import os as _os + import sys repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__))) if repo_root not in sys.path: sys.path.insert(0, repo_root) - from scripts.update_free_models import _probe_due from llmproxy.config import load_pr_state from llmproxy.providers import DATA_PATH as _DATA_PATH + from scripts.update_free_models import _probe_due except Exception: # noqa: BLE001 return pr_state = load_pr_state(config_path) @@ -1648,12 +1651,14 @@ def _maybe_open_providers_pr(config: dict, providers_text: str, example_text: st freq_days = pr_cfg.get("frequency_days", 0) if freq_days and freq_days > 0: try: - import sys as _sys, os as _os2 + import os as _os2 + import sys as _sys repo_root = _os2.path.dirname(_os2.path.dirname(_os2.path.abspath(__file__))) if repo_root not in _sys.path: _sys.path.insert(0, repo_root) from scripts.update_free_models import _probe_due - from .config import load_pr_state, save_pr_state as _save_pr_state + + from .config import load_pr_state except Exception: # noqa: BLE001 _probe_due = None # type: ignore[assignment] if _probe_due is not None: @@ -1717,7 +1722,9 @@ def _maybe_open_providers_pr(config: dict, providers_text: str, example_text: st if url: logger.info("[providers-pr] %s", url) try: - from datetime import UTC as _UTC, datetime as _datetime + from datetime import UTC as _UTC + from datetime import datetime as _datetime + from .config import save_pr_state as _save_pr_state2 _save_pr_state2({"last_pr_at": _datetime.now(_UTC).isoformat()}) except Exception as _exc: # noqa: BLE001 diff --git a/scripts/sources/__init__.py b/scripts/sources/__init__.py index d266e02..1006110 100644 --- a/scripts/sources/__init__.py +++ b/scripts/sources/__init__.py @@ -7,11 +7,11 @@ from .api_models import ApiModelsSource from .base import Evidence, Source from .community import CommunitySource +from .cost_probe import CostProbeSource +from .endpoint_probe import EndpointProbeSource from .fireworks import FireworksSource from .litellm_cost_map import LiteLLMCostMapSource from .openrouter import OpenRouterSource -from .cost_probe import CostProbeSource -from .endpoint_probe import EndpointProbeSource from .together import TogetherSource ALL_SOURCES: dict[str, type[Source]] = { diff --git a/scripts/sources/endpoint_probe.py b/scripts/sources/endpoint_probe.py index 57a3882..50913fa 100644 --- a/scripts/sources/endpoint_probe.py +++ b/scripts/sources/endpoint_probe.py @@ -53,7 +53,7 @@ def fetch(self) -> list[Evidence]: # Build set of believed_free :free-suffixed models per provider. believed_free_per_provider: dict[str, set[str]] = {} - for prov_name, prov_data in sidecar.get("providers", {}).items(): + for _prov_name, prov_data in sidecar.get("providers", {}).items(): for qualified in prov_data.get("believed_free", []): if "/" not in qualified: continue diff --git a/tests/test_scraper/test_probe_source.py b/tests/test_scraper/test_probe_source.py index ea604a2..49b594e 100644 --- a/tests/test_scraper/test_probe_source.py +++ b/tests/test_scraper/test_probe_source.py @@ -1,11 +1,11 @@ -"""Tests for the opt-in cost probe source (scripts/sources/probe.py).""" +"""Tests for the opt-in cost probe source (scripts/sources/cost_probe.py).""" from __future__ import annotations import responses from scripts.sources import OPT_IN_SOURCES -from scripts.sources.probe import ProbeSource +from scripts.sources.cost_probe import CostProbeSource def _patch(monkeypatch, *, believed_free, providers, pricing=None): @@ -15,12 +15,12 @@ def _patch(monkeypatch, *, believed_free, providers, pricing=None): prov = pid.split("/", 1)[0] sidecar["providers"].setdefault(prov, {"believed_free": []})["believed_free"].append(pid) config = {"providers": providers} - monkeypatch.setattr("scripts.sources.probe.load_data", lambda: sidecar) - monkeypatch.setattr("scripts.sources.probe.load_config", lambda *a, **k: config) + monkeypatch.setattr("scripts.sources.cost_probe.load_data", lambda: sidecar) + monkeypatch.setattr("scripts.sources.cost_probe.load_config", lambda *a, **k: config) def test_probe_is_opt_in(): - assert "probe" in OPT_IN_SOURCES + assert "cost_probe" in OPT_IN_SOURCES @responses.activate @@ -35,12 +35,12 @@ def test_probe_flags_paid_model(monkeypatch): json={"usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2, "cost": 0.003}}, status=200, ) - evs = ProbeSource().fetch() + evs = CostProbeSource().fetch() assert len(evs) == 1 assert evs[0].is_free is False assert evs[0].confidence == "high" assert evs[0].model_id == "groq/free-model" - assert evs[0].source == "probe" + assert evs[0].source == "cost_probe" @responses.activate @@ -55,7 +55,7 @@ def test_probe_silent_on_zero_cost(monkeypatch): json={"usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}}, status=200, ) - assert ProbeSource().fetch() == [] + assert CostProbeSource().fetch() == [] def test_probe_skips_models_without_api_key(monkeypatch): @@ -65,7 +65,7 @@ def test_probe_skips_models_without_api_key(monkeypatch): providers={"groq": {"base_url": "http://groq.example/v1", "api_key": ""}}, ) # No HTTP call should be made (responses not activated → would raise if called). - assert ProbeSource().fetch() == [] + assert CostProbeSource().fetch() == [] @responses.activate @@ -79,7 +79,7 @@ def test_probe_fail_soft_on_error(monkeypatch): responses.POST, "http://groq.example/v1/chat/completions", status=500, ) - assert ProbeSource().fetch() == [] + assert CostProbeSource().fetch() == [] @responses.activate @@ -94,7 +94,7 @@ def test_probe_flags_multiple_models_concurrently(monkeypatch): json={"usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2, "cost": 0.003}}, status=200, ) - evs = ProbeSource(concurrency=3).fetch() + evs = CostProbeSource(concurrency=3).fetch() assert {e.model_id for e in evs} == {"groq/m1", "groq/m2", "groq/m3"} assert all(e.is_free is False for e in evs) @@ -111,7 +111,7 @@ def test_probe_respects_max_models(monkeypatch): json={"usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2, "cost": 0.003}}, status=200, ) - evs = ProbeSource(max_models=2).fetch() + evs = CostProbeSource(max_models=2).fetch() assert len(evs) == 2 # Only the first two believed_free candidates are probed (budget bound). assert len(responses.calls) == 2 @@ -142,8 +142,8 @@ def fake_probe(self, base_url, api_key, model): with lock: state["in_flight"] -= 1 - monkeypatch.setattr(ProbeSource, "_probe", fake_probe) - evs = ProbeSource(concurrency=2).fetch() + monkeypatch.setattr(CostProbeSource, "_probe", fake_probe) + evs = CostProbeSource(concurrency=2).fetch() assert len(evs) == 6 assert state["peak"] <= 2 @@ -161,6 +161,6 @@ def test_probe_computes_cost_from_pricing(monkeypatch): json={"usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150}}, status=200, ) - evs = ProbeSource().fetch() + evs = CostProbeSource().fetch() assert len(evs) == 1 assert "cost=" in evs[0].notes From c4c2a3f7f5c20ed5e0545a82744981110fe5b558 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 19 Jun 2026 11:27:34 +0000 Subject: [PATCH 3/3] =?UTF-8?q?fix:=20update=20tests=20for=20probe?= =?UTF-8?q?=E2=86=92cost=5Fprobe=20rename,=20429=20retry,=20flaky=20fusion?= =?UTF-8?q?=20order?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - test_admin_api: check free_tier.cost_probe (not free_tier.probe) after PUT - test_probe_throttle: import/use cost_probe_* state helpers; --source cost_probe - test_fusion: use sorted() comparison for backfill panel order (parallel threads make request order non-deterministic, only set membership matters) - docs/base.py: retry GET up to 3 times on 429 with exponential-ish backoff, honouring Retry-After; return [] on exhaustion instead of raising - test_docs_huggingface: add tests for 429 retry-then-succeed and exhausted-429 returns-empty, monkeypatching time.sleep to keep tests fast Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01RkRhX4Znv73oPcZy41bnyS --- scripts/sources/docs/base.py | 24 +++++++++++++---- tests/test_admin_api.py | 4 +-- tests/test_fusion.py | 2 +- tests/test_scraper/test_docs_huggingface.py | 21 +++++++++++++++ tests/test_scraper/test_probe_throttle.py | 30 ++++++++++++--------- 5 files changed, 60 insertions(+), 21 deletions(-) diff --git a/scripts/sources/docs/base.py b/scripts/sources/docs/base.py index 9ee0615..73c3ae0 100644 --- a/scripts/sources/docs/base.py +++ b/scripts/sources/docs/base.py @@ -2,11 +2,17 @@ from __future__ import annotations +import time + import requests from ..base import Evidence, Source TIMEOUT = (5, 10) +_HEADERS = { + "User-Agent": "llmproxy-update-free-models/1.0 (+https://github.com/billjr99/llmproxy)", +} +_429_RETRY_DELAYS = (5.0, 15.0, 30.0) # seconds between successive 429 retries class DocsScraperBase(Source): @@ -21,11 +27,19 @@ class DocsScraperBase(Source): provider_key: str = "" # e.g. "google", "groq" def fetch(self) -> list[Evidence]: - resp = requests.get(self.url, timeout=TIMEOUT, headers={ - "User-Agent": "llmproxy-update-free-models/1.0 (+https://github.com/billjr99/llmproxy)", - }) - resp.raise_for_status() - return list(self.parse(resp.text)) + delays = iter(_429_RETRY_DELAYS) + while True: + resp = requests.get(self.url, timeout=TIMEOUT, headers=_HEADERS) + if resp.status_code == 429: + wait = next(delays, None) + if wait is None: + return [] # retries exhausted — skip silently rather than raise + ra = resp.headers.get("Retry-After", "") + actual_wait = float(ra) if ra.isdigit() else wait + time.sleep(min(actual_wait, 60.0)) + continue + resp.raise_for_status() + return list(self.parse(resp.text)) def parse(self, html: str) -> list[Evidence]: # noqa: D401 — overridden by subclasses raise NotImplementedError diff --git a/tests/test_admin_api.py b/tests/test_admin_api.py index 72e36b9..dc72a13 100644 --- a/tests/test_admin_api.py +++ b/tests/test_admin_api.py @@ -362,8 +362,8 @@ def test_put_maintenance_sets_flags(client, cfg_path): }) assert resp.status_code == 200 saved = _read_config(cfg_path) - assert saved["free_tier"]["probe"]["enabled"] is True - assert saved["free_tier"]["probe"]["frequency_days"] == 7 + assert saved["free_tier"]["cost_probe"]["enabled"] is True + assert saved["free_tier"]["cost_probe"]["frequency_days"] == 7 assert saved["providers_pr"]["repo"] == "BillJr99/llmproxy" assert saved["providers_pr"]["token"] == "ghp_secret" # Token is never echoed back verbatim diff --git a/tests/test_fusion.py b/tests/test_fusion.py index 783fe09..ce05400 100644 --- a/tests/test_fusion.py +++ b/tests/test_fusion.py @@ -239,7 +239,7 @@ def test_fusion_backfills_failed_panel_from_reserve(server, monkeypatch): rep = json.loads(resp.get_data())["llmproxy_fusion"] assert rep["panel"] == ["p4/m4", "p5/m5"] # reserve backfilled the failed slots assert {f["model"] for f in rep["failed_models"]} == {"p0/m0", "p1/m1", "p2/m2", "p3/m3"} - assert seen["panel"] == ["m0", "m1", "m2", "m3", "m4", "m5"] # every slot attempted once + assert sorted(seen["panel"]) == ["m0", "m1", "m2", "m3", "m4", "m5"] # every slot attempted once def test_fusion_all_panel_fail_error_lists_reasons(server, monkeypatch): diff --git a/tests/test_scraper/test_docs_huggingface.py b/tests/test_scraper/test_docs_huggingface.py index 55dc969..11b07af 100644 --- a/tests/test_scraper/test_docs_huggingface.py +++ b/tests/test_scraper/test_docs_huggingface.py @@ -41,3 +41,24 @@ def test_unrelated_page_yields_empty(): html = "

Some other HF docs page

" responses.add(responses.GET, URL, body=html, status=200) assert HuggingFaceDocs().fetch() == [] + + +@responses.activate +def test_429_retried_and_succeeds(fixtures_dir: Path, monkeypatch): + """A single 429 is retried; the scraper returns results on the next attempt.""" + monkeypatch.setattr("scripts.sources.docs.base.time.sleep", lambda _: None) + html = (fixtures_dir / "huggingface_providers.html").read_text() + responses.add(responses.GET, URL, status=429) + responses.add(responses.GET, URL, body=html, status=200, content_type="text/html") + evs = HuggingFaceDocs().fetch() + assert evs # recovered after retry + assert len(responses.calls) == 2 + + +@responses.activate +def test_429_exhausted_returns_empty(monkeypatch): + """When all retries are consumed the scraper returns [] instead of raising.""" + monkeypatch.setattr("scripts.sources.docs.base.time.sleep", lambda _: None) + for _ in range(4): # initial attempt + 3 retries + responses.add(responses.GET, URL, status=429) + assert HuggingFaceDocs().fetch() == [] diff --git a/tests/test_scraper/test_probe_throttle.py b/tests/test_scraper/test_probe_throttle.py index e400175..5fe3161 100644 --- a/tests/test_scraper/test_probe_throttle.py +++ b/tests/test_scraper/test_probe_throttle.py @@ -1,12 +1,16 @@ """Tests for the probe-frequency throttle (scripts/update_free_models._probe_due) -and the probe-state cache helpers (llmproxy.config).""" +and the cost-probe-state cache helpers (llmproxy.config).""" from __future__ import annotations import json from datetime import UTC, datetime, timedelta -from llmproxy.config import get_probe_state_path, load_probe_state, save_probe_state +from llmproxy.config import ( + get_cost_probe_state_path, + load_cost_probe_state, + save_cost_probe_state, +) from scripts.update_free_models import _probe_due @@ -66,21 +70,21 @@ def test_non_numeric_frequency_defaults_to_always_due(): assert due is True -# --- probe-state cache helpers --------------------------------------------- +# --- cost-probe-state cache helpers ----------------------------------------- def test_probe_state_roundtrip(tmp_path): cfg = str(tmp_path / "config.json") - assert load_probe_state(cfg) == {} + assert load_cost_probe_state(cfg) == {} ts = datetime.now(UTC).isoformat() - assert save_probe_state({"last_probe_at": ts}, cfg) is True - assert get_probe_state_path(cfg) == tmp_path / "probe_state.json" - assert load_probe_state(cfg) == {"last_probe_at": ts} + assert save_cost_probe_state({"last_probe_at": ts}, cfg) is True + assert get_cost_probe_state_path(cfg) == tmp_path / "cost_probe_state.json" + assert load_cost_probe_state(cfg) == {"last_probe_at": ts} def test_probe_state_corrupt_returns_empty(tmp_path): cfg = str(tmp_path / "config.json") - get_probe_state_path(cfg).write_text("{not json", encoding="utf-8") - assert load_probe_state(cfg) == {} + get_cost_probe_state_path(cfg).write_text("{not json", encoding="utf-8") + assert load_cost_probe_state(cfg) == {} class _ReadOnlyPath: @@ -97,7 +101,7 @@ def __str__(self): def test_probe_timestamp_recorded_when_sidecar_write_fails(tmp_path, monkeypatch): """A read-only providers.json must not prevent the probe-throttle timestamp - (which lives next to the user config, e.g. /config/probe_state.json) from + (which lives next to the user config, e.g. /config/cost_probe_state.json) from advancing — regression for the container bind-mount case.""" import scripts.update_free_models as ufm @@ -108,10 +112,10 @@ def test_probe_timestamp_recorded_when_sidecar_write_fails(tmp_path, monkeypatch monkeypatch.setattr(ufm, "apply_updates", lambda *a, **k: True) monkeypatch.setattr(ufm, "DATA_PATH", _ReadOnlyPath()) - rc = ufm.main(["--source", "probe", "--config", str(cfg)]) + rc = ufm.main(["--source", "cost_probe", "--config", str(cfg)]) assert rc == 0 # the run completed despite the read-only sidecar - state = load_probe_state(str(cfg)) + state = load_cost_probe_state(str(cfg)) assert "last_probe_at" in state # throttle timestamp persisted to the bind mount @@ -126,7 +130,7 @@ def test_sidecar_mirrored_to_config_dir_when_readonly(tmp_path, monkeypatch): monkeypatch.setattr(ufm, "apply_updates", lambda *a, **k: True) monkeypatch.setattr(ufm, "DATA_PATH", _ReadOnlyPath()) - ufm.main(["--source", "probe", "--config", str(cfg)]) + ufm.main(["--source", "cost_probe", "--config", str(cfg)]) mirrored_providers = tmp_path / "providers.json" mirrored_example = tmp_path / "config.example.json"