From 897c63cec9b4913617128341859555c337c658f9 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 11:00:59 +0000
Subject: [PATCH 1/3] Add endpoint probe, rename cost_probe, add PR/probe
 interval throttling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename scripts/sources/probe.py → cost_probe.py; class ProbeSource →
  CostProbeSource; config key free_tier.probe → free_tier.cost_probe;
  state file probe_state.json → cost_probe_state.json (with auto-migration).
  Back-compat aliases preserved in config.py and admin.py so existing
  configs and admin API keep working.

- Add scripts/sources/endpoint_probe.py (EndpointProbeSource): calls each
  configured provider's GET /v1/models, emits positive Evidence for new
  :free-suffixed models not yet in believed_free and negative Evidence for
  :free models that disappeared. Gated by sync_on_startup or
  update_on_startup (no separate enabled flag needed). Throttled by
  free_tier.endpoint_probe.frequency_minutes (default 30).

- Add providers_pr.frequency_days throttle: _maybe_open_providers_pr()
  checks pr_state.json before opening a PR; saves last_pr_at on success.
  Accumulated sidecar changes are bundled into one PR per interval.

- Add _maybe_fire_interval_probes() in server.py: debounced (60 s gate)
  check called on startup and every request. Fires endpoint probe
  (sync_on_startup / update_on_startup), cost probe (update_on_startup +
  cost_probe.enabled), and PR creation (frequency_days elapsed) as
  single-flight daemon threads when their intervals have elapsed.

- New config keys: free_tier.endpoint_probe.{frequency_minutes,timeout_sec},
  free_tier.cost_probe (renamed), providers_pr.frequency_days.
  State files: cost_probe_state.json, endpoint_probe_state.json, pr_state.json.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01RkRhX4Znv73oPcZy41bnyS
---
 llmproxy/admin.py                           |   6 +-
 llmproxy/config.py                          | 118 ++++++++---
 llmproxy/server.py                          | 212 +++++++++++++++++++-
 llmproxy/usage.py                           |   2 +-
 scripts/sources/__init__.py                 |  16 +-
 scripts/sources/{probe.py => cost_probe.py} |  12 +-
 scripts/sources/endpoint_probe.py           | 131 ++++++++++++
 scripts/update_free_models.py               | 151 ++++++++------
 8 files changed, 551 insertions(+), 97 deletions(-)
 rename scripts/sources/{probe.py => cost_probe.py} (95%)
 create mode 100644 scripts/sources/endpoint_probe.py

diff --git a/llmproxy/admin.py b/llmproxy/admin.py
index f647d81..8f899ca 100644
--- a/llmproxy/admin.py
+++ b/llmproxy/admin.py
@@ -389,12 +389,12 @@ def admin_index():
 # the single-page admin UI unchanged while the on-disk config uses the grouped
 # objects (and the config loader's migration shim accepts either form on input).
 _MAINTENANCE_PATHS: dict[str, tuple[str, ...]] = {
-    "probe_cost": ("free_tier", "probe", "enabled"),
-    "autoremove_believed_free": ("free_tier", "probe", "autoremove"),
+    "probe_cost": ("free_tier", "cost_probe", "enabled"),
+    "autoremove_believed_free": ("free_tier", "cost_probe", "autoremove"),
     "update_believed_free_on_startup": ("free_tier", "update_on_startup"),
     "pr_providers_list": ("providers_pr", "enabled"),
     "sync_believed_free_on_startup": ("free_tier", "sync_on_startup"),
-    "probe_frequency_days": ("free_tier", "probe", "frequency_days"),
+    "probe_frequency_days": ("free_tier", "cost_probe", "frequency_days"),
     "pr_providers_repo": ("providers_pr", "repo"),
     "pr_providers_base": ("providers_pr", "base"),
     "pr_providers_branch": ("providers_pr", "branch"),
diff --git a/llmproxy/config.py b/llmproxy/config.py
index 4650ea1..68f275f 100755
--- a/llmproxy/config.py
+++ b/llmproxy/config.py
@@ -135,7 +135,11 @@ def get_config_path(override: str | None = None) -> Path:
 DEFAULT_FREE_TIER_CONFIG = {
     "sync_on_startup": True,
     "update_on_startup": False,
-    "probe": {
+    "endpoint_probe": {
+        "frequency_minutes": 30,
+        "timeout_sec": 10,
+    },
+    "cost_probe": {
         "enabled": False,
         "autoremove": False,
         "frequency_days": 0,
@@ -193,9 +197,9 @@ def get_config_path(override: str | None = None) -> Path:
 _LEGACY_KEY_MIGRATIONS: dict[str, tuple[str, ...]] = {
     "sync_believed_free_on_startup": ("free_tier", "sync_on_startup"),
     "update_believed_free_on_startup": ("free_tier", "update_on_startup"),
-    "probe_cost": ("free_tier", "probe", "enabled"),
-    "autoremove_believed_free": ("free_tier", "probe", "autoremove"),
-    "probe_frequency_days": ("free_tier", "probe", "frequency_days"),
+    "probe_cost": ("free_tier", "cost_probe", "enabled"),
+    "autoremove_believed_free": ("free_tier", "cost_probe", "autoremove"),
+    "probe_frequency_days": ("free_tier", "cost_probe", "frequency_days"),
     "pr_providers_list": ("providers_pr", "enabled"),
     "pr_providers_repo": ("providers_pr", "repo"),
     "pr_providers_base": ("providers_pr", "base"),
@@ -317,22 +321,14 @@ def save_config(config: dict, config_path: str | None = None) -> bool:
 
 
 # ---------------------------------------------------------------------------
-# Probe state (machine-managed cache, kept out of the user-edited config)
+# Probe state (machine-managed caches, kept out of the user-edited config)
 # ---------------------------------------------------------------------------
 #
-# The cost probe (scripts/sources/probe.py) can be throttled to at most once
-# every ``probe_frequency_days``. The last-run timestamp is stored in a small
-# sibling cache file rather than in config.json so we don't churn the
-# hand-edited config (or sweep the timestamp into the providers-PR / sync flow).
-
-def get_probe_state_path(config_path: str | None = None) -> Path:
-    """Return the path to the probe-state cache file, a sibling of config.json."""
-    return get_config_path(config_path).parent / "probe_state.json"
+# Each probe (cost_probe, endpoint_probe) and PR creation is throttled via its
+# own frequency setting. The last-run timestamps live in small sibling cache
+# files rather than in config.json so we don't churn the hand-edited config.
 
-
-def load_probe_state(config_path: str | None = None) -> dict:
-    """Load the probe-state cache, returning {} when absent or unreadable."""
-    path = get_probe_state_path(config_path)
+def _load_state_file(path: Path, label: str) -> dict:
     if not path.exists():
         return {}
     try:
@@ -340,13 +336,11 @@ def load_probe_state(config_path: str | None = None) -> dict:
             data = json.load(fh)
         return data if isinstance(data, dict) else {}
     except Exception as e:  # noqa: BLE001 — a corrupt cache must never break a run
-        print(f"[config:load_probe_state] Failed to load {path}: {e}")
+        print(f"[config:{label}] Failed to load {path}: {e}")
         return {}
 
 
-def save_probe_state(state: dict, config_path: str | None = None) -> bool:
-    """Persist the probe-state cache atomically (tempfile + rename)."""
-    path = get_probe_state_path(config_path)
+def _save_state_file(state: dict, path: Path, label: str) -> bool:
     try:
         path.parent.mkdir(parents=True, exist_ok=True)
         fd, tmp_name = tempfile.mkstemp(
@@ -366,10 +360,80 @@ def save_probe_state(state: dict, config_path: str | None = None) -> bool:
             raise
         return True
     except Exception as e:  # noqa: BLE001
-        print(f"[config:save_probe_state] Failed to write {path}: {e}")
+        print(f"[config:{label}] Failed to write {path}: {e}")
         return False
 
 
+# --- Cost probe state (cost_probe_state.json) ---
+
+def get_cost_probe_state_path(config_path: str | None = None) -> Path:
+    return get_config_path(config_path).parent / "cost_probe_state.json"
+
+
+def load_cost_probe_state(config_path: str | None = None) -> dict:
+    """Load cost probe state, migrating from the old probe_state.json if needed."""
+    path = get_cost_probe_state_path(config_path)
+    if not path.exists():
+        old = path.parent / "probe_state.json"
+        if old.exists():
+            data = _load_state_file(old, "load_cost_probe_state")
+            if data:
+                _save_state_file(data, path, "load_cost_probe_state")
+                return data
+    return _load_state_file(path, "load_cost_probe_state")
+
+
+def save_cost_probe_state(state: dict, config_path: str | None = None) -> bool:
+    return _save_state_file(
+        state, get_cost_probe_state_path(config_path), "save_cost_probe_state"
+    )
+
+
+# Back-compat aliases so any external callers of the old names still work.
+def get_probe_state_path(config_path: str | None = None) -> Path:
+    return get_cost_probe_state_path(config_path)
+
+
+def load_probe_state(config_path: str | None = None) -> dict:
+    return load_cost_probe_state(config_path)
+
+
+def save_probe_state(state: dict, config_path: str | None = None) -> bool:
+    return save_cost_probe_state(state, config_path)
+
+
+# --- Endpoint probe state (endpoint_probe_state.json) ---
+
+def get_endpoint_probe_state_path(config_path: str | None = None) -> Path:
+    return get_config_path(config_path).parent / "endpoint_probe_state.json"
+
+
+def load_endpoint_probe_state(config_path: str | None = None) -> dict:
+    return _load_state_file(get_endpoint_probe_state_path(config_path), "load_endpoint_probe_state")
+
+
+def save_endpoint_probe_state(state: dict, config_path: str | None = None) -> bool:
+    return _save_state_file(
+        state, get_endpoint_probe_state_path(config_path), "save_endpoint_probe_state"
+    )
+
+
+# --- PR state (pr_state.json) ---
+
+def get_pr_state_path(config_path: str | None = None) -> Path:
+    return get_config_path(config_path).parent / "pr_state.json"
+
+
+def load_pr_state(config_path: str | None = None) -> dict:
+    return _load_state_file(get_pr_state_path(config_path), "load_pr_state")
+
+
+def save_pr_state(state: dict, config_path: str | None = None) -> bool:
+    return _save_state_file(
+        state, get_pr_state_path(config_path), "save_pr_state"
+    )
+
+
 # ---------------------------------------------------------------------------
 # Provider helpers
 # ---------------------------------------------------------------------------
@@ -658,10 +722,16 @@ def _normalize_config(raw: dict) -> dict:
     """
     if not isinstance(raw, dict):
         return raw
-    if not any(k in raw for k in _LEGACY_KEY_MIGRATIONS):
-        return raw
 
     normalized = copy.deepcopy(raw)
+
+    # Migrate free_tier.probe → free_tier.cost_probe (renamed in this release).
+    ft = normalized.get("free_tier")
+    if isinstance(ft, dict) and "probe" in ft and "cost_probe" not in ft:
+        ft["cost_probe"] = ft.pop("probe")
+
+    if not any(k in normalized for k in _LEGACY_KEY_MIGRATIONS):
+        return normalized
     for legacy_key, path in _LEGACY_KEY_MIGRATIONS.items():
         if legacy_key not in normalized:
             continue
diff --git a/llmproxy/server.py b/llmproxy/server.py
index ba8b965..ca98ddc 100755
--- a/llmproxy/server.py
+++ b/llmproxy/server.py
@@ -470,6 +470,22 @@ def _reset_usage() -> None:
 _startup_update_done: bool = False
 _startup_update_lock = threading.Lock()
 
+# ---------------------------------------------------------------------------
+# Periodic interval probe checks (endpoint probe, cost probe, PR creation)
+# ---------------------------------------------------------------------------
+# State files are re-read at most once per _PROBE_INTERVAL_GATE_SEC so
+# concurrent requests don't all hit disk simultaneously; the real cadence comes
+# from the per-probe frequency_minutes / frequency_days settings in config.json.
+# The gate starts at -inf because time.monotonic() has no defined zero point.
+_PROBE_INTERVAL_GATE_SEC = 60   # check state files at most once per minute
+_last_probe_interval_check: float = float("-inf")
+_probe_interval_check_lock = threading.Lock()
+# Single-flight flags: set while the matching background probe thread is running.
+_endpoint_probe_inflight: bool = False
+_endpoint_probe_lock = threading.Lock()
+_cost_probe_inflight: bool = False
+_cost_probe_lock = threading.Lock()
+
 # ---------------------------------------------------------------------------
 # Short-lived response cache (non-streaming only)
 # ---------------------------------------------------------------------------
@@ -520,6 +536,9 @@ def _log_request() -> None:
     # deployments where the eager per-worker trigger in __main__ did not fire; it
     # is a no-op after the first invocation.
     _run_startup_tasks_once()
+    # Check probe / PR frequency intervals on every request (debounced by
+    # _PROBE_INTERVAL_GATE_SEC so state files are read at most once per minute).
+    _maybe_fire_interval_probes()
     logger.info("→ %s %s", request.method, request.path)
 
 
@@ -1281,8 +1300,8 @@ def _run_free_models_update(config: dict, config_path: str | None) -> bool:
     regenerates config.example.json, and syncs the user config; its changes are
     picked up by the normal mtime-based config reload.
 
-    When config['probe_cost'] is true the updater also actively probes
-    believed_free models for cost (see scripts/sources/probe.py).
+    When free_tier.cost_probe.enabled is true the updater also actively probes
+    believed_free models for cost (see scripts/sources/cost_probe.py).
     """
     # The scraper lives in the repo-root `scripts/` package, which sits next to
     # the installed `llmproxy/` package but may not be on sys.path (e.g. under
@@ -1367,6 +1386,160 @@ def _run_free_models_update(config: dict, config_path: str | None) -> bool:
     return True
 
 
+def _maybe_fire_interval_probes(config_path: str | None = None) -> None:
+    """Check whether the endpoint probe, cost probe, or providers PR is due.
+
+    Runs at most once per _PROBE_INTERVAL_GATE_SEC. ``config_path`` selects the
+    config and its sibling state files (None = default). Called from a request
+    hook, so failures are logged and swallowed rather than raised.
+
+    Endpoint probe: gated by sync_on_startup OR update_on_startup.
+    Cost probe: gated by update_on_startup AND cost_probe.enabled.
+    PR creation: checked independently of startup flags.
+    """
+    global _last_probe_interval_check
+    now = time.monotonic()
+    with _probe_interval_check_lock:
+        if now - _last_probe_interval_check < _PROBE_INTERVAL_GATE_SEC:
+            return
+        _last_probe_interval_check = now
+
+    try:
+        config = load_config(config_path)
+        free_tier = config.get("free_tier", {}) if isinstance(config.get("free_tier"), dict) else {}
+
+        # Endpoint probe — gated by sync_on_startup OR update_on_startup.
+        if free_tier.get("sync_on_startup") or free_tier.get("update_on_startup"):
+            _maybe_fire_endpoint_probe(config, free_tier, config_path)
+
+        # Cost probe — gated by update_on_startup + cost_probe.enabled.
+        if free_tier.get("update_on_startup") and free_tier.get("cost_probe", {}).get("enabled"):
+            _maybe_fire_cost_probe(config, free_tier, config_path)
+
+        # PR creation interval — independent of startup flags.
+        _maybe_fire_pr_if_due(config, config_path)
+    except Exception as exc:  # noqa: BLE001 — a scheduling error must never fail a request
+        logger.warning("[interval-probes] check failed: %s", exc)
+
+
+def _maybe_fire_endpoint_probe(
+    config: dict, free_tier: dict, config_path: str | None
+) -> None:
+    ep_cfg = free_tier.get("endpoint_probe", {})
+    freq_min = ep_cfg.get("frequency_minutes", 30)
+    freq_days = freq_min / 1440.0
+    try:
+        import sys, os as _os
+        repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__)))
+        if repo_root not in sys.path:
+            sys.path.insert(0, repo_root)
+        from scripts.update_free_models import _probe_due
+        from llmproxy.config import load_endpoint_probe_state
+    except Exception:  # noqa: BLE001 — scripts/ may not be available
+        return
+    state = load_endpoint_probe_state(config_path)
+    due, _ = _probe_due(state.get("last_probe_at"), freq_days)
+    if not due:
+        return
+
+    global _endpoint_probe_inflight
+    with _endpoint_probe_lock:
+        if _endpoint_probe_inflight:
+            return
+        _endpoint_probe_inflight = True
+
+    def _run() -> None:
+        global _endpoint_probe_inflight
+        try:
+            logger.info("[endpoint-probe] interval due — running endpoint probe")
+            _run_free_models_update(load_config(), config_path)
+            with _models_list_cache_lock:
+                global _models_list_cache
+                _models_list_cache = None
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("[endpoint-probe] failed: %s", exc)
+        finally:
+            with _endpoint_probe_lock:
+                _endpoint_probe_inflight = False
+
+    threading.Thread(target=_run, daemon=True, name="endpoint-probe-interval").start()
+
+
+def _maybe_fire_cost_probe(
+    config: dict, free_tier: dict, config_path: str | None
+) -> None:
+    cost_probe_cfg = free_tier.get("cost_probe", {})
+    freq_days = cost_probe_cfg.get("frequency_days", 0)
+    try:
+        import sys, os as _os
+        repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__)))
+        if repo_root not in sys.path:
+            sys.path.insert(0, repo_root)
+        from scripts.update_free_models import _probe_due
+        from llmproxy.config import load_cost_probe_state
+    except Exception:  # noqa: BLE001
+        return
+    state = load_cost_probe_state(config_path)
+    due, _ = _probe_due(state.get("last_probe_at"), freq_days)
+    if not due:
+        return
+
+    global _cost_probe_inflight
+    with _cost_probe_lock:
+        if _cost_probe_inflight:
+            return
+        _cost_probe_inflight = True
+
+    def _run() -> None:
+        global _cost_probe_inflight
+        try:
+            logger.info("[cost-probe] interval due — running cost probe")
+            _run_free_models_update(load_config(), config_path)
+            with _models_list_cache_lock:
+                global _models_list_cache
+                _models_list_cache = None
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("[cost-probe] failed: %s", exc)
+        finally:
+            with _cost_probe_lock:
+                _cost_probe_inflight = False
+
+    threading.Thread(target=_run, daemon=True, name="cost-probe-interval").start()
+
+
+def _maybe_fire_pr_if_due(config: dict, config_path: str | None) -> None:
+    """Open a providers PR if providers_pr.frequency_days has elapsed since last PR.
+
+    The PR call runs on a daemon thread so the calling request never blocks.
+    """
+    pr_cfg = config.get("providers_pr", {})
+    if pr_cfg.get("enabled") is not True:
+        return
+    freq_days = pr_cfg.get("frequency_days", 0)
+    if not isinstance(freq_days, (int, float)) or freq_days <= 0:
+        return  # no throttle configured — PR is opened immediately after updates
+    try:
+        import sys, os as _os
+        repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__)))
+        if repo_root not in sys.path:
+            sys.path.insert(0, repo_root)
+        from scripts.update_free_models import _probe_due
+        from llmproxy.config import load_pr_state
+        from llmproxy.providers import DATA_PATH as _DATA_PATH
+    except Exception:  # noqa: BLE001
+        return
+    due, _ = _probe_due(load_pr_state(config_path).get("last_pr_at"), freq_days)
+    if not due:
+        return
+    try:
+        providers_text = _DATA_PATH.read_text(encoding="utf-8")
+    except (OSError, ValueError):  # missing or unreadable sidecar — nothing to send
+        return
+    logger.info("[providers-pr] frequency_days interval elapsed — checking for PR")
+    threading.Thread(target=_maybe_open_providers_pr, args=(config, providers_text),
+                     daemon=True, name="providers-pr-interval").start()
+
+
 def _run_startup_tasks_once(config_path: str | None = None) -> None:
     """Run the one-time per-worker startup tasks in a background daemon thread.
 
@@ -1444,6 +1617,10 @@ def _run() -> None:
         except Exception as exc:  # noqa: BLE001 — warming must never crash the worker
             logger.warning("[startup] /v1/models cache warm failed: %s", exc)
 
+        # 6. Check frequency intervals for endpoint probe, cost probe, and PR
+        #    creation. Fires background threads for any that are due.
+        _maybe_fire_interval_probes(config_path)
+
     threading.Thread(target=_run, daemon=True, name="startup-tasks").start()
 
 
@@ -1465,6 +1642,31 @@ def _maybe_open_providers_pr(config: dict, providers_text: str, example_text: st
     """
     if config.get("providers_pr", {}).get("enabled") is not True:
         return
+
+    # Throttle PR creation to at most once every providers_pr.frequency_days.
+    pr_cfg = config.get("providers_pr", {})
+    freq_days = pr_cfg.get("frequency_days", 0)
+    if freq_days and freq_days > 0:
+        try:
+            import sys as _sys, os as _os2
+            repo_root = _os2.path.dirname(_os2.path.dirname(_os2.path.abspath(__file__)))
+            if repo_root not in _sys.path:
+                _sys.path.insert(0, repo_root)
+            from scripts.update_free_models import _probe_due
+            from .config import load_pr_state, save_pr_state as _save_pr_state
+        except Exception:  # noqa: BLE001
+            _probe_due = None  # type: ignore[assignment]
+        if _probe_due is not None:
+            pr_state = load_pr_state()
+            due, days_since = _probe_due(pr_state.get("last_pr_at"), freq_days)
+            if not due:
+                logger.info(
+                    "[providers-pr] throttled — %.1f day(s) since last PR "
+                    "(frequency_days=%s); skipping.",
+                    days_since, freq_days,
+                )
+                return
+
     import os
 
     from .github_pr import create_or_update_pr
@@ -1514,6 +1716,12 @@ def _maybe_open_providers_pr(config: dict, providers_text: str, example_text: st
         )
         if url:
             logger.info("[providers-pr] %s", url)
+            try:
+                from datetime import UTC as _UTC, datetime as _datetime
+                from .config import save_pr_state as _save_pr_state2
+                _save_pr_state2({"last_pr_at": _datetime.now(_UTC).isoformat()})
+            except Exception as _exc:  # noqa: BLE001
+                logger.warning("[providers-pr] could not save pr_state: %s", _exc)
     except Exception as exc:  # noqa: BLE001 — PR creation is best-effort
         logger.warning("[providers-pr] failed to open PR: %s", exc)
 
diff --git a/llmproxy/usage.py b/llmproxy/usage.py
index 914a9be..4864e92 100644
--- a/llmproxy/usage.py
+++ b/llmproxy/usage.py
@@ -2,7 +2,7 @@
 
 This module holds the *pure* (web-framework-free) pieces of usage tracking so
 they can be unit-tested in isolation and reused by the offline scraper probe
-(scripts/sources/probe.py):
+(scripts/sources/cost_probe.py):
 
   * ``ModelUsage`` — a thread-safe per-model counter that tracks both requests
     (sliding 60s window + per-day) and tokens (same windows) plus lifetime
diff --git a/scripts/sources/__init__.py b/scripts/sources/__init__.py
index d96eea5..d266e02 100644
--- a/scripts/sources/__init__.py
+++ b/scripts/sources/__init__.py
@@ -10,7 +10,8 @@
 from .fireworks import FireworksSource
 from .litellm_cost_map import LiteLLMCostMapSource
 from .openrouter import OpenRouterSource
-from .probe import ProbeSource
+from .cost_probe import CostProbeSource
+from .endpoint_probe import EndpointProbeSource
 from .together import TogetherSource
 
 ALL_SOURCES: dict[str, type[Source]] = {
@@ -21,12 +22,17 @@
     "together": TogetherSource,
     "fireworks": FireworksSource,
     # Active cost probe. Excluded from the default source set because it sends
-    # real requests; opt in via config probe_cost: true or --probe.
-    "probe": ProbeSource,
+    # real requests; opt in via free_tier.cost_probe.enabled or --cost-probe.
+    "cost_probe": CostProbeSource,
+    # Endpoint discovery probe. Excluded from the default source set because it
+    # makes authenticated GET /models requests; opt in via sync_on_startup or
+    # update_on_startup, or pass --source endpoint_probe.
+    "endpoint_probe": EndpointProbeSource,
 }
 
-# Sources that must NOT run unless explicitly opted into (they spend real quota).
-OPT_IN_SOURCES: frozenset[str] = frozenset({"probe"})
+# Sources that must NOT run unless explicitly opted into (they spend real quota
+# or make authenticated requests beyond the normal scrape set).
+OPT_IN_SOURCES: frozenset[str] = frozenset({"cost_probe", "endpoint_probe"})
 
 # Docs scrapers are registered separately so we can list them under --source docs.
 from .docs import DOCS_SOURCES  # noqa: E402
diff --git a/scripts/sources/probe.py b/scripts/sources/cost_probe.py
similarity index 95%
rename from scripts/sources/probe.py
rename to scripts/sources/cost_probe.py
index 74428fb..da747cf 100644
--- a/scripts/sources/probe.py
+++ b/scripts/sources/cost_probe.py
@@ -1,4 +1,4 @@
-"""probe.py — active cost probe for believed-free models.
+"""cost_probe.py — active cost probe for believed-free models.
 
 Unlike every other source (which reads docs / pricing pages / /models
 catalogs), this source sends a *real*, minimal chat completion to each model in
@@ -8,9 +8,9 @@
 high-confidence ``is_free=False`` so the aggregator can flag/remove it.
 
 This source spends real quota (and possibly real money), so it is OFF by
-default: it only runs when the user sets ``probe_cost: true`` in config.json or
-passes ``--probe`` on the command line, and it skips any provider that has no
-configured API key.
+default: it only runs when the user sets ``cost_probe.enabled: true`` in
+config.json or passes ``--cost-probe`` on the command line, and it skips any
+provider that has no configured API key.
 
 Probes run with bounded *per-provider* concurrency: requests to different
 providers overlap, but each provider is capped (``concurrency``, default 3) so
@@ -62,8 +62,8 @@ def _make_progress(total: int, desc: str):
 }
 
 
-class ProbeSource(Source):
-    name = "probe"
+class CostProbeSource(Source):
+    name = "cost_probe"
 
     def __init__(
         self,
diff --git a/scripts/sources/endpoint_probe.py b/scripts/sources/endpoint_probe.py
new file mode 100644
index 0000000..57a3882
--- /dev/null
+++ b/scripts/sources/endpoint_probe.py
@@ -0,0 +1,131 @@
+"""endpoint_probe.py — discovers :free-suffixed models via GET /v1/models.
+
+Unlike cost_probe.py (which sends real chat completions to cost-verify models),
+this source only makes authenticated GET requests to each configured provider's
+/v1/models endpoint. It emits:
+
+  * positive Evidence for model IDs ending in ':free' that are returned by the
+    endpoint but are not yet in believed_free — new free models to consider adding.
+  * negative Evidence for model IDs ending in ':free' that *are* in believed_free
+    but are no longer returned by the endpoint — models that may have been removed.
+
+Non-:free believed_free models are ignored by this source; their presence/absence
+in the /v1/models listing is handled by other sources (e.g. api_models).
+
+This source makes authenticated network calls but does not spend quota, so it
+is controlled by the existing sync_on_startup / update_on_startup startup flags
+rather than requiring its own 'enabled' toggle.
+"""
+
+from __future__ import annotations
+
+import requests
+
+from llmproxy.config import (
+    get_provider,
+    load_config,
+    provider_api_key,
+    provider_base_url,
+)
+from llmproxy.providers import load_data
+
+from .base import Evidence, Source
+
+DEFAULT_TIMEOUT = 10
+
+
+class EndpointProbeSource(Source):
+    """Compare each provider's live /models listing against believed_free ':free' ids."""
+    name = "endpoint_probe"
+
+    def __init__(
+        self,
+        config_path: str | None = None,
+        provider_filter: str | None = None,
+        timeout: int = DEFAULT_TIMEOUT,
+    ) -> None:
+        self.config_path = config_path
+        self.provider_filter = provider_filter
+        self.timeout = timeout
+
+    def fetch(self) -> list[Evidence]:
+        """Return Evidence for newly listed and no-longer-listed ':free' models."""
+        config = load_config(self.config_path, force_reload=True)
+        sidecar = load_data()
+
+        # Build set of believed_free :free-suffixed models per provider.
+        believed_free_per_provider: dict[str, set[str]] = {}
+        for prov_data in sidecar.get("providers", {}).values():
+            for qualified in prov_data.get("believed_free", []):
+                p, sep, m = qualified.partition("/")
+                if sep and m.endswith(":free"):
+                    believed_free_per_provider.setdefault(p, set()).add(m)
+
+        evidence: list[Evidence] = []
+        for provider_name, provider_cfg in config.get("providers", {}).items():
+            if self.provider_filter and provider_name != self.provider_filter:
+                continue
+            if not get_provider(config, provider_name):
+                continue
+            api_key = provider_api_key(provider_cfg)
+            if not api_key:
+                continue
+            base_url = provider_base_url(provider_cfg)
+            if not base_url:
+                continue
+
+            endpoint_ids = self._fetch_model_ids(base_url, api_key)
+            if endpoint_ids is None:
+                continue  # network failure — emit no opinion (fail-soft)
+
+            endpoint_free = {m for m in endpoint_ids if m.endswith(":free")}
+            known_free = believed_free_per_provider.get(provider_name, set())
+            models_url = f"{base_url.rstrip('/')}/models"
+
+            # Positive: new :free models not yet in believed_free.
+            for model_id in sorted(endpoint_free - known_free):
+                evidence.append(Evidence(
+                    provider=provider_name,
+                    model_id=f"{provider_name}/{model_id}",
+                    is_free=True,
+                    source=self.name,
+                    confidence="high",
+                    url=models_url,
+                    notes="endpoint lists model with :free suffix; not yet in believed_free",
+                ))
+
+            # Negative: believed_free :free models absent from endpoint.
+            for model_id in sorted(known_free - endpoint_free):
+                evidence.append(Evidence(
+                    provider=provider_name,
+                    model_id=f"{provider_name}/{model_id}",
+                    is_free=False,
+                    source=self.name,
+                    confidence="high",
+                    url=models_url,
+                    notes="endpoint no longer lists this :free model",
+                ))
+
+        return evidence
+
+    def _fetch_model_ids(self, base_url: str, api_key: str) -> list[str] | None:
+        """GET {base_url}/models; return its string model ids (others skipped), or None on error."""
+        try:
+            resp = requests.get(
+                f"{base_url.rstrip('/')}/models",
+                headers={"Authorization": f"Bearer {api_key}"},
+                timeout=self.timeout,
+            )
+            if resp.status_code >= 400:
+                return None
+            data = resp.json()
+            models = data.get("data") if isinstance(data, dict) else data
+            if not isinstance(models, list):
+                return None
+            return [
+                m["id"]
+                for m in models
+                if isinstance(m, dict) and isinstance(m.get("id"), str) and m["id"]
+            ]
+        except Exception:  # noqa: BLE001 — any failure means no opinion
+            return None
diff --git a/scripts/update_free_models.py b/scripts/update_free_models.py
index 152b44f..ab79ef1 100644
--- a/scripts/update_free_models.py
+++ b/scripts/update_free_models.py
@@ -9,7 +9,8 @@
   python scripts/update_free_models.py --source openrouter,docs
   python scripts/update_free_models.py --regen-config-only
   python scripts/update_free_models.py --config ~/.config/llmproxy/config.json
-  python scripts/update_free_models.py --probe --probe-concurrency 2
+  python scripts/update_free_models.py --cost-probe --cost-probe-concurrency 2
+  python scripts/update_free_models.py --endpoint-probe
 
 Behavior
 --------
@@ -48,9 +49,11 @@
     load_config as load_user_config,
 )
 from llmproxy.config import (  # noqa: E402
-    load_probe_state,
+    load_cost_probe_state,
+    load_endpoint_probe_state,
     save_config,
-    save_probe_state,
+    save_cost_probe_state,
+    save_endpoint_probe_state,
 )
 from llmproxy.providers import (  # noqa: E402
     DATA_PATH,
@@ -59,8 +62,9 @@
     load_data,
 )
 from scripts.sources import ALL_SOURCES, OPT_IN_SOURCES, Evidence  # noqa: E402
+from scripts.sources.cost_probe import CostProbeSource  # noqa: E402
+from scripts.sources.endpoint_probe import EndpointProbeSource  # noqa: E402
 from scripts.sources.litellm_cost_map import fetch_pricing_map  # noqa: E402
-from scripts.sources.probe import ProbeSource  # noqa: E402
 
 CONFIG_EXAMPLE_PATH = REPO_ROOT / "config.example.json"
 
@@ -566,7 +570,11 @@ def regenerate_config_example(sidecar: dict, server_block: dict | None = None,
         "free_tier": {
             "sync_on_startup": True,
             "update_on_startup": False,
-            "probe": {
+            "endpoint_probe": {
+                "frequency_minutes": 30,
+                "timeout_sec": 10,
+            },
+            "cost_probe": {
                 "enabled": False,
                 "autoremove": False,
                 "frequency_days": 0,
@@ -574,6 +582,7 @@ def regenerate_config_example(sidecar: dict, server_block: dict | None = None,
         },
         "providers_pr": {
             "enabled": False,
+            "frequency_days": 0,
             "repo": None,
             "base": "main",
             "branch": "llmproxy-auto/providers",
@@ -835,12 +844,16 @@ def _run_source(
     probe_max: int | None = None,
     probe_provider: str | None = None,
     probe_concurrency: int | None = None,
+    endpoint_probe_timeout: int = 10,
 ) -> tuple[str, bool, list[Evidence], str | None]:
     try:
-        if source_name == "probe":
-            src = ProbeSource(config_path=config_path, max_models=probe_max,
-                              provider_filter=probe_provider,
-                              concurrency=probe_concurrency)
+        if source_name == "cost_probe":
+            src = CostProbeSource(config_path=config_path, max_models=probe_max,
+                                  provider_filter=probe_provider,
+                                  concurrency=probe_concurrency)
+        elif source_name == "endpoint_probe":
+            src = EndpointProbeSource(config_path=config_path,
+                                      timeout=endpoint_probe_timeout)
         else:
             cls = ALL_SOURCES[source_name]
             src = cls()
@@ -852,7 +865,7 @@ def _run_source(
 
 def _probe_due(last_probe_at: str | None, frequency_days, now: datetime | None = None
                ) -> tuple[bool, float | None]:
-    """Decide whether the cost probe is due to run.
+    """Decide whether a probe/PR is due to run.
 
     Returns ``(due, days_since_last)``. The probe is due when:
       * ``frequency_days`` is missing or <= 0 (no throttle — run every time), or
@@ -889,22 +902,26 @@ def main(argv: list[str] | None = None) -> int:
                     help="Limit updates to a single provider (e.g. 'google').")
     ap.add_argument("--source", default=",".join(default_sources),
                     help="Comma-separated source names (default: all except opt-in probes).")
-    ap.add_argument("--probe", action="store_true",
+    ap.add_argument("--cost-probe", action="store_true",
                     help="Actively probe believed_free models for cost (sends real "
                          "requests; requires configured API keys). Also enabled by "
-                         "setting probe_cost: true in config.json.")
-    ap.add_argument("--probe-max", type=int, metavar="N",
-                    help="Probe at most N models (bounds spend).")
-    ap.add_argument("--probe-provider", metavar="NAME",
-                    help="Only probe models from this provider.")
-    ap.add_argument("--probe-concurrency", type=int, metavar="N",
-                    help="Max concurrent probe requests per provider (default 3). "
+                         "setting free_tier.cost_probe.enabled: true in config.json.")
+    ap.add_argument("--cost-probe-max", type=int, metavar="N",
+                    help="Cost-probe at most N models (bounds spend).")
+    ap.add_argument("--cost-probe-provider", metavar="NAME",
+                    help="Only cost-probe models from this provider.")
+    ap.add_argument("--cost-probe-concurrency", type=int, metavar="N",
+                    help="Max concurrent cost-probe requests per provider (default 3). "
                          "Different providers always run in parallel; this bounds "
                          "in-flight requests to any single provider to avoid "
                          "tripping its rate limit.")
+    ap.add_argument("--endpoint-probe", action="store_true",
+                    help="Probe each provider's /v1/models endpoint to discover new "
+                         ":free-suffixed models. Also enabled by sync_on_startup or "
+                         "update_on_startup in config.json.")
     ap.add_argument("--ignore-throttle", action="store_true",
-                    help="Probe even if probe_frequency_days says it is too soon "
-                         "since the last probe (bypasses the throttle).")
+                    help="Run probes even if their frequency setting says it is too "
+                         "soon since the last run (bypasses all throttles).")
     ap.add_argument("--regen-config-only", action="store_true",
                     help="Skip scraping; regenerate config.example.json from the current sidecar.")
     ap.add_argument("--sync-config-only", action="store_true",
@@ -927,31 +944,50 @@ def main(argv: list[str] | None = None) -> int:
             return 2
         return _sync_user_config(load_data(), args.config, dry_run=args.dry_run)
 
-    # Read opt-in flags from the user config (probe_cost / autoremove_believed_free).
+    # Read opt-in flags from the user config (cost_probe / endpoint_probe / autoremove).
     try:
         user_cfg = load_user_config(args.config, force_reload=True)
     except Exception:  # noqa: BLE001 — a missing/broken config must not break scraping
         user_cfg = {}
-    probe_cfg = user_cfg.get("free_tier", {}).get("probe", {})
-    probe_cost = bool(probe_cfg.get("enabled", False)) or args.probe
-    autoremove = bool(probe_cfg.get("autoremove", False))
-
-    # Throttle the probe to at most once every probe_frequency_days. The last-run
-    # timestamp lives in a sibling cache file (probe_state.json), not config.json.
+    free_tier_cfg = user_cfg.get("free_tier", {})
+    cost_probe_cfg = free_tier_cfg.get("cost_probe", {})
+    cost_probe_enabled = bool(cost_probe_cfg.get("enabled", False)) or args.cost_probe
+    autoremove = bool(cost_probe_cfg.get("autoremove", False))
+    ep_cfg = free_tier_cfg.get("endpoint_probe", {})
+    ep_enabled = bool(
+        free_tier_cfg.get("sync_on_startup") or free_tier_cfg.get("update_on_startup")
+    ) or args.endpoint_probe
+
+    # Throttle the cost probe to at most once every frequency_days. The last-run
+    # timestamp lives in cost_probe_state.json (sibling of config.json).
     # --ignore-throttle bypasses this; frequency 0 means "probe every time".
-    if probe_cost and not args.ignore_throttle:
-        state = load_probe_state(args.config)
+    if cost_probe_enabled and not args.ignore_throttle:
+        cp_state = load_cost_probe_state(args.config)
         due, days_since = _probe_due(
-            state.get("last_probe_at"), probe_cfg.get("frequency_days", 0)
+            cp_state.get("last_probe_at"), cost_probe_cfg.get("frequency_days", 0)
         )
         if not due:
-            freq = probe_cfg.get("frequency_days", 0)
+            freq = cost_probe_cfg.get("frequency_days", 0)
             since = f"{days_since:.1f}" if days_since is not None else "?"
             print(_warn(
-                f"  ⚠  probe throttled — last run was {since} day(s) ago, "
-                f"probe_frequency_days={freq}. Use --ignore-throttle to override."
+                f"  ⚠  cost_probe throttled — last run was {since} day(s) ago, "
+                f"frequency_days={freq}. Use --ignore-throttle to override."
             ))
-            probe_cost = False
+            cost_probe_enabled = False
+
+    # Throttle the endpoint probe to at most once every frequency_minutes.
+    if ep_enabled and not args.ignore_throttle:
+        ep_state = load_endpoint_probe_state(args.config)
+        freq_days = ep_cfg.get("frequency_minutes", 30) / 1440.0
+        ep_due, ep_days_since = _probe_due(ep_state.get("last_probe_at"), freq_days)
+        if not ep_due:
+            freq_min = ep_cfg.get("frequency_minutes", 30)
+            ep_since_min = f"{ep_days_since * 1440:.1f}" if ep_days_since is not None else "?"
+            print(_warn(
+                f"  ⚠  endpoint_probe throttled — last run was {ep_since_min} min ago, "
+                f"frequency_minutes={freq_min}. Use --ignore-throttle to override."
+            ))
+            ep_enabled = False
 
     if args.regen_config_only:
         write_config_example()
@@ -961,24 +997,27 @@ def main(argv: list[str] | None = None) -> int:
         return 0
 
     requested = [s.strip() for s in args.source.split(",") if s.strip()]
-    if probe_cost and "probe" not in requested:
-        requested.append("probe")
+    if cost_probe_enabled and "cost_probe" not in requested:
+        requested.append("cost_probe")
+    if ep_enabled and "endpoint_probe" not in requested:
+        requested.append("endpoint_probe")
     unknown = [s for s in requested if s not in ALL_SOURCES]
     if unknown:
         print(_err(f"Unknown source(s): {unknown}. Known: {sorted(ALL_SOURCES.keys())}"))
         return 2
 
     print(_h(f"\nFetching evidence from sources: {requested}"))
-    if "probe" in requested:
-        print(_warn("  ⚠  probe enabled — sending real requests to believed_free models "
+    if "cost_probe" in requested:
+        print(_warn("  ⚠  cost_probe enabled — sending real requests to believed_free models "
                     "(uses configured API keys / quota)."))
     all_evidence: list[Evidence] = []
     source_status: dict[str, bool] = {}
     with ThreadPoolExecutor(max_workers=min(5, len(requested))) as ex:
         futures = {
             ex.submit(_run_source, s, config_path=args.config,
-                      probe_max=args.probe_max, probe_provider=args.probe_provider,
-                      probe_concurrency=args.probe_concurrency): s
+                      probe_max=args.cost_probe_max, probe_provider=args.cost_probe_provider,
+                      probe_concurrency=args.cost_probe_concurrency,
+                      endpoint_probe_timeout=ep_cfg.get("timeout_sec", 10)): s
             for s in requested
         }
         for fut in as_completed(futures):
@@ -990,16 +1029,16 @@ def main(argv: list[str] | None = None) -> int:
             else:
                 print(_err(f"  {name}: FAILED — {err}"))
 
-    # Probe-confirmed paid models. When autoremove_believed_free is off (the
-    # default), we report these but do NOT remove them from believed_free.
-    probe_paid = {
+    # Cost-probe-confirmed paid models. When autoremove is off (the default),
+    # we report these but do NOT remove them from believed_free.
+    cost_probe_paid = {
         (ev.provider, ev.model_id)
         for ev in all_evidence
-        if ev.source == "probe" and ev.is_free is False
+        if ev.source == "cost_probe" and ev.is_free is False
     }
-    if probe_paid:
-        print(_h("\n=== Probe flagged believed_free models reporting a cost ==="))
-        for _provider_name, model_id in sorted(probe_paid):
+    if cost_probe_paid:
+        print(_h("\n=== cost_probe flagged believed_free models reporting a cost ==="))
+        for _provider_name, model_id in sorted(cost_probe_paid):
             print(f"  {_warn('⚠')} {model_id}")
         if autoremove:
             print(_warn("  autoremove_believed_free=true → these will be removed."))
@@ -1009,7 +1048,7 @@ def main(argv: list[str] | None = None) -> int:
         if not autoremove:
             all_evidence = [
                 ev for ev in all_evidence
-                if not (ev.source == "probe" and ev.is_free is False)
+                if not (ev.source == "cost_probe" and ev.is_free is False)
             ]
 
     # If only "api" succeeded for a provider, we trust /models presence as
@@ -1070,14 +1109,14 @@ def main(argv: list[str] | None = None) -> int:
     else:
         print(_dim("\nNo changes to apply."))
 
-    # Record when the probe last ran so probe_frequency_days can throttle the
-    # next invocation. Only on a real run where the probe was actually included.
-    # Done after (and independent of) the sidecar write above so a read-only
-    # providers.json can't prevent the throttle from advancing.
-    if "probe" in requested:
-        save_probe_state(
-            {"last_probe_at": datetime.now(UTC).isoformat()}, args.config
-        )
+    # Record when each probe last ran so frequency settings can throttle the
+    # next invocation. Done after (and independent of) the sidecar write so a
+    # read-only providers.json can't prevent the throttle from advancing.
+    now_iso = datetime.now(UTC).isoformat()
+    if "cost_probe" in requested:
+        save_cost_probe_state({"last_probe_at": now_iso}, args.config)
+    if "endpoint_probe" in requested:
+        save_endpoint_probe_state({"last_probe_at": now_iso}, args.config)
 
     # Sync the user config even when the sidecar was unchanged — a stale config
     # should still be reconciled against the current sidecar.

From 45d5bf1f7dccf3dc5593d3b4821184aeb9740c25 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 11:19:42 +0000
Subject: [PATCH 2/3] =?UTF-8?q?fix:=20resolve=20CI=20failures=20after=20pr?=
 =?UTF-8?q?obe=E2=86=92cost=5Fprobe=20rename?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename ProbeSource→CostProbeSource throughout test_probe_source.py
  (module path, monkeypatch targets, class usage, source name assertion)
- Sort cost_probe/endpoint_probe imports to correct alphabetical position
  in scripts/sources/__init__.py (ruff I001)
- Rename unused loop variable prov_name→_prov_name in endpoint_probe.py
  (ruff B007)
- Fix server.py: split `import os, sys` into separate lines (ruff E401),
  separate import groups with blank lines per isort (ruff I001), and
  remove unused `save_pr_state as _save_pr_state` alias (ruff F401)
- Regenerate config.example.json with cost_probe/endpoint_probe sections

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01RkRhX4Znv73oPcZy41bnyS
---
 config.example.json                     |  7 +++++-
 llmproxy/server.py                      | 25 +++++++++++++--------
 scripts/sources/__init__.py             |  4 ++--
 scripts/sources/endpoint_probe.py       |  2 +-
 tests/test_scraper/test_probe_source.py | 30 ++++++++++++-------------
 5 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/config.example.json b/config.example.json
index cec67d6..bbdd146 100644
--- a/config.example.json
+++ b/config.example.json
@@ -1282,7 +1282,11 @@
   "free_tier": {
     "sync_on_startup": true,
     "update_on_startup": false,
-    "probe": {
+    "endpoint_probe": {
+      "frequency_minutes": 30,
+      "timeout_sec": 10
+    },
+    "cost_probe": {
       "enabled": false,
       "autoremove": false,
       "frequency_days": 0
@@ -1290,6 +1294,7 @@
   },
   "providers_pr": {
     "enabled": false,
+    "frequency_days": 0,
     "repo": null,
     "base": "main",
     "branch": "llmproxy-auto/providers",
diff --git a/llmproxy/server.py b/llmproxy/server.py
index ca98ddc..f05c79e 100755
--- a/llmproxy/server.py
+++ b/llmproxy/server.py
@@ -1429,12 +1429,13 @@ def _maybe_fire_endpoint_probe(
     freq_min = ep_cfg.get("frequency_minutes", 30)
     freq_days = freq_min / 1440.0
     try:
-        import sys, os as _os
+        import os as _os
+        import sys
         repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__)))
         if repo_root not in sys.path:
             sys.path.insert(0, repo_root)
-        from scripts.update_free_models import _probe_due
         from llmproxy.config import load_endpoint_probe_state
+        from scripts.update_free_models import _probe_due
     except Exception:  # noqa: BLE001 — scripts/ may not be available
         return
     state = load_endpoint_probe_state(config_path)
@@ -1471,12 +1472,13 @@ def _maybe_fire_cost_probe(
     cost_probe_cfg = free_tier.get("cost_probe", {})
     freq_days = cost_probe_cfg.get("frequency_days", 0)
     try:
-        import sys, os as _os
+        import os as _os
+        import sys
         repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__)))
         if repo_root not in sys.path:
             sys.path.insert(0, repo_root)
-        from scripts.update_free_models import _probe_due
         from llmproxy.config import load_cost_probe_state
+        from scripts.update_free_models import _probe_due
     except Exception:  # noqa: BLE001
         return
     state = load_cost_probe_state(config_path)
@@ -1516,13 +1518,14 @@ def _maybe_fire_pr_if_due(config: dict, config_path: str | None) -> None:
     if not freq_days or freq_days <= 0:
         return  # no throttle configured — PR is opened immediately after updates
     try:
-        import sys, os as _os
+        import os as _os
+        import sys
         repo_root = _os.path.dirname(_os.path.dirname(_os.path.abspath(__file__)))
         if repo_root not in sys.path:
             sys.path.insert(0, repo_root)
-        from scripts.update_free_models import _probe_due
         from llmproxy.config import load_pr_state
         from llmproxy.providers import DATA_PATH as _DATA_PATH
+        from scripts.update_free_models import _probe_due
     except Exception:  # noqa: BLE001
         return
     pr_state = load_pr_state(config_path)
@@ -1648,12 +1651,14 @@ def _maybe_open_providers_pr(config: dict, providers_text: str, example_text: st
     freq_days = pr_cfg.get("frequency_days", 0)
     if freq_days and freq_days > 0:
         try:
-            import sys as _sys, os as _os2
+            import os as _os2
+            import sys as _sys
             repo_root = _os2.path.dirname(_os2.path.dirname(_os2.path.abspath(__file__)))
             if repo_root not in _sys.path:
                 _sys.path.insert(0, repo_root)
             from scripts.update_free_models import _probe_due
-            from .config import load_pr_state, save_pr_state as _save_pr_state
+
+            from .config import load_pr_state
         except Exception:  # noqa: BLE001
             _probe_due = None  # type: ignore[assignment]
         if _probe_due is not None:
@@ -1717,7 +1722,9 @@ def _maybe_open_providers_pr(config: dict, providers_text: str, example_text: st
         if url:
             logger.info("[providers-pr] %s", url)
             try:
-                from datetime import UTC as _UTC, datetime as _datetime
+                from datetime import UTC as _UTC
+                from datetime import datetime as _datetime
+
                 from .config import save_pr_state as _save_pr_state2
                 _save_pr_state2({"last_pr_at": _datetime.now(_UTC).isoformat()})
             except Exception as _exc:  # noqa: BLE001
diff --git a/scripts/sources/__init__.py b/scripts/sources/__init__.py
index d266e02..1006110 100644
--- a/scripts/sources/__init__.py
+++ b/scripts/sources/__init__.py
@@ -7,11 +7,11 @@
 from .api_models import ApiModelsSource
 from .base import Evidence, Source
 from .community import CommunitySource
+from .cost_probe import CostProbeSource
+from .endpoint_probe import EndpointProbeSource
 from .fireworks import FireworksSource
 from .litellm_cost_map import LiteLLMCostMapSource
 from .openrouter import OpenRouterSource
-from .cost_probe import CostProbeSource
-from .endpoint_probe import EndpointProbeSource
 from .together import TogetherSource
 
 ALL_SOURCES: dict[str, type[Source]] = {
diff --git a/scripts/sources/endpoint_probe.py b/scripts/sources/endpoint_probe.py
index 57a3882..50913fa 100644
--- a/scripts/sources/endpoint_probe.py
+++ b/scripts/sources/endpoint_probe.py
@@ -53,7 +53,7 @@ def fetch(self) -> list[Evidence]:
 
         # Build set of believed_free :free-suffixed models per provider.
         believed_free_per_provider: dict[str, set[str]] = {}
-        for prov_name, prov_data in sidecar.get("providers", {}).items():
+        for _prov_name, prov_data in sidecar.get("providers", {}).items():
             for qualified in prov_data.get("believed_free", []):
                 if "/" not in qualified:
                     continue
diff --git a/tests/test_scraper/test_probe_source.py b/tests/test_scraper/test_probe_source.py
index ea604a2..49b594e 100644
--- a/tests/test_scraper/test_probe_source.py
+++ b/tests/test_scraper/test_probe_source.py
@@ -1,11 +1,11 @@
-"""Tests for the opt-in cost probe source (scripts/sources/probe.py)."""
+"""Tests for the opt-in cost probe source (scripts/sources/cost_probe.py)."""
 
 from __future__ import annotations
 
 import responses
 
 from scripts.sources import OPT_IN_SOURCES
-from scripts.sources.probe import ProbeSource
+from scripts.sources.cost_probe import CostProbeSource
 
 
 def _patch(monkeypatch, *, believed_free, providers, pricing=None):
@@ -15,12 +15,12 @@ def _patch(monkeypatch, *, believed_free, providers, pricing=None):
         prov = pid.split("/", 1)[0]
         sidecar["providers"].setdefault(prov, {"believed_free": []})["believed_free"].append(pid)
     config = {"providers": providers}
-    monkeypatch.setattr("scripts.sources.probe.load_data", lambda: sidecar)
-    monkeypatch.setattr("scripts.sources.probe.load_config", lambda *a, **k: config)
+    monkeypatch.setattr("scripts.sources.cost_probe.load_data", lambda: sidecar)
+    monkeypatch.setattr("scripts.sources.cost_probe.load_config", lambda *a, **k: config)
 
 
 def test_probe_is_opt_in():
-    assert "probe" in OPT_IN_SOURCES
+    assert "cost_probe" in OPT_IN_SOURCES
 
 
 @responses.activate
@@ -35,12 +35,12 @@ def test_probe_flags_paid_model(monkeypatch):
         json={"usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2, "cost": 0.003}},
         status=200,
     )
-    evs = ProbeSource().fetch()
+    evs = CostProbeSource().fetch()
     assert len(evs) == 1
     assert evs[0].is_free is False
     assert evs[0].confidence == "high"
     assert evs[0].model_id == "groq/free-model"
-    assert evs[0].source == "probe"
+    assert evs[0].source == "cost_probe"
 
 
 @responses.activate
@@ -55,7 +55,7 @@ def test_probe_silent_on_zero_cost(monkeypatch):
         json={"usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}},
         status=200,
     )
-    assert ProbeSource().fetch() == []
+    assert CostProbeSource().fetch() == []
 
 
 def test_probe_skips_models_without_api_key(monkeypatch):
@@ -65,7 +65,7 @@ def test_probe_skips_models_without_api_key(monkeypatch):
         providers={"groq": {"base_url": "http://groq.example/v1", "api_key": ""}},
     )
     # No HTTP call should be made (responses not activated → would raise if called).
-    assert ProbeSource().fetch() == []
+    assert CostProbeSource().fetch() == []
 
 
 @responses.activate
@@ -79,7 +79,7 @@ def test_probe_fail_soft_on_error(monkeypatch):
         responses.POST, "http://groq.example/v1/chat/completions",
         status=500,
     )
-    assert ProbeSource().fetch() == []
+    assert CostProbeSource().fetch() == []
 
 
 @responses.activate
@@ -94,7 +94,7 @@ def test_probe_flags_multiple_models_concurrently(monkeypatch):
         json={"usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2, "cost": 0.003}},
         status=200,
     )
-    evs = ProbeSource(concurrency=3).fetch()
+    evs = CostProbeSource(concurrency=3).fetch()
     assert {e.model_id for e in evs} == {"groq/m1", "groq/m2", "groq/m3"}
     assert all(e.is_free is False for e in evs)
 
@@ -111,7 +111,7 @@ def test_probe_respects_max_models(monkeypatch):
         json={"usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2, "cost": 0.003}},
         status=200,
     )
-    evs = ProbeSource(max_models=2).fetch()
+    evs = CostProbeSource(max_models=2).fetch()
     assert len(evs) == 2
     # Only the first two believed_free candidates are probed (budget bound).
     assert len(responses.calls) == 2
@@ -142,8 +142,8 @@ def fake_probe(self, base_url, api_key, model):
             with lock:
                 state["in_flight"] -= 1
 
-    monkeypatch.setattr(ProbeSource, "_probe", fake_probe)
-    evs = ProbeSource(concurrency=2).fetch()
+    monkeypatch.setattr(CostProbeSource, "_probe", fake_probe)
+    evs = CostProbeSource(concurrency=2).fetch()
     assert len(evs) == 6
     assert state["peak"] <= 2
 
@@ -161,6 +161,6 @@ def test_probe_computes_cost_from_pricing(monkeypatch):
         json={"usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150}},
         status=200,
     )
-    evs = ProbeSource().fetch()
+    evs = CostProbeSource().fetch()
     assert len(evs) == 1
     assert "cost=" in evs[0].notes

From c4c2a3f7f5c20ed5e0545a82744981110fe5b558 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 11:27:34 +0000
Subject: [PATCH 3/3] =?UTF-8?q?fix:=20update=20tests=20for=20probe?=
 =?UTF-8?q?=E2=86=92cost=5Fprobe=20rename,=20429=20retry,=20flaky=20fusion?=
 =?UTF-8?q?=20order?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- test_admin_api: check free_tier.cost_probe (not free_tier.probe) after PUT
- test_probe_throttle: import/use cost_probe_* state helpers; --source cost_probe
- test_fusion: compare sorted(seen["panel"]) in the backfill test (panel requests run
  on parallel threads, so their order is non-deterministic; only which slots were
  attempted matters)
- scripts/sources/docs/base.py: retry the GET up to 3 times on HTTP 429, waiting
  5 s, 15 s, then 30 s (or an integer Retry-After, capped at 60 s); return []
  once retries are exhausted instead of raising
- test_docs_huggingface: add tests for 429 retry-then-succeed and exhausted-429
  returns-empty, monkeypatching time.sleep to keep tests fast

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01RkRhX4Znv73oPcZy41bnyS
---
 scripts/sources/docs/base.py                | 24 +++++++++++++----
 tests/test_admin_api.py                     |  4 +--
 tests/test_fusion.py                        |  2 +-
 tests/test_scraper/test_docs_huggingface.py | 21 +++++++++++++++
 tests/test_scraper/test_probe_throttle.py   | 30 ++++++++++++---------
 5 files changed, 60 insertions(+), 21 deletions(-)

diff --git a/scripts/sources/docs/base.py b/scripts/sources/docs/base.py
index 9ee0615..73c3ae0 100644
--- a/scripts/sources/docs/base.py
+++ b/scripts/sources/docs/base.py
@@ -2,11 +2,17 @@
 
 from __future__ import annotations
 
+import time
+
 import requests
 
 from ..base import Evidence, Source
 
 TIMEOUT = (5, 10)
+_HEADERS = {
+    "User-Agent": "llmproxy-update-free-models/1.0 (+https://github.com/billjr99/llmproxy)",
+}
+_429_RETRY_DELAYS = (5.0, 15.0, 30.0)  # seconds between successive 429 retries
 
 
 class DocsScraperBase(Source):
@@ -21,11 +27,19 @@ class DocsScraperBase(Source):
     provider_key: str = ""  # e.g. "google", "groq"
 
     def fetch(self) -> list[Evidence]:
-        resp = requests.get(self.url, timeout=TIMEOUT, headers={
-            "User-Agent": "llmproxy-update-free-models/1.0 (+https://github.com/billjr99/llmproxy)",
-        })
-        resp.raise_for_status()
-        return list(self.parse(resp.text))
+        delays = iter(_429_RETRY_DELAYS)
+        while True:
+            resp = requests.get(self.url, timeout=TIMEOUT, headers=_HEADERS)
+            if resp.status_code != 429:
+                resp.raise_for_status()
+                return list(self.parse(resp.text))
+            wait = next(delays, None)
+            if wait is None:
+                return []  # retries exhausted — skip silently rather than raise
+            ra = resp.headers.get("Retry-After", "")
+            # isdecimal(), not isdigit(): the latter accepts e.g. "²", which float() rejects.
+            actual_wait = float(ra) if ra.isdecimal() else wait
+            time.sleep(min(actual_wait, 60.0))
 
     def parse(self, html: str) -> list[Evidence]:  # noqa: D401 — overridden by subclasses
         raise NotImplementedError
diff --git a/tests/test_admin_api.py b/tests/test_admin_api.py
index 72e36b9..dc72a13 100644
--- a/tests/test_admin_api.py
+++ b/tests/test_admin_api.py
@@ -362,8 +362,8 @@ def test_put_maintenance_sets_flags(client, cfg_path):
     })
     assert resp.status_code == 200
     saved = _read_config(cfg_path)
-    assert saved["free_tier"]["probe"]["enabled"] is True
-    assert saved["free_tier"]["probe"]["frequency_days"] == 7
+    assert saved["free_tier"]["cost_probe"]["enabled"] is True
+    assert saved["free_tier"]["cost_probe"]["frequency_days"] == 7
     assert saved["providers_pr"]["repo"] == "BillJr99/llmproxy"
     assert saved["providers_pr"]["token"] == "ghp_secret"
     # Token is never echoed back verbatim
diff --git a/tests/test_fusion.py b/tests/test_fusion.py
index 783fe09..ce05400 100644
--- a/tests/test_fusion.py
+++ b/tests/test_fusion.py
@@ -239,7 +239,7 @@ def test_fusion_backfills_failed_panel_from_reserve(server, monkeypatch):
     rep = json.loads(resp.get_data())["llmproxy_fusion"]
     assert rep["panel"] == ["p4/m4", "p5/m5"]  # reserve backfilled the failed slots
     assert {f["model"] for f in rep["failed_models"]} == {"p0/m0", "p1/m1", "p2/m2", "p3/m3"}
-    assert seen["panel"] == ["m0", "m1", "m2", "m3", "m4", "m5"]  # every slot attempted once
+    assert sorted(seen["panel"]) == ["m0", "m1", "m2", "m3", "m4", "m5"]  # every slot attempted once
 
 
 def test_fusion_all_panel_fail_error_lists_reasons(server, monkeypatch):
diff --git a/tests/test_scraper/test_docs_huggingface.py b/tests/test_scraper/test_docs_huggingface.py
index 55dc969..11b07af 100644
--- a/tests/test_scraper/test_docs_huggingface.py
+++ b/tests/test_scraper/test_docs_huggingface.py
@@ -41,3 +41,24 @@ def test_unrelated_page_yields_empty():
     html = "<html><body><p>Some other HF docs page</p></body></html>"
     responses.add(responses.GET, URL, body=html, status=200)
     assert HuggingFaceDocs().fetch() == []
+
+
+@responses.activate
+def test_429_retried_and_succeeds(fixtures_dir: Path, monkeypatch):
+    """A single 429 is retried; the scraper returns results on the next attempt."""
+    monkeypatch.setattr("scripts.sources.docs.base.time.sleep", lambda _: None)
+    html = (fixtures_dir / "huggingface_providers.html").read_text()
+    responses.add(responses.GET, URL, status=429)
+    responses.add(responses.GET, URL, body=html, status=200, content_type="text/html")
+    evs = HuggingFaceDocs().fetch()
+    assert evs  # recovered after retry
+    assert len(responses.calls) == 2
+
+
+@responses.activate
+def test_429_exhausted_returns_empty(monkeypatch):
+    """When all retries are consumed the scraper returns [] instead of raising."""
+    monkeypatch.setattr("scripts.sources.docs.base.time.sleep", lambda _: None)
+    for _ in range(4):  # initial attempt + 3 retries
+        responses.add(responses.GET, URL, status=429)
+    assert HuggingFaceDocs().fetch() == []
diff --git a/tests/test_scraper/test_probe_throttle.py b/tests/test_scraper/test_probe_throttle.py
index e400175..5fe3161 100644
--- a/tests/test_scraper/test_probe_throttle.py
+++ b/tests/test_scraper/test_probe_throttle.py
@@ -1,12 +1,16 @@
 """Tests for the probe-frequency throttle (scripts/update_free_models._probe_due)
-and the probe-state cache helpers (llmproxy.config)."""
+and the cost-probe-state cache helpers (llmproxy.config)."""
 
 from __future__ import annotations
 
 import json
 from datetime import UTC, datetime, timedelta
 
-from llmproxy.config import get_probe_state_path, load_probe_state, save_probe_state
+from llmproxy.config import (
+    get_cost_probe_state_path,
+    load_cost_probe_state,
+    save_cost_probe_state,
+)
 from scripts.update_free_models import _probe_due
 
 
@@ -66,21 +70,21 @@ def test_non_numeric_frequency_defaults_to_always_due():
     assert due is True
 
 
-# --- probe-state cache helpers ---------------------------------------------
+# --- cost-probe-state cache helpers -----------------------------------------
 
 def test_probe_state_roundtrip(tmp_path):
     cfg = str(tmp_path / "config.json")
-    assert load_probe_state(cfg) == {}
+    assert load_cost_probe_state(cfg) == {}
     ts = datetime.now(UTC).isoformat()
-    assert save_probe_state({"last_probe_at": ts}, cfg) is True
-    assert get_probe_state_path(cfg) == tmp_path / "probe_state.json"
-    assert load_probe_state(cfg) == {"last_probe_at": ts}
+    assert save_cost_probe_state({"last_probe_at": ts}, cfg) is True
+    assert get_cost_probe_state_path(cfg) == tmp_path / "cost_probe_state.json"
+    assert load_cost_probe_state(cfg) == {"last_probe_at": ts}
 
 
 def test_probe_state_corrupt_returns_empty(tmp_path):
     cfg = str(tmp_path / "config.json")
-    get_probe_state_path(cfg).write_text("{not json", encoding="utf-8")
-    assert load_probe_state(cfg) == {}
+    get_cost_probe_state_path(cfg).write_text("{not json", encoding="utf-8")
+    assert load_cost_probe_state(cfg) == {}
 
 
 class _ReadOnlyPath:
@@ -97,7 +101,7 @@ def __str__(self):
 
 def test_probe_timestamp_recorded_when_sidecar_write_fails(tmp_path, monkeypatch):
     """A read-only providers.json must not prevent the probe-throttle timestamp
-    (which lives next to the user config, e.g. /config/probe_state.json) from
+    (which lives next to the user config, e.g. /config/cost_probe_state.json) from
     advancing — regression for the container bind-mount case."""
     import scripts.update_free_models as ufm
 
@@ -108,10 +112,10 @@ def test_probe_timestamp_recorded_when_sidecar_write_fails(tmp_path, monkeypatch
     monkeypatch.setattr(ufm, "apply_updates", lambda *a, **k: True)
     monkeypatch.setattr(ufm, "DATA_PATH", _ReadOnlyPath())
 
-    rc = ufm.main(["--source", "probe", "--config", str(cfg)])
+    rc = ufm.main(["--source", "cost_probe", "--config", str(cfg)])
 
     assert rc == 0  # the run completed despite the read-only sidecar
-    state = load_probe_state(str(cfg))
+    state = load_cost_probe_state(str(cfg))
     assert "last_probe_at" in state  # throttle timestamp persisted to the bind mount
 
 
@@ -126,7 +130,7 @@ def test_sidecar_mirrored_to_config_dir_when_readonly(tmp_path, monkeypatch):
     monkeypatch.setattr(ufm, "apply_updates", lambda *a, **k: True)
     monkeypatch.setattr(ufm, "DATA_PATH", _ReadOnlyPath())
 
-    ufm.main(["--source", "probe", "--config", str(cfg)])
+    ufm.main(["--source", "cost_probe", "--config", str(cfg)])
 
     mirrored_providers = tmp_path / "providers.json"
     mirrored_example = tmp_path / "config.example.json"