From dc61e29bd6f6b3d5c5b8c46105fe95c53a5cf18e Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 19 Jun 2026 01:28:25 +0000 Subject: [PATCH 1/3] Add ranked favorite_free_models config field for free-tier routing priority Introduces a top-level `favorite_free_models` list in config.json. When routing through any `*/free` virtual endpoint or the free tier of `llmproxy/loadbalanced`, models in this list are promoted to the front of the candidate pool in ranked order before the normal capacity/request-fit/capability algorithm handles the rest. A favorite is only used if it currently passes `_is_model_free()` (believed_free and not cost-observed); it is silently skipped otherwise. Removing a model from believed_free (cost observed at runtime) leaves it in favorite_free_models so it re-promotes automatically when a future sync restores its free status. Changes: - server.py: _apply_favorite_free_ordering() helper + call in _proxy_endpoint() (after all ordering passes, for is_free_virtual) and inside _loadbalanced_ordered_candidates() on the free tier bucket only - admin.py: GET/PUT /admin/api/favorite-free-models endpoint; favorite_free_models included in GET /admin/api/config response - static/admin/index.html: Favorite free models card in Models & Categorizations tab with grouped-by-provider picker, ranked list with up/down/remove, auto-save - tests/test_favorite_ordering.py: unit tests for _apply_favorite_free_ordering() - tests/test_admin_api.py: API endpoint tests - README.md: documents favorite_free_models in config schema and cycling description Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01XSnxDp6BPeErjdqembchkZ --- README.md | 47 ++++++++++++- llmproxy/admin.py | 13 ++++ llmproxy/server.py | 27 ++++++++ llmproxy/static/admin/index.html | 81 +++++++++++++++++++++- tests/test_admin_api.py | 41 +++++++++++ tests/test_favorite_ordering.py | 112 +++++++++++++++++++++++++++++++ 6 files changed, 319 insertions(+), 2 deletions(-) create 
mode 100644 tests/test_favorite_ordering.py diff --git a/README.md b/README.md index cbffccc..b1773b2 100644 --- a/README.md +++ b/README.md @@ -184,7 +184,9 @@ When a request targets a (non-fusion) virtual model, llmproxy: reorderings may then run on top without ever dropping a candidate: the [request-fit triage](#request-fit-triage-every-free-and-local-virtual) for the `*/free` and `*/local` virtuals, and [capability ordering](#capability-aware-routing--failover) - when the request forces a capability. + when the request forces a capability. Finally, any models listed in + [`favorite_free_models`](#favorite_free_models) that are present in the pool + are promoted to the front in ranked order before cycling begins. 3. **Tries each candidate in order**, returning the first **usable** response. A candidate is considered to have **failed** — so llmproxy moves on to the next @@ -678,6 +680,10 @@ Config is stored at `~/.config/llmproxy/config.json` (or the path in "tokens_per_day": 500000 } }, + "favorite_free_models": [ + "google/gemini-2.5-flash", + "groq/llama-3.1-8b-instant" + ], "free_tier": { "sync_on_startup": true, @@ -864,6 +870,45 @@ process — so it is "as far as we can tell in the moment". Any field set to `nu is ignored; a provider with no `free_allowance` simply never gains free-in-the- moment status. + +### `favorite_free_models` — ranked priority list for free-tier routing + +`favorite_free_models` is an **optional** top-level array of model IDs listed in +preference order. When a `*/free` virtual endpoint (e.g. `llmproxy/free`, +`llmproxy/deep__free`) or the free tier of `llmproxy/loadbalanced` selects a +backend, models in this list are promoted to the front of the candidate pool +**in the order listed**, before the normal capacity/request-fit/capability +algorithm handles the rest. 
+ +```json +"favorite_free_models": [ + "google/gemini-2.5-flash", + "anthropic/claude-3-5-haiku-20251001", + "gpt-4o-mini" +] +``` + +Each entry is matched case-insensitively against the upstream model ID (bare, +e.g. `gpt-4o-mini`) or the fully-qualified proxy ID (e.g. +`openai/gpt-4o-mini`). A favorite is only promoted if it is **currently +believed-free** (present in `believed_free` and not flagged as cost-observed); +if it is absent from the virtual model's candidate pool it is silently skipped +and the remaining favorites and the normal algorithm continue unchanged. + +**Cost-observation persistence:** if a favorite is later removed from +`believed_free` because a cost was observed at runtime, it remains in +`favorite_free_models`. When a future sync restores it to the free pool (e.g. +the provider makes it free again), it is automatically re-promoted without any +manual config change. + +`favorite_free_models` has no effect on non-free virtual endpoints +(`llmproxy/deep`, `llmproxy/tools`, etc.) or on fusion virtuals. + +The admin UI's **Models & Categorizations** tab includes a **Favorite free +models** panel where you can add models from a grouped-by-provider picker, +reorder them with up/down buttons, and remove entries — changes are saved +immediately. 
+ ### Token + cost accounting — `GET /v1/usage` diff --git a/llmproxy/admin.py b/llmproxy/admin.py index fcdd5c1..f647d81 100644 --- a/llmproxy/admin.py +++ b/llmproxy/admin.py @@ -463,6 +463,7 @@ def api_get_config(): return jsonify({ "providers": providers, "believed_free": config.get("believed_free", []), + "favorite_free_models": config.get("favorite_free_models", []), "model_reasoning": config.get("model_reasoning", {}), "model_capabilities": config.get("model_capabilities", {}), "free_limits": config.get("free_limits", {}), @@ -794,6 +795,18 @@ def _put_section(key: str, validate): return jsonify({key: payload}) +@bp.route("/admin/api/favorite-free-models", methods=["GET", "PUT"]) +def api_favorite_free_models(): + if request.method == "GET": + return jsonify({"favorite_free_models": _load().get("favorite_free_models", [])}) + + def validate(p): + if not (isinstance(p, list) and all(isinstance(x, str) for x in p)): + return "favorite_free_models must be a list of strings." + return None + return _put_section("favorite_free_models", validate) + + @bp.route("/admin/api/believed-free", methods=["GET", "PUT"]) def api_believed_free(): if request.method == "GET": diff --git a/llmproxy/server.py b/llmproxy/server.py index aec3ea0..96d1a75 100755 --- a/llmproxy/server.py +++ b/llmproxy/server.py @@ -2943,6 +2943,30 @@ def _provider_exposes_to_virtual_models(provider_cfg: dict) -> bool: return provider_cfg.get("expose_to_virtual_models", True) is not False +def _apply_favorite_free_ordering( + candidates: list[tuple[str, dict, str]], + config: dict, +) -> list[tuple[str, dict, str]]: + """Promote favorite_free_models to the front in ranked order. + + Only candidates already present in the pool are promoted — favorites not in + the pool (e.g. cost-observed, not believed_free) are silently skipped. + Non-matching candidates retain their existing order after the favorites. 
+ """ + favorites = config.get("favorite_free_models", []) + if not favorites: + return candidates + remaining = list(candidates) + front: list[tuple[str, dict, str]] = [] + for fav in favorites: + fav_lower = fav.lower() + for i, (pname, pcfg, umodel) in enumerate(remaining): + if fav_lower in (umodel.lower(), f"{pname}/{umodel}".lower()): + front.append(remaining.pop(i)) + break + return front + remaining + + def _param_count(model_id: str) -> float: """Best-effort parameter count (in billions) parsed from a model id. @@ -3436,6 +3460,7 @@ def _price(c: tuple[str, dict, str]) -> float: continue if tier == _TIER_FREE: bucket = _quality_ordered_candidates(bucket, free_limits, reasoning_map) + bucket = _apply_favorite_free_ordering(bucket, config) elif tier == _TIER_LOCAL: # $0 like free — prefer the strongest local model (e.g. the larger # Ollama model) rather than rotating randomly. @@ -4253,6 +4278,8 @@ def on_success(pn: str, um: str, body=None) -> None: logger.info(" [%s] request-fit first-pick tier=%s", model_full, _target_reasoning_tier(payload)) if needed: ordered = _order_by_capability(ordered, needed, _model_capabilities(config)) + if is_free_virtual: + ordered = _apply_favorite_free_ordering(ordered, config) logger.info(" [%s] cycling through %d candidate(s)", model_full, len(ordered)) if is_streaming: timeout = server_cfg.get("stream_timeout", 300) diff --git a/llmproxy/static/admin/index.html b/llmproxy/static/admin/index.html index 9102e6a..8533fac 100644 --- a/llmproxy/static/admin/index.html +++ b/llmproxy/static/admin/index.html @@ -87,6 +87,8 @@ .vm code.id { font-size: 14px; font-weight: 700; } a { color: var(--accent); } .hidden { display: none !important; } + .fav-row { display: grid; grid-template-columns: auto 1fr auto auto auto; gap: 8px; align-items: center; padding: 7px 10px; border: 1px solid var(--border); border-radius: 8px; margin-bottom: 6px; } + .fav-rank { font-size: 11px; font-weight: 700; color: var(--muted); min-width: 18px; 
text-align: right; } @@ -164,6 +166,15 @@

Models & Categorizations

+
+

Favorite free models

+

These models are tried first (in ranked order) when routing through any */free virtual endpoint or the free tier of llmproxy/loadbalanced. A model is only selected if it is currently believed-free; it is skipped silently otherwise and the normal selection algorithm continues.

+
+
+ + +
+
@@ -424,7 +435,7 @@

${esc(name)} ${keyPill}

// ---- Models & categorizations ---- $("#discover").addEventListener("click", async () => { toast("Discovering…"); - try { const r = await api("/models"); DISCOVERED = r.models || []; renderModels(); toast(`Discovered ${DISCOVERED.length} models`); } + try { const r = await api("/models"); DISCOVERED = r.models || []; renderModels(); renderFavPicker(); toast(`Discovered ${DISCOVERED.length} models`); } catch (e) { toast(e.message, true); } }); $("#model-filter").addEventListener("input", renderModels); @@ -492,6 +503,73 @@

${esc(name)} ${keyPill}

} $("#refresh-vm").addEventListener("click", loadVirtual); +// ---- Favorite free models ---- +let FAVS = []; + +function renderFavPicker() { + const sel = $("#fav-pick"); + // Group discovered/known models by provider + const all = modelUniverse(); + const groups = {}; + for (const m of all) { + const slash = m.indexOf("/"); + const grp = slash >= 0 ? m.slice(0, slash) : "(bare)"; + (groups[grp] = groups[grp] || []).push(m); + } + sel.innerHTML = `` + + Object.keys(groups).sort().map(g => + `${groups[g].map(m => ``).join("")}` + ).join(""); +} + +function renderFavsList() { + const box = $("#fav-list"); + if (!FAVS.length) { box.innerHTML = `

No favorites yet.

`; return; } + box.innerHTML = FAVS.map((m, i) => ` +
+ ${i + 1} + ${esc(m)} + + + +
`).join(""); + box.querySelectorAll("[data-up]").forEach(b => b.addEventListener("click", () => moveFav(+b.dataset.up, -1))); + box.querySelectorAll("[data-dn]").forEach(b => b.addEventListener("click", () => moveFav(+b.dataset.dn, 1))); + box.querySelectorAll("[data-rm]").forEach(b => b.addEventListener("click", () => removeFav(+b.dataset.rm))); +} + +async function saveFavs() { + try { + await api("/favorite-free-models", { method: "PUT", body: JSON.stringify(FAVS) }); + toast("Favorite free models saved"); + } catch (e) { toast(e.message, true); } +} + +function moveFav(i, dir) { + const j = i + dir; + if (j < 0 || j >= FAVS.length) return; + [FAVS[i], FAVS[j]] = [FAVS[j], FAVS[i]]; + renderFavsList(); saveFavs(); +} + +function removeFav(i) { + FAVS.splice(i, 1); renderFavsList(); saveFavs(); +} + +$("#fav-add").addEventListener("click", () => { + const v = $("#fav-pick").value; if (!v) return; + if (FAVS.includes(v)) { toast("Already in favorites", true); return; } + FAVS.push(v); renderFavsList(); renderFavPicker(); saveFavs(); +}); + +async function loadFavs() { + try { + const r = await api("/favorite-free-models"); + FAVS = r.favorite_free_models || []; + renderFavsList(); renderFavPicker(); + } catch (e) { toast(e.message, true); } +} + // ---- Bootstrap ---- async function bootstrap() { try { @@ -515,6 +593,7 @@

${esc(name)} ${keyPill}

renderTemplateSelect(); await loadProviders(); renderModels(); + await loadFavs(); } catch (e) { setAuthState(false, "auth required"); toast(e.message, true); diff --git a/tests/test_admin_api.py b/tests/test_admin_api.py index 124dfa8..72e36b9 100644 --- a/tests/test_admin_api.py +++ b/tests/test_admin_api.py @@ -207,6 +207,47 @@ def test_put_believed_free_rejects_non_list(client): assert resp.status_code == 400 +# --------------------------------------------------------------------------- # +# Favorite free models + +def test_get_favorite_free_models_default_empty(client): + resp = client.get("/admin/api/favorite-free-models") + assert resp.status_code == 200 + assert resp.get_json()["favorite_free_models"] == [] + + +def test_put_favorite_free_models_valid(client, cfg_path): + favs = ["google/gemini-2.5-flash", "openai/gpt-4o-mini"] + resp = client.put("/admin/api/favorite-free-models", json=favs) + assert resp.status_code == 200 + assert resp.get_json()["favorite_free_models"] == favs + assert _read_config(cfg_path)["favorite_free_models"] == favs + + +def test_put_favorite_free_models_rejects_non_list(client): + resp = client.put("/admin/api/favorite-free-models", json={"model": "x"}) + assert resp.status_code == 400 + + +def test_put_favorite_free_models_rejects_non_string_entries(client): + resp = client.put("/admin/api/favorite-free-models", json=["ok", 42]) + assert resp.status_code == 400 + + +def test_favorite_free_models_in_config_get(client, cfg_path): + favs = ["google/gemini-flash"] + client.put("/admin/api/favorite-free-models", json=favs) + resp = client.get("/admin/api/config") + assert resp.status_code == 200 + assert resp.get_json()["favorite_free_models"] == favs + + +def test_get_favorite_free_models_round_trips_empty_list(client, cfg_path): + client.put("/admin/api/favorite-free-models", json=[]) + resp = client.get("/admin/api/favorite-free-models") + assert resp.get_json()["favorite_free_models"] == [] + + def 
test_put_model_reasoning_validates_level(client): resp = client.put("/admin/api/model-reasoning", json={"m": "ultra"}) assert resp.status_code == 400 diff --git a/tests/test_favorite_ordering.py b/tests/test_favorite_ordering.py new file mode 100644 index 0000000..44bf126 --- /dev/null +++ b/tests/test_favorite_ordering.py @@ -0,0 +1,112 @@ +"""Unit tests for _apply_favorite_free_ordering() in server.py.""" + +from __future__ import annotations + +import importlib +import json +from pathlib import Path + +import pytest + + +@pytest.fixture(autouse=True) +def _reload_server(monkeypatch, tmp_path): + cfg = tmp_path / "config.json" + cfg.write_text(json.dumps({"providers": {}, "believed_free": []})) + monkeypatch.setenv("LLMPROXY_CONFIG", str(cfg)) + from llmproxy import config as config_mod + importlib.reload(config_mod) + from llmproxy import server as server_mod + importlib.reload(server_mod) + + +def _fn(): + from llmproxy import server as server_mod + return server_mod._apply_favorite_free_ordering + + +def _c(provider, model): + """Build a minimal (provider_name, provider_cfg, upstream_model) tuple.""" + return (provider, {}, model) + + +def test_empty_favorites_returns_unchanged(): + candidates = [_c("openai", "gpt-4o"), _c("google", "gemini-flash")] + result = _fn()(candidates, {"favorite_free_models": []}) + assert result == candidates + + +def test_no_favorites_key_returns_unchanged(): + candidates = [_c("openai", "gpt-4o"), _c("google", "gemini-flash")] + result = _fn()(candidates, {}) + assert result == candidates + + +def test_favorite_promoted_to_front(): + a = _c("openai", "gpt-4o") + b = _c("google", "gemini-flash") + candidates = [a, b] + result = _fn()(candidates, {"favorite_free_models": ["google/gemini-flash"]}) + assert result[0] == b + assert result[1] == a + + +def test_multiple_favorites_ranked_order(): + a = _c("openai", "gpt-4o-mini") + b = _c("google", "gemini-flash") + c = _c("anthropic", "haiku") + candidates = [a, b, c] + result = 
_fn()(candidates, {"favorite_free_models": ["anthropic/haiku", "google/gemini-flash"]}) + assert result[0] == c + assert result[1] == b + assert result[2] == a + + +def test_favorite_not_in_pool_silently_skipped(): + a = _c("openai", "gpt-4o") + candidates = [a] + # "paid-model" is not in the pool + result = _fn()(candidates, {"favorite_free_models": ["openai/paid-model", "openai/gpt-4o"]}) + assert result == [a] + + +def test_bare_id_match(): + a = _c("openai", "gpt-4o-mini") + b = _c("google", "gemini-flash") + candidates = [a, b] + # bare id (no provider prefix) matches by upstream_model + result = _fn()(candidates, {"favorite_free_models": ["gemini-flash"]}) + assert result[0] == b + assert result[1] == a + + +def test_qualified_id_match(): + a = _c("openai", "gpt-4o-mini") + b = _c("google", "gemini-flash") + candidates = [a, b] + result = _fn()(candidates, {"favorite_free_models": ["openai/gpt-4o-mini"]}) + assert result[0] == a + assert result[1] == b + + +def test_case_insensitive_match(): + a = _c("Google", "Gemini-Flash") + candidates = [a] + result = _fn()(candidates, {"favorite_free_models": ["google/gemini-flash"]}) + assert result == [a] + + +def test_non_favorite_order_preserved(): + a = _c("a", "model-a") + b = _c("b", "model-b") + c = _c("c", "model-c") + d = _c("d", "model-d") + candidates = [a, b, c, d] + # Only promote c; a, b, d should remain in their original relative order + result = _fn()(candidates, {"favorite_free_models": ["c/model-c"]}) + assert result == [c, a, b, d] + + +def test_empty_candidates_returns_empty(): + result = _fn()([], {"favorite_free_models": ["openai/gpt-4o"]}) + assert result == [] From 3ad518adc13ddedaca864dc0edaeea808ce6106f Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 19 Jun 2026 01:41:11 +0000 Subject: [PATCH 2/3] Match favorite_free_models ignoring :variant suffixes Specifying "x/y" in favorite_free_models now matches both "x/y" and "x/y:free" (or any :variant suffix), which is the common pattern for 
OpenRouter free-tier model IDs. Matching strips the suffix from the candidate before comparing, so users don't need to know whether the upstream ID includes the suffix. Both bare ("gemini-flash") and provider-qualified ("google/gemini-flash") forms match with or without a suffix on the candidate. Exact matches with a suffix ("google/gemini-flash:free") also continue to work. Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01XSnxDp6BPeErjdqembchkZ --- llmproxy/server.py | 9 ++++++++- tests/test_favorite_ordering.py | 27 +++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/llmproxy/server.py b/llmproxy/server.py index 96d1a75..8e9a0a6 100755 --- a/llmproxy/server.py +++ b/llmproxy/server.py @@ -2952,6 +2952,9 @@ def _apply_favorite_free_ordering( Only candidates already present in the pool are promoted — favorites not in the pool (e.g. cost-observed, not believed_free) are silently skipped. Non-matching candidates retain their existing order after the favorites. + + Matching is case-insensitive and ignores :variant suffixes (e.g. :free, + :nitro) so that "x/y" matches both "x/y" and "x/y:free". 
""" favorites = config.get("favorite_free_models", []) if not favorites: @@ -2961,7 +2964,11 @@ def _apply_favorite_free_ordering( for fav in favorites: fav_lower = fav.lower() for i, (pname, pcfg, umodel) in enumerate(remaining): - if fav_lower in (umodel.lower(), f"{pname}/{umodel}".lower()): + umodel_lower = umodel.lower() + umodel_base = umodel_lower.split(":")[0] # strip :variant suffix + qualified = f"{pname}/{umodel}".lower() + qualified_base = f"{pname}/{umodel_base}".lower() + if fav_lower in (umodel_lower, umodel_base, qualified, qualified_base): front.append(remaining.pop(i)) break return front + remaining diff --git a/tests/test_favorite_ordering.py b/tests/test_favorite_ordering.py index 44bf126..3803e83 100644 --- a/tests/test_favorite_ordering.py +++ b/tests/test_favorite_ordering.py @@ -110,3 +110,30 @@ def test_non_favorite_order_preserved(): def test_empty_candidates_returns_empty(): result = _fn()([], {"favorite_free_models": ["openai/gpt-4o"]}) assert result == [] + + +def test_variant_suffix_stripped_bare(): + # "gemini-flash" matches upstream_model "gemini-flash:free" + a = _c("google", "gemini-flash:free") + b = _c("openai", "gpt-4o") + candidates = [b, a] + result = _fn()(candidates, {"favorite_free_models": ["gemini-flash"]}) + assert result[0] == a + + +def test_variant_suffix_stripped_qualified(): + # "google/gemini-flash" matches (provider="google", model="gemini-flash:free") + a = _c("google", "gemini-flash:free") + b = _c("openai", "gpt-4o") + candidates = [b, a] + result = _fn()(candidates, {"favorite_free_models": ["google/gemini-flash"]}) + assert result[0] == a + + +def test_exact_with_suffix_still_matches(): + # Specifying "google/gemini-flash:free" also works (exact match) + a = _c("google", "gemini-flash:free") + b = _c("openai", "gpt-4o") + candidates = [b, a] + result = _fn()(candidates, {"favorite_free_models": ["google/gemini-flash:free"]}) + assert result[0] == a From 2d5d1e43c4425c43e284b4dabca8cd9b4abdf93e Mon Sep 17 00:00:00
2001 From: Claude Date: Fri, 19 Jun 2026 10:12:23 +0000 Subject: [PATCH 3/3] Fix ruff lint errors: rename unused pcfg to _pcfg, remove unused Path import Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01XSnxDp6BPeErjdqembchkZ --- llmproxy/server.py | 2 +- tests/test_favorite_ordering.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/llmproxy/server.py b/llmproxy/server.py index 8e9a0a6..ba8b965 100755 --- a/llmproxy/server.py +++ b/llmproxy/server.py @@ -2963,7 +2963,7 @@ def _apply_favorite_free_ordering( front: list[tuple[str, dict, str]] = [] for fav in favorites: fav_lower = fav.lower() - for i, (pname, pcfg, umodel) in enumerate(remaining): + for i, (pname, _pcfg, umodel) in enumerate(remaining): umodel_lower = umodel.lower() umodel_base = umodel_lower.split(":")[0] # strip :variant suffix qualified = f"{pname}/{umodel}".lower() diff --git a/tests/test_favorite_ordering.py b/tests/test_favorite_ordering.py index 3803e83..47dbfea 100644 --- a/tests/test_favorite_ordering.py +++ b/tests/test_favorite_ordering.py @@ -4,7 +4,6 @@ import importlib import json -from pathlib import Path import pytest