diff --git a/README.md b/README.md
index cbffccc..b1773b2 100644
--- a/README.md
+++ b/README.md
@@ -184,7 +184,9 @@ When a request targets a (non-fusion) virtual model, llmproxy:
reorderings may then run on top without ever dropping a candidate: the
[request-fit triage](#request-fit-triage-every-free-and-local-virtual) for the
`*/free` and `*/local` virtuals, and [capability ordering](#capability-aware-routing--failover)
- when the request forces a capability.
+   when the request forces a capability. Finally, for the `*/free` virtuals,
+   any models listed in
+   [`favorite_free_models`](#favorite_free_models--ranked-priority-list-for-free-tier-routing)
+   that are present in the pool are promoted to the front in ranked order
+   before cycling begins.
3. **Tries each candidate in order**, returning the first **usable** response.
A candidate is considered to have **failed** — so llmproxy moves on to the next
@@ -678,6 +680,10 @@ Config is stored at `~/.config/llmproxy/config.json` (or the path in
"tokens_per_day": 500000
}
},
+ "favorite_free_models": [
+ "google/gemini-2.5-flash",
+ "groq/llama-3.1-8b-instant"
+ ],
"free_tier": {
"sync_on_startup": true,
@@ -864,6 +870,45 @@ process — so it is "as far as we can tell in the moment". Any field set to `nu
is ignored; a provider with no `free_allowance` simply never gains free-in-the-
moment status.
+
+### `favorite_free_models` — ranked priority list for free-tier routing
+
+`favorite_free_models` is an **optional** top-level array of model IDs listed in
+preference order. When a `*/free` virtual endpoint (e.g. `llmproxy/free`,
+`llmproxy/deep__free`) or the free tier of `llmproxy/loadbalanced` selects a
+backend, models in this list are moved to the front of the candidate pool
+**in the order listed**. This happens after the normal
+capacity/request-fit/capability ordering has run, so the remaining candidates
+keep the order that algorithm gave them.
+
+```json
+"favorite_free_models": [
+ "google/gemini-2.5-flash",
+ "anthropic/claude-3-5-haiku-20251001",
+ "gpt-4o-mini"
+]
+```
+
+Each entry is matched case-insensitively against the upstream model ID (bare,
+e.g. `gpt-4o-mini`) or the fully-qualified proxy ID (e.g.
+`openai/gpt-4o-mini`). A `:variant` suffix on the upstream ID (e.g. `:free`,
+`:nitro`) is ignored, so `x/y` also matches `x/y:free`. Each entry promotes at
+most one candidate — the first match in the pool. A favorite is only promoted
+if it is **currently believed-free** (present in `believed_free` and not flagged
+as cost-observed); if it is absent from the virtual model's candidate pool it is
+silently skipped, and the remaining favorites and the normal algorithm continue
+unchanged.
+
+**Cost-observation persistence:** if a favorite is later removed from
+`believed_free` because a cost was observed at runtime, it remains in
+`favorite_free_models`. When a future sync restores it to the free pool (e.g.
+the provider makes it free again), it is automatically re-promoted without any
+manual config change.
+
+`favorite_free_models` has no effect on non-free virtual endpoints
+(`llmproxy/deep`, `llmproxy/tools`, etc.) or on fusion virtuals.
+
+The admin UI's **Models & Categorizations** tab includes a **Favorite free
+models** panel where you can add models from a grouped-by-provider picker,
+reorder them with up/down buttons, and remove entries — changes are saved
+immediately.
+
### Token + cost accounting — `GET /v1/usage`
diff --git a/llmproxy/admin.py b/llmproxy/admin.py
index fcdd5c1..f647d81 100644
--- a/llmproxy/admin.py
+++ b/llmproxy/admin.py
@@ -463,6 +463,7 @@ def api_get_config():
return jsonify({
"providers": providers,
"believed_free": config.get("believed_free", []),
+ "favorite_free_models": config.get("favorite_free_models", []),
"model_reasoning": config.get("model_reasoning", {}),
"model_capabilities": config.get("model_capabilities", {}),
"free_limits": config.get("free_limits", {}),
@@ -794,6 +795,18 @@ def _put_section(key: str, validate):
return jsonify({key: payload})
+@bp.route("/admin/api/favorite-free-models", methods=["GET", "PUT"])
+def api_favorite_free_models():
+ if request.method == "GET":
+ return jsonify({"favorite_free_models": _load().get("favorite_free_models", [])})
+
+ def validate(p):
+ if not (isinstance(p, list) and all(isinstance(x, str) for x in p)):
+ return "favorite_free_models must be a list of strings."
+ return None
+ return _put_section("favorite_free_models", validate)
+
+
@bp.route("/admin/api/believed-free", methods=["GET", "PUT"])
def api_believed_free():
if request.method == "GET":
diff --git a/llmproxy/server.py b/llmproxy/server.py
index aec3ea0..ba8b965 100755
--- a/llmproxy/server.py
+++ b/llmproxy/server.py
@@ -2943,6 +2943,37 @@ def _provider_exposes_to_virtual_models(provider_cfg: dict) -> bool:
return provider_cfg.get("expose_to_virtual_models", True) is not False
+def _apply_favorite_free_ordering(
+ candidates: list[tuple[str, dict, str]],
+ config: dict,
+) -> list[tuple[str, dict, str]]:
+ """Promote favorite_free_models to the front in ranked order.
+
+ Only candidates already present in the pool are promoted — favorites not in
+ the pool (e.g. cost-observed, not believed_free) are silently skipped.
+ Non-matching candidates retain their existing order after the favorites.
+
+ Matching is case-insensitive and ignores :variant suffixes (e.g. :free,
+ :nitro) so that "x/y" matches both "x/y" and "x/y:free".
+ """
+ favorites = config.get("favorite_free_models", [])
+ if not favorites:
+ return candidates
+ remaining = list(candidates)
+ front: list[tuple[str, dict, str]] = []
+ for fav in favorites:
+ fav_lower = fav.lower()
+ for i, (pname, _pcfg, umodel) in enumerate(remaining):
+ umodel_lower = umodel.lower()
+ umodel_base = umodel_lower.split(":")[0] # strip :variant suffix
+ qualified = f"{pname}/{umodel}".lower()
+ qualified_base = f"{pname}/{umodel_base}"
+ if fav_lower in (umodel_lower, umodel_base, qualified, qualified_base):
+ front.append(remaining.pop(i))
+ break
+ return front + remaining
+
+
def _param_count(model_id: str) -> float:
"""Best-effort parameter count (in billions) parsed from a model id.
@@ -3436,6 +3467,7 @@ def _price(c: tuple[str, dict, str]) -> float:
continue
if tier == _TIER_FREE:
bucket = _quality_ordered_candidates(bucket, free_limits, reasoning_map)
+ bucket = _apply_favorite_free_ordering(bucket, config)
elif tier == _TIER_LOCAL:
# $0 like free — prefer the strongest local model (e.g. the larger
# Ollama model) rather than rotating randomly.
@@ -4253,6 +4285,8 @@ def on_success(pn: str, um: str, body=None) -> None:
logger.info(" [%s] request-fit first-pick tier=%s", model_full, _target_reasoning_tier(payload))
if needed:
ordered = _order_by_capability(ordered, needed, _model_capabilities(config))
+ if is_free_virtual:
+ ordered = _apply_favorite_free_ordering(ordered, config)
logger.info(" [%s] cycling through %d candidate(s)", model_full, len(ordered))
if is_streaming:
timeout = server_cfg.get("stream_timeout", 300)
diff --git a/llmproxy/static/admin/index.html b/llmproxy/static/admin/index.html
index 9102e6a..8533fac 100644
--- a/llmproxy/static/admin/index.html
+++ b/llmproxy/static/admin/index.html
@@ -87,6 +87,8 @@
.vm code.id { font-size: 14px; font-weight: 700; }
a { color: var(--accent); }
.hidden { display: none !important; }
+ .fav-row { display: grid; grid-template-columns: auto 1fr auto auto auto; gap: 8px; align-items: center; padding: 7px 10px; border: 1px solid var(--border); border-radius: 8px; margin-bottom: 6px; }
+ .fav-rank { font-size: 11px; font-weight: 700; color: var(--muted); min-width: 18px; text-align: right; }
@@ -164,6 +166,15 @@
Models & Categorizations
+
+
Favorite free models
+
These models are tried first (in ranked order) when routing through any */free virtual endpoint or the free tier of llmproxy/loadbalanced. A model is only selected if it is currently believed-free; it is skipped silently otherwise and the normal selection algorithm continues.