BillJr99 · BillJr99 · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/README.md b/README.md
@@ -184,7 +184,9 @@ When a request targets a (non-fusion) virtual model, llmproxy:
    reorderings may then run on top without ever dropping a candidate: the
    [request-fit triage](#request-fit-triage-every-free-and-local-virtual) for the
    `*/free` and `*/local` virtuals, and [capability ordering](#capability-aware-routing--failover)
-   when the request forces a capability.
+   when the request forces a capability. Finally, any models listed in
+   [`favorite_free_models`](#favorite_free_models) that are present in the pool
+   are promoted to the front in ranked order before cycling begins.
 3. **Tries each candidate in order**, returning the first **usable** response.
 
 A candidate is considered to have **failed** — so llmproxy moves on to the next
@@ -678,6 +680,10 @@ Config is stored at `~/.config/llmproxy/config.json` (or the path in
       "tokens_per_day": 500000
     }
   },
+  "favorite_free_models": [
+    "google/gemini-2.5-flash",
+    "groq/llama-3.1-8b-instant"
+  ],
 
   "free_tier": {
     "sync_on_startup": true,
@@ -864,6 +870,45 @@ process — so it is "as far as we can tell in the moment". Any field set to `nu
 is ignored; a provider with no `free_allowance` simply never gains free-in-the-
 moment status.
 
+<a name="favorite_free_models"></a>
+### `favorite_free_models` — ranked priority list for free-tier routing
+
+`favorite_free_models` is an **optional** top-level array of model IDs listed in
+preference order.  When a `*/free` virtual endpoint (e.g. `llmproxy/free`,
+`llmproxy/deep__free`) or the free tier of `llmproxy/loadbalanced` selects a
+backend, models in this list are promoted to the front of the candidate pool
+**in the order listed**, before the normal capacity/request-fit/capability
+algorithm handles the rest.
+
+```json
+"favorite_free_models": [
+  "google/gemini-2.5-flash",
+  "anthropic/claude-3-5-haiku-20251001",
+  "gpt-4o-mini"
+]
+```
+
+Each entry is matched case-insensitively against the upstream model ID (bare,
+e.g. `gpt-4o-mini`) or the fully-qualified proxy ID (e.g.
+`openai/gpt-4o-mini`).  A favorite is only promoted if it is **currently
+believed-free** (present in `believed_free` and not flagged as cost-observed);
+if it is absent from the virtual model's candidate pool it is silently skipped
+and the remaining favorites and the normal algorithm continue unchanged.
+
+**Cost-observation persistence:** if a favorite is later removed from
+`believed_free` because a cost was observed at runtime, it remains in
+`favorite_free_models`.  When a future sync restores it to the free pool (e.g.
+the provider makes it free again), it is automatically re-promoted without any
+manual config change.
+
+`favorite_free_models` has no effect on non-free virtual endpoints
+(`llmproxy/deep`, `llmproxy/tools`, etc.) or on fusion virtuals.
+
+The admin UI's **Models & Categorizations** tab includes a **Favorite free
+models** panel where you can add models from a grouped-by-provider picker,
+reorder them with up/down buttons, and remove entries — changes are saved
+immediately.
+
 <a name="usage-accounting"></a>
 ### Token + cost accounting — `GET /v1/usage`
 

diff --git a/llmproxy/admin.py b/llmproxy/admin.py
@@ -463,6 +463,7 @@ def api_get_config():
     return jsonify({
         "providers": providers,
         "believed_free": config.get("believed_free", []),
+        "favorite_free_models": config.get("favorite_free_models", []),
         "model_reasoning": config.get("model_reasoning", {}),
         "model_capabilities": config.get("model_capabilities", {}),
         "free_limits": config.get("free_limits", {}),
@@ -794,6 +795,18 @@ def _put_section(key: str, validate):
     return jsonify({key: payload})
 
 
+@bp.route("/admin/api/favorite-free-models", methods=["GET", "PUT"])
+def api_favorite_free_models():
+    """Read or replace the ``favorite_free_models`` config list.
+
+    PUT delegates to ``_put_section`` with a validator that accepts only a
+    list of strings.  Every other method that reaches this view is answered
+    with the current list: Flask registers HEAD alongside GET, so branching on
+    ``== "GET"`` would send a HEAD request down the write path.
+    """
+
+    def validate(p):
+        # Returns an error message for a bad payload, or None when it is valid.
+        if not (isinstance(p, list) and all(isinstance(x, str) for x in p)):
+            return "favorite_free_models must be a list of strings."
+        return None
+
+    if request.method == "PUT":
+        return _put_section("favorite_free_models", validate)
+    return jsonify({"favorite_free_models": _load().get("favorite_free_models", [])})
+
+
 @bp.route("/admin/api/believed-free", methods=["GET", "PUT"])
 def api_believed_free():
     if request.method == "GET":

diff --git a/llmproxy/server.py b/llmproxy/server.py
@@ -2943,6 +2943,37 @@ def _provider_exposes_to_virtual_models(provider_cfg: dict) -> bool:
     return provider_cfg.get("expose_to_virtual_models", True) is not False
 
 
+def _apply_favorite_free_ordering(
+    candidates: list[tuple[str, dict, str]],
+    config: dict,
+) -> list[tuple[str, dict, str]]:
+    """Promote favorite_free_models to the front in ranked order.
+
+    Only candidates already present in the pool are promoted — favorites not in
+    the pool (e.g. cost-observed, not believed_free) are silently skipped.
+    Non-matching candidates retain their existing order after the favorites.
+    Each favorite promotes at most one candidate: the first one still in the
+    pool that matches it.
+
+    Matching is case-insensitive and ignores :variant suffixes (e.g. :free,
+    :nitro) so that "x/y" matches both "x/y" and "x/y:free".  A favorite may
+    name the bare upstream model or the provider-qualified "provider/model".
+
+    Args:
+        candidates: ``(provider_name, provider_cfg, upstream_model)`` tuples in
+            their current priority order.  Never mutated.
+        config: Config mapping; only ``favorite_free_models`` is read.
+
+    Returns:
+        ``candidates`` itself when there is nothing to apply, otherwise a new
+        list with the matched favorites first.
+    """
+    favorites = config.get("favorite_free_models")
+    # A hand-edited config may hold null or a non-list here; treat both as
+    # "no favorites" instead of iterating a string character by character.
+    if not favorites or not isinstance(favorites, (list, tuple)):
+        return candidates
+
+    # Compute each candidate's match keys once rather than once per favorite.
+    # Every key is fully lowercased, including the provider prefix of the
+    # suffix-stripped qualified form.
+    remaining: list[tuple[frozenset[str], tuple[str, dict, str]]] = []
+    for cand in candidates:
+        pname, _pcfg, umodel = cand
+        umodel_lower = umodel.lower()
+        umodel_base = umodel_lower.split(":")[0]  # strip :variant suffix
+        prefix = f"{pname}/".lower()
+        keys = frozenset(
+            (umodel_lower, umodel_base, prefix + umodel_lower, prefix + umodel_base)
+        )
+        remaining.append((keys, cand))
+
+    front: list[tuple[str, dict, str]] = []
+    for fav in favorites:
+        if not isinstance(fav, str):
+            continue  # ignore malformed entries rather than crash routing
+        fav_lower = fav.lower()
+        for i, (keys, cand) in enumerate(remaining):
+            if fav_lower in keys:
+                front.append(cand)
+                del remaining[i]
+                break
+    return front + [cand for _keys, cand in remaining]
+
+
 def _param_count(model_id: str) -> float:
     """Best-effort parameter count (in billions) parsed from a model id.
 
@@ -3436,6 +3467,7 @@ def _price(c: tuple[str, dict, str]) -> float:
             continue
         if tier == _TIER_FREE:
             bucket = _quality_ordered_candidates(bucket, free_limits, reasoning_map)
+            bucket = _apply_favorite_free_ordering(bucket, config)
         elif tier == _TIER_LOCAL:
             # $0 like free — prefer the strongest local model (e.g. the larger
             # Ollama model) rather than rotating randomly.
@@ -4253,6 +4285,8 @@ def on_success(pn: str, um: str, body=None) -> None:
             logger.info("  [%s] request-fit first-pick tier=%s", model_full, _target_reasoning_tier(payload))
         if needed:
             ordered = _order_by_capability(ordered, needed, _model_capabilities(config))
+        if is_free_virtual:
+            ordered = _apply_favorite_free_ordering(ordered, config)
         logger.info("  [%s] cycling through %d candidate(s)", model_full, len(ordered))
         if is_streaming:
             timeout = server_cfg.get("stream_timeout", 300)

diff --git a/llmproxy/static/admin/index.html b/llmproxy/static/admin/index.html
@@ -87,6 +87,8 @@
   .vm code.id { font-size: 14px; font-weight: 700; }
   a { color: var(--accent); }
   .hidden { display: none !important; }
+  .fav-row { display: grid; grid-template-columns: auto 1fr auto auto auto; gap: 8px; align-items: center; padding: 7px 10px; border: 1px solid var(--border); border-radius: 8px; margin-bottom: 6px; }
+  .fav-rank { font-size: 11px; font-weight: 700; color: var(--muted); min-width: 18px; text-align: right; }
 </style>
 </head>
 <body>
@@ -164,6 +166,15 @@ <h2>Models &amp; Categorizations</h2>
       <div id="model-grid" class="grid-models" style="margin-top:12px"></div>
       <div class="actions"><button class="btn primary" id="save-cats">Save categorizations</button></div>
     </div>
+    <div class="card">
+      <h3>Favorite free models</h3>
+      <p class="muted">These models are tried first (in ranked order) when routing through any <code>*/free</code> virtual endpoint or the free tier of <code>llmproxy/loadbalanced</code>. A model is only selected if it is currently believed-free; it is skipped silently otherwise and the normal selection algorithm continues.</p>
+      <div id="fav-list" style="margin-bottom:12px"></div>
+      <div class="inline" style="gap:8px;flex-wrap:wrap">
+        <select id="fav-pick" style="flex:1;min-width:180px"><option value="">— pick a model to add —</option></select>
+        <button class="btn" id="fav-add">Add</button>
+      </div>
+    </div>
   </section>
 
   <!-- VIRTUAL -->
@@ -424,7 +435,7 @@ <h3>${esc(name)} ${keyPill}</h3>
 // ---- Models & categorizations ----
 $("#discover").addEventListener("click", async () => {
   toast("Discovering…");
-  try { const r = await api("/models"); DISCOVERED = r.models || []; renderModels(); toast(`Discovered ${DISCOVERED.length} models`); }
+  try { const r = await api("/models"); DISCOVERED = r.models || []; renderModels(); renderFavPicker(); toast(`Discovered ${DISCOVERED.length} models`); }
   catch (e) { toast(e.message, true); }
 });
 $("#model-filter").addEventListener("input", renderModels);
@@ -492,6 +503,73 @@ <h3>${esc(name)} ${keyPill}</h3>
 }
 $("#refresh-vm").addEventListener("click", loadVirtual);
 
+// ---- Favorite free models ----
+let FAVS = [];
+
+function renderFavPicker() {
+  // Rebuild the "add favorite" <select>: one <optgroup> per provider prefix
+  // (the text before the first "/"), with prefix-less ids under "(bare)".
+  const picker = $("#fav-pick");
+  const byProvider = {};
+  for (const id of modelUniverse()) {
+    const cut = id.indexOf("/");
+    const provider = cut < 0 ? "(bare)" : id.slice(0, cut);
+    if (!byProvider[provider]) byProvider[provider] = [];
+    byProvider[provider].push(id);
+  }
+  const optgroups = Object.keys(byProvider).sort().map(provider => {
+    const options = byProvider[provider]
+      .map(id => `<option value="${esc(id)}">${esc(id)}</option>`)
+      .join("");
+    return `<optgroup label="${esc(provider)}">${options}</optgroup>`;
+  });
+  picker.innerHTML =
+    `<option value="">— pick a model to add —</option>` + optgroups.join("");
+}
+
+function renderFavsList() {
+  // Redraw the ranked favorites list and wire each row's up/down/remove buttons.
+  const box = $("#fav-list");
+  if (!FAVS.length) {
+    box.innerHTML = `<p class="muted">No favorites yet.</p>`;
+    return;
+  }
+  const lastIndex = FAVS.length - 1;
+  box.innerHTML = FAVS.map((m, i) => `
+    <div class="fav-row">
+      <span class="fav-rank">${i + 1}</span>
+      <code style="font-size:13px;word-break:break-all">${esc(m)}</code>
+      <button class="btn" data-up="${i}" ${i === 0 ? "disabled" : ""} title="Move up">↑</button>
+      <button class="btn" data-dn="${i}" ${i === lastIndex ? "disabled" : ""} title="Move down">↓</button>
+      <button class="btn danger" data-rm="${i}" title="Remove">✕</button>
+    </div>`).join("");
+  // Each button carries its row index in a data-* attribute; read it back on click.
+  const wire = (attr, onClick) => {
+    box.querySelectorAll(`[data-${attr}]`).forEach(btn => {
+      btn.addEventListener("click", () => onClick(+btn.dataset[attr]));
+    });
+  };
+  wire("up", idx => moveFav(idx, -1));
+  wire("dn", idx => moveFav(idx, 1));
+  wire("rm", idx => removeFav(idx));
+}
+
+async function saveFavs() {
+  // PUT the whole ordered list; any failure is surfaced as an error toast.
+  try {
+    const options = { method: "PUT", body: JSON.stringify(FAVS) };
+    await api("/favorite-free-models", options);
+    toast("Favorite free models saved");
+  } catch (err) {
+    toast(err.message, true);
+  }
+}
+
+function moveFav(i, dir) {
+  // Swap entry i with its neighbour at i + dir, then redraw and persist.
+  // A move that would leave the list bounds is ignored.
+  const target = i + dir;
+  if (target < 0 || target >= FAVS.length) return;
+  const moved = FAVS[i];
+  FAVS[i] = FAVS[target];
+  FAVS[target] = moved;
+  renderFavsList();
+  saveFavs();
+}
+
+function removeFav(i) {
+  // Drop the entry at index i, then redraw and persist.
+  FAVS.splice(i, 1);
+  renderFavsList();
+  saveFavs();
+}
+
+$("#fav-add").addEventListener("click", () => {
+  // Append the picked model unless nothing is selected or it is already listed.
+  const picked = $("#fav-pick").value;
+  if (!picked) return;
+  if (FAVS.includes(picked)) {
+    toast("Already in favorites", true);
+    return;
+  }
+  FAVS.push(picked);
+  renderFavsList();
+  renderFavPicker();
+  saveFavs();
+});
+
+async function loadFavs() {
+  // Fetch the saved favorites and redraw both the list and the picker;
+  // a failed fetch is reported as an error toast and leaves FAVS untouched.
+  try {
+    const reply = await api("/favorite-free-models");
+    FAVS = reply.favorite_free_models || [];
+    renderFavsList();
+    renderFavPicker();
+  } catch (err) {
+    toast(err.message, true);
+  }
+}
+
 // ---- Bootstrap ----
 async function bootstrap() {
   try {
@@ -515,6 +593,7 @@ <h3>${esc(name)} ${keyPill}</h3>
     renderTemplateSelect();
     await loadProviders();
     renderModels();
+    await loadFavs();
   } catch (e) {
     setAuthState(false, "auth required");
     toast(e.message, true);

diff --git a/tests/test_admin_api.py b/tests/test_admin_api.py
@@ -207,6 +207,47 @@ def test_put_believed_free_rejects_non_list(client):
     assert resp.status_code == 400
 
 
+# --------------------------------------------------------------------------- #
+# Favorite free models
+
+def test_get_favorite_free_models_default_empty(client):
+    """GET on the favorites endpoint answers 200 with an empty list by default."""
+    response = client.get("/admin/api/favorite-free-models")
+    assert response.status_code == 200
+    body = response.get_json()
+    assert body["favorite_free_models"] == []
+
+
+def test_put_favorite_free_models_valid(client, cfg_path):
+    """A list of strings is accepted, echoed back, and readable from cfg_path."""
+    expected = ["google/gemini-2.5-flash", "openai/gpt-4o-mini"]
+    response = client.put("/admin/api/favorite-free-models", json=expected)
+    assert response.status_code == 200
+    assert response.get_json()["favorite_free_models"] == expected
+    stored = _read_config(cfg_path)
+    assert stored["favorite_free_models"] == expected
+
+
+def test_put_favorite_free_models_rejects_non_list(client):
+    """A JSON object in place of the list is answered with 400."""
+    not_a_list = {"model": "x"}
+    response = client.put("/admin/api/favorite-free-models", json=not_a_list)
+    assert response.status_code == 400
+
+
+def test_put_favorite_free_models_rejects_non_string_entries(client):
+    """A list holding a non-string entry is answered with 400."""
+    mixed_entries = ["ok", 42]
+    response = client.put("/admin/api/favorite-free-models", json=mixed_entries)
+    assert response.status_code == 400
+
+
+def test_favorite_free_models_in_config_get(client, cfg_path):
+    """Favorites saved through the dedicated endpoint appear in GET /admin/api/config."""
+    saved = ["google/gemini-flash"]
+    client.put("/admin/api/favorite-free-models", json=saved)
+    config_response = client.get("/admin/api/config")
+    assert config_response.status_code == 200
+    payload = config_response.get_json()
+    assert payload["favorite_free_models"] == saved
+
+
+def test_get_favorite_free_models_round_trips_empty_list(client, cfg_path):
+    """Saving an empty list and reading it back yields an empty list."""
+    client.put("/admin/api/favorite-free-models", json=[])
+    response = client.get("/admin/api/favorite-free-models")
+    body = response.get_json()
+    assert body["favorite_free_models"] == []
+
+
 def test_put_model_reasoning_validates_level(client):
     resp = client.put("/admin/api/model-reasoning", json={"m": "ultra"})
     assert resp.status_code == 400