From cd01e4d5bab0e976baa0935b6b22ea63e58a7449 Mon Sep 17 00:00:00 2001 From: bergeouss <48155732+bergeouss@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:45:46 +0200 Subject: [PATCH] feat(models): live-first model fetching for all OpenAI-compat providers (#892) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(models): live-first model fetching for all OpenAI-compat providers (#871) The WebUI model picker relied on hardcoded _PROVIDER_MODELS as primary source for providers like zai, minimax, mistralai, xai, openai-codex, deepseek, and gemini. These lists go stale — new models don't appear until someone manually updates the dict. Add an OpenAI-compat /v1/models fetch fallback in _handle_live_models() that fires when provider_model_ids() is unavailable or returns []. The resolution chain is now: 1. hermes_cli.provider_model_ids() (agent's live fetch) 2. Custom providers from config.yaml 3. Direct /v1/models fetch for known OpenAI-compat endpoints 4. Static _PROVIDER_MODELS as last-resort offline fallback Covers: zai, minimax, mistralai, xai, openai-codex, deepseek, gemini. Uses urllib (stdlib) — no new dependencies. Static lists remain as offline fallback so the UI always shows something. Closes #871 Co-Authored-By: Claude Opus 4.7 * refactor(models): address review feedback on live fetch (#892) Five changes from nesquena-hermes review: 1. Move _OPENAI_COMPAT_ENDPOINTS to module level — avoid dict reconstruction per request 2. Document urllib blocking behavior — 8s timeout acceptable because server is threaded and frontend enriches in background 3. Add TODO comment for TTL-based caching follow-up 4. Remove openai-codex from endpoint map — same endpoint as base openai provider, already covered by provider_model_ids() 5. 
Restrict API key lookup to provider-scoped and model.api_key only — remove top-level api_key fallback to prevent cross-provider key leakage Co-Authored-By: Claude Opus 4.7 --------- Co-authored-by: Claude Opus 4.7 --- api/routes.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/api/routes.py b/api/routes.py index ca5c78e..b8c0c05 100644 --- a/api/routes.py +++ b/api/routes.py @@ -24,6 +24,27 @@ _PROVIDER_ALIASES = { "openai-codex": "openai", } +# OpenAI-compatible /v1/models endpoints for live model discovery. +# Used as fallback when hermes_cli.provider_model_ids() is unavailable or +# returns [] for a provider (#871). Kept at module level so the dict is +# built once, not reconstructed per request. +_OPENAI_COMPAT_ENDPOINTS = { + "zai": "https://api.z.ai/v1", + "minimax": "https://api.minimax.chat/v1", + "mistralai": "https://api.mistral.ai/v1", + "xai": "https://api.x.ai/v1", + "deepseek": "https://api.deepseek.com/v1", + "gemini": "https://generativelanguage.googleapis.com/v1beta/openai", +} +# NOTE: "openai-codex" is excluded because it maps to the same endpoint as +# the base "openai" provider (api.openai.com/v1). When both are configured +# the openai provider is already wired through provider_model_ids(); codex- +# specific model filtering happens downstream in hermes_cli. +# +# TODO: Add TTL-based caching (e.g. 60s) so repeated model-list requests +# don't hit provider APIs. The frontend already caches via _liveModelCache +# but the backend re-fetches on every /api/models/live call. 
+ from api.config import ( STATE_DIR, SESSION_DIR, @@ -2168,9 +2189,7 @@ def _handle_live_models(handler, parsed): ids = _pmi(provider) except Exception as _import_err: logger.debug("provider_model_ids import failed for %s: %s", provider, _import_err) - # Last resort: return the WebUI's own static catalog - from api.config import _PROVIDER_MODELS as _pm - ids = [m["id"] for m in _pm.get(provider, [])] + ids = [] if not ids: # For 'custom' provider, provider_model_ids() returns [] because @@ -2188,8 +2207,49 @@ def _handle_live_models(handler, parsed): ] except Exception: pass - if not ids: - return j(handler, {"provider": provider, "models": [], "count": 0}) + + # ── OpenAI-compat live fetch fallback ────────────────────────────────── + # When provider_model_ids() is unavailable or returns [] for a provider + # that exposes a standard /v1/models endpoint, fetch directly. This + # eliminates the need to keep _PROVIDER_MODELS in sync for providers + # that have a discoverable API (#871). + # + # WARNING: This uses synchronous urllib.request which blocks the worker + # thread for up to 8 seconds on timeout. This is acceptable because: + # (a) the server uses threading (not async), so other requests continue; + # (b) the frontend shows the static list immediately and enriches in + # the background via _fetchLiveModels(), so the user never waits. + if not ids: + _ep = _OPENAI_COMPAT_ENDPOINTS.get(provider) + if _ep: + try: + import urllib.request + _providers_cfg = cfg.get("providers", {}) + _prov = _providers_cfg.get(provider, {}) if isinstance(_providers_cfg, dict) else {} + # Use the provider-scoped key, then model.api_key — never a top-level + # api_key, which may belong to a different provider. 
+ _key = _prov.get("api_key") if isinstance(_prov, dict) else None + if not _key: + _key = cfg.get("model", {}).get("api_key") + if _key: + _req = urllib.request.Request( + f"{_ep}/models", + headers={"Authorization": f"Bearer {_key}"}, + ) + with urllib.request.urlopen(_req, timeout=8) as _resp: + _body = json.loads(_resp.read()) + ids = [m.get("id", "") for m in _body.get("data", []) if m.get("id")] + logger.debug("Live-fetched %d models from %s /v1/models", len(ids), provider) + except Exception as _fetch_err: + logger.debug("Live fetch from %s failed: %s", provider, _fetch_err) + # Fall through to static list below + + # Static fallback — reached when no live source above produced any IDs (fetch failed, was skipped for lack of an endpoint or key, or returned nothing). + if not ids: + from api.config import _PROVIDER_MODELS as _pm + ids = [m["id"] for m in _pm.get(provider, [])] + if not ids: + return j(handler, {"provider": provider, "models": [], "count": 0}) # Normalise to {id, label} — provider_model_ids() returns plain string IDs. # For ollama-cloud use the shared Ollama formatter (handles `:variant` suffix).