diff --git a/api/routes.py b/api/routes.py index ca5c78e..b8c0c05 100644 --- a/api/routes.py +++ b/api/routes.py @@ -24,6 +24,27 @@ _PROVIDER_ALIASES = { "openai-codex": "openai", } +# OpenAI-compatible API base URLs ("/models" is appended at request time) for +# live model discovery. Used as fallback when hermes_cli.provider_model_ids() +# is unavailable or returns [] for a provider (#871). Kept at module level so +# the dict is built once, not reconstructed per request. +_OPENAI_COMPAT_ENDPOINTS = { + "zai": "https://api.z.ai/v1", + "minimax": "https://api.minimax.chat/v1", + "mistralai": "https://api.mistral.ai/v1", + "xai": "https://api.x.ai/v1", + "deepseek": "https://api.deepseek.com/v1", + "gemini": "https://generativelanguage.googleapis.com/v1beta/openai", +} +# NOTE: "openai-codex" is excluded because it maps to the same endpoint as +# the base "openai" provider (api.openai.com/v1). When both are configured +# the openai provider is already wired through provider_model_ids(); codex- +# specific model filtering happens downstream in hermes_cli. +# +# TODO: Add TTL-based caching (e.g. 60s) so repeated model-list requests +# don't hit provider APIs. The frontend already caches via _liveModelCache +# but the backend re-fetches on every /api/models/live call.
+ from api.config import ( STATE_DIR, SESSION_DIR, @@ -2168,9 +2189,7 @@ def _handle_live_models(handler, parsed): ids = _pmi(provider) except Exception as _import_err: logger.debug("provider_model_ids import failed for %s: %s", provider, _import_err) - # Last resort: return the WebUI's own static catalog - from api.config import _PROVIDER_MODELS as _pm - ids = [m["id"] for m in _pm.get(provider, [])] + ids = [] if not ids: # For 'custom' provider, provider_model_ids() returns [] because @@ -2188,8 +2207,49 @@ def _handle_live_models(handler, parsed): ] except Exception: pass - if not ids: - return j(handler, {"provider": provider, "models": [], "count": 0}) + + # ── OpenAI-compat live fetch fallback ────────────────────────────────── + # When provider_model_ids() is unavailable or returns [] for a provider + # that exposes a standard /v1/models endpoint, fetch directly. This + # eliminates the need to keep _PROVIDER_MODELS in sync for providers + # that have a discoverable API (#871). + # + # WARNING: This uses synchronous urllib.request which blocks the worker + # thread for up to 8 seconds on timeout. This is acceptable because: + # (a) the server uses threading (not async), so other requests continue; + # (b) the frontend shows the static list immediately and enriches in + # the background via _fetchLiveModels(), so the user never waits. + if not ids: + _ep = _OPENAI_COMPAT_ENDPOINTS.get(provider) + if _ep: + try: + import urllib.request + _providers_cfg = cfg.get("providers", {}) + _prov = _providers_cfg.get(provider, {}) if isinstance(_providers_cfg, dict) else {} + # Prefer the provider-scoped key. NOTE(review): the code below still falls + # back to model.api_key, which may belong to a different provider — confirm.
+ _key = _prov.get("api_key") if isinstance(_prov, dict) else None + if not _key: + _key = cfg.get("model", {}).get("api_key") + if _key: + _req = urllib.request.Request( + f"{_ep}/models", + headers={"Authorization": f"Bearer {_key}"}, + ) + with urllib.request.urlopen(_req, timeout=8) as _resp: + _body = json.loads(_resp.read()) + ids = [m.get("id", "") for m in _body.get("data", []) if m.get("id")] + logger.debug("Live-fetched %d models from %s /v1/models", len(ids), provider) + except Exception as _fetch_err: + logger.debug("Live fetch from %s failed: %s", provider, _fetch_err) + # Fall through to static list below + + # Static fallback — reached whenever every source above yielded no IDs. + if not ids: + from api.config import _PROVIDER_MODELS as _pm + ids = [m["id"] for m in _pm.get(provider, [])] + if not ids: + return j(handler, {"provider": provider, "models": [], "count": 0}) # Normalise to {id, label} — provider_model_ids() returns plain string IDs. # For ollama-cloud use the shared Ollama formatter (handles `:variant` suffix).