feat(models): live-first model fetching for all OpenAI-compat providers (#892)
* feat(models): live-first model fetching for all OpenAI-compat providers (#871)

  The WebUI model picker relied on hardcoded _PROVIDER_MODELS as primary source for providers like zai, minimax, mistralai, xai, openai-codex, deepseek, and gemini. These lists go stale — new models don't appear until someone manually updates the dict.

  Add an OpenAI-compat /v1/models fetch fallback in _handle_live_models() that fires when provider_model_ids() is unavailable or returns [].

  The resolution chain is now:
  1. hermes_cli.provider_model_ids() (agent's live fetch)
  2. Custom providers from config.yaml
  3. Direct /v1/models fetch for known OpenAI-compat endpoints
  4. Static _PROVIDER_MODELS as last-resort offline fallback

  Covers: zai, minimax, mistralai, xai, openai-codex, deepseek, gemini.

  Uses urllib (stdlib) — no new dependencies. Static lists remain as offline fallback so the UI always shows something.

  Closes #871

  Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

* refactor(models): address review feedback on live fetch (#892)

  Five changes from nesquena-hermes review:
  1. Move _OPENAI_COMPAT_ENDPOINTS to module level — avoid dict reconstruction per request
  2. Document urllib blocking behavior — 8s timeout acceptable because server is threaded and frontend enriches in background
  3. Add TODO comment for TTL-based caching follow-up
  4. Remove openai-codex from endpoint map — same endpoint as base openai provider, already covered by provider_model_ids()
  5. Restrict API key lookup to provider-scoped and model.api_key only — remove top-level api_key fallback to prevent cross-provider key leakage

  Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -24,6 +24,27 @@ _PROVIDER_ALIASES = {
|
|||||||
"openai-codex": "openai",
|
"openai-codex": "openai",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# OpenAI-compatible /v1/models endpoints for live model discovery.
|
||||||
|
# Used as fallback when hermes_cli.provider_model_ids() is unavailable or
|
||||||
|
# returns [] for a provider (#871). Kept at module level so the dict is
|
||||||
|
# built once, not reconstructed per request.
|
||||||
|
_OPENAI_COMPAT_ENDPOINTS = {
|
||||||
|
"zai": "https://api.z.ai/v1",
|
||||||
|
"minimax": "https://api.minimax.chat/v1",
|
||||||
|
"mistralai": "https://api.mistral.ai/v1",
|
||||||
|
"xai": "https://api.x.ai/v1",
|
||||||
|
"deepseek": "https://api.deepseek.com/v1",
|
||||||
|
"gemini": "https://generativelanguage.googleapis.com/v1beta/openai",
|
||||||
|
}
|
||||||
|
# NOTE: "openai-codex" is excluded because it maps to the same endpoint as
|
||||||
|
# the base "openai" provider (api.openai.com/v1). When both are configured
|
||||||
|
# the openai provider is already wired through provider_model_ids(); codex-
|
||||||
|
# specific model filtering happens downstream in hermes_cli.
|
||||||
|
#
|
||||||
|
# TODO: Add TTL-based caching (e.g. 60s) so repeated model-list requests
|
||||||
|
# don't hit provider APIs. The frontend already caches via _liveModelCache
|
||||||
|
# but the backend re-fetches on every /api/models/live call.
|
||||||
|
|
||||||
from api.config import (
|
from api.config import (
|
||||||
STATE_DIR,
|
STATE_DIR,
|
||||||
SESSION_DIR,
|
SESSION_DIR,
|
||||||
@@ -2168,9 +2189,7 @@ def _handle_live_models(handler, parsed):
|
|||||||
ids = _pmi(provider)
|
ids = _pmi(provider)
|
||||||
except Exception as _import_err:
|
except Exception as _import_err:
|
||||||
logger.debug("provider_model_ids import failed for %s: %s", provider, _import_err)
|
logger.debug("provider_model_ids import failed for %s: %s", provider, _import_err)
|
||||||
# Last resort: return the WebUI's own static catalog
|
ids = []
|
||||||
from api.config import _PROVIDER_MODELS as _pm
|
|
||||||
ids = [m["id"] for m in _pm.get(provider, [])]
|
|
||||||
|
|
||||||
if not ids:
|
if not ids:
|
||||||
# For 'custom' provider, provider_model_ids() returns [] because
|
# For 'custom' provider, provider_model_ids() returns [] because
|
||||||
@@ -2188,8 +2207,49 @@ def _handle_live_models(handler, parsed):
|
|||||||
]
|
]
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if not ids:
|
|
||||||
return j(handler, {"provider": provider, "models": [], "count": 0})
|
# ── OpenAI-compat live fetch fallback ──────────────────────────────────
|
||||||
|
# When provider_model_ids() is unavailable or returns [] for a provider
|
||||||
|
# that exposes a standard /v1/models endpoint, fetch directly. This
|
||||||
|
# eliminates the need to keep _PROVIDER_MODELS in sync for providers
|
||||||
|
# that have a discoverable API (#871).
|
||||||
|
#
|
||||||
|
# WARNING: This uses synchronous urllib.request which blocks the worker
|
||||||
|
# thread for up to 8 seconds on timeout. This is acceptable because:
|
||||||
|
# (a) the server uses threading (not async), so other requests continue;
|
||||||
|
# (b) the frontend shows the static list immediately and enriches in
|
||||||
|
# the background via _fetchLiveModels(), so the user never waits.
|
||||||
|
if not ids:
|
||||||
|
_ep = _OPENAI_COMPAT_ENDPOINTS.get(provider)
|
||||||
|
if _ep:
|
||||||
|
try:
|
||||||
|
import urllib.request
|
||||||
|
_providers_cfg = cfg.get("providers", {})
|
||||||
|
_prov = _providers_cfg.get(provider, {}) if isinstance(_providers_cfg, dict) else {}
|
||||||
|
# Use the provider-scoped key, then model.api_key — never a top-level
|
||||||
|
# api_key, which may belong to a different provider.
|
||||||
|
_key = _prov.get("api_key") if isinstance(_prov, dict) else None
|
||||||
|
if not _key:
|
||||||
|
_key = cfg.get("model", {}).get("api_key")
|
||||||
|
if _key:
|
||||||
|
_req = urllib.request.Request(
|
||||||
|
f"{_ep}/models",
|
||||||
|
headers={"Authorization": f"Bearer {_key}"},
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(_req, timeout=8) as _resp:
|
||||||
|
_body = json.loads(_resp.read())
|
||||||
|
ids = [m.get("id", "") for m in _body.get("data", []) if m.get("id")]
|
||||||
|
logger.debug("Live-fetched %d models from %s /v1/models", len(ids), provider)
|
||||||
|
except Exception as _fetch_err:
|
||||||
|
logger.debug("Live fetch from %s failed: %s", provider, _fetch_err)
|
||||||
|
# Fall through to static list below
|
||||||
|
|
||||||
|
# Static fallback — reached when no live source above yielded any model IDs (fetch failed, was skipped, or returned nothing).
|
||||||
|
if not ids:
|
||||||
|
from api.config import _PROVIDER_MODELS as _pm
|
||||||
|
ids = [m["id"] for m in _pm.get(provider, [])]
|
||||||
|
if not ids:
|
||||||
|
return j(handler, {"provider": provider, "models": [], "count": 0})
|
||||||
|
|
||||||
# Normalise to {id, label} — provider_model_ids() returns plain string IDs.
|
# Normalise to {id, label} — provider_model_ids() returns plain string IDs.
|
||||||
# For ollama-cloud use the shared Ollama formatter (handles `:variant` suffix).
|
# For ollama-cloud use the shared Ollama formatter (handles `:variant` suffix).
|
||||||
|
|||||||
Reference in New Issue
Block a user