perf: TTL cache for model list + incremental session index (#780)
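Fixes the AWS IMDS timeout seen when opening the model dropdown by caching the model list for up to 60 seconds. Session index writes are now incremental: saving a session patches its entry in the existing index instead of rebuilding the whole index.

Co-authored-by: starship-s <starship-s@users.noreply.github.com>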
This commit is contained in:
@@ -10,6 +10,7 @@ Discovery order for all paths:
|
||||
"""
|
||||
|
||||
import collections
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -802,6 +803,26 @@ def set_hermes_default_model(model_id: str) -> dict:
|
||||
return get_available_models()
|
||||
|
||||
|
||||
# ── TTL cache for get_available_models() ─────────────────────────────────────
# Module-level cache state. The visible readers and writers of the two mutable
# globals below take _available_models_cache_lock before touching them.
_AVAILABLE_MODELS_CACHE_TTL: float = 60.0  # seconds — refresh at most once per minute
_available_models_cache_lock = threading.Lock()
# Last result stored by get_available_models(); None means nothing is cached.
_available_models_cache: dict | None = None
# time.monotonic() reading taken when the cache was filled; 0.0 when empty.
_available_models_cache_ts: float = 0.0
|
||||
|
||||
|
||||
def invalidate_models_cache():
    """Drop the cached get_available_models() result.

    Resets both the cached payload and its timestamp while holding the
    cache lock. Intended for callers that modify config.cfg in memory
    (e.g. tests): the next get_available_models() call then recomputes
    its answer instead of returning the previously cached one.
    """
    global _available_models_cache, _available_models_cache_ts
    with _available_models_cache_lock:
        # Clearing the payload alone is enough to force a refresh; the
        # timestamp is reset as well so the two globals stay consistent.
        _available_models_cache, _available_models_cache_ts = None, 0.0
|
||||
|
||||
|
||||
def get_available_models() -> dict:
|
||||
"""
|
||||
Return available models grouped by provider.
|
||||
@@ -821,12 +842,24 @@ def get_available_models() -> dict:
|
||||
# Reload config from disk if config.yaml has changed since last load.
|
||||
# This ensures CLI model changes are picked up on page refresh without
|
||||
# a server restart, while avoiding clearing in-memory mocks during tests. (#585)
|
||||
try:
|
||||
_current_mtime = Path(_get_config_path()).stat().st_mtime
|
||||
except OSError:
|
||||
_current_mtime = 0.0
|
||||
if _current_mtime != _cfg_mtime:
|
||||
reload_config()
|
||||
# Must run BEFORE the TTL check so config edits within the 60s window are visible.
|
||||
global _available_models_cache, _available_models_cache_ts
|
||||
with _available_models_cache_lock:
|
||||
try:
|
||||
_current_mtime = Path(_get_config_path()).stat().st_mtime
|
||||
except OSError:
|
||||
_current_mtime = 0.0
|
||||
# Note: env-var changes (e.g. API key rotation) are not detected by mtime;
|
||||
# cache will be stale for up to TTL seconds in that case.
|
||||
if _current_mtime != _cfg_mtime:
|
||||
reload_config()
|
||||
# Config changed — force cache invalidation
|
||||
_available_models_cache = None
|
||||
_available_models_cache_ts = 0.0
|
||||
# Serve from TTL cache if fresh.
|
||||
now = time.monotonic()
|
||||
if _available_models_cache is not None and (now - _available_models_cache_ts) < _AVAILABLE_MODELS_CACHE_TTL:
|
||||
return copy.deepcopy(_available_models_cache)
|
||||
active_provider = None
|
||||
default_model = get_effective_default_model(cfg)
|
||||
groups = []
|
||||
@@ -1277,11 +1310,16 @@ def get_available_models() -> dict:
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
result = {
|
||||
"active_provider": active_provider,
|
||||
"default_model": default_model,
|
||||
"groups": groups,
|
||||
}
|
||||
# Cache the result for TTL seconds
|
||||
with _available_models_cache_lock:
|
||||
_available_models_cache = result
|
||||
_available_models_cache_ts = time.monotonic()
|
||||
return copy.deepcopy(result)
|
||||
|
||||
|
||||
# ── Static file path ─────────────────────────────────────────────────────────
|
||||
|
||||
@@ -4,6 +4,7 @@ Hermes Web UI -- Session model and in-memory session store.
|
||||
import collections
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
@@ -19,22 +20,63 @@ from api.workspace import get_last_workspace
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _write_session_index():
    """Regenerate the session index file from scratch.

    Gathers the compact form of every session file in SESSION_DIR, adds
    any in-memory session from SESSIONS that has no entry yet, orders the
    entries newest-first by 'updated_at', and overwrites
    SESSION_INDEX_FILE with the resulting JSON.
    """
    index = []
    for path in SESSION_DIR.glob('*.json'):
        # Files whose name starts with '_' are not treated as sessions.
        if path.name.startswith('_'):
            continue
        try:
            session = Session.load(path.stem)
            if session:
                index.append(session.compact())
        except Exception:
            # Best effort: one unreadable session must not block the rebuild.
            logger.debug("Failed to load session from %s", path)
    with LOCK:
        for session in SESSIONS.values():
            already_indexed = any(
                row['session_id'] == session.session_id for row in index
            )
            if not already_indexed:
                index.append(session.compact())
    index.sort(key=lambda row: row['updated_at'], reverse=True)
    payload = json.dumps(index, ensure_ascii=False, indent=2)
    SESSION_INDEX_FILE.write_text(payload, encoding='utf-8')
|
||||
def _write_session_index(updates=None):
    """Update the session index file.

    When *updates* is provided (a list of Session objects whose compact
    entries should be refreshed) and the index file already exists, the
    existing index is patched in place: matching entries are replaced and
    unknown ones appended. This avoids loading every session file on each
    save, although the index file itself is still re-read, re-sorted and
    rewritten. When *updates* is None, or the index file does not exist
    yet, a full rebuild from SESSION_DIR and SESSIONS is performed.

    If the fast path fails for any reason (for example a corrupt index),
    the failure is logged at debug level and a full rebuild is done instead.

    Both paths write to a sibling '.tmp' file and then os.replace() it over
    the index, so readers never observe a partially written file.
    """
    # Lazy full-rebuild path — also used when the index doesn't exist yet.
    if updates is None or not SESSION_INDEX_FILE.exists():
        entries = []
        for p in SESSION_DIR.glob('*.json'):
            # Files whose name starts with '_' are not treated as sessions.
            if p.name.startswith('_'):
                continue
            try:
                s = Session.load(p.stem)
                if s:
                    entries.append(s.compact())
            except Exception:
                # Best effort: one unreadable session must not block the rebuild.
                logger.debug("Failed to load session from %s", p)
        # Track known ids in a set so merging the in-memory sessions is a
        # linear pass instead of rescanning `entries` for every session.
        seen_ids = {e.get('session_id') for e in entries}
        with LOCK:
            for s in SESSIONS.values():
                if s.session_id not in seen_ids:
                    entries.append(s.compact())
                    seen_ids.add(s.session_id)
        entries.sort(key=lambda e: e['updated_at'], reverse=True)
        _tmp = SESSION_INDEX_FILE.with_suffix('.tmp')
        _tmp.write_text(json.dumps(entries, ensure_ascii=False, indent=2), encoding='utf-8')
        os.replace(_tmp, SESSION_INDEX_FILE)
        return

    # Fast path: patch the existing index with the updated sessions.
    # LOCK covers the entire read-patch-write so that concurrent save() calls
    # cannot both read the same baseline and have one lose its update.
    try:
        with LOCK:
            existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
            updated_map = {s.session_id: s.compact() for s in updates}
            existing_ids = {e.get('session_id') for e in existing}
            # Replace entries that were updated, keeping all others untouched.
            merged = [updated_map.get(e.get('session_id'), e) for e in existing]
            # Append updated sessions that the index has not seen before.
            merged.extend(
                entry for sid, entry in updated_map.items()
                if sid not in existing_ids
            )
            merged.sort(key=lambda e: e.get('updated_at', 0), reverse=True)
            _tmp = SESSION_INDEX_FILE.with_suffix('.tmp')
            _tmp.write_text(json.dumps(merged, ensure_ascii=False, indent=2), encoding='utf-8')
            os.replace(_tmp, SESSION_INDEX_FILE)
    except Exception:
        # Corrupt or unreadable index: record why, then rebuild below.
        logger.debug("Incremental session index update failed; rebuilding", exc_info=True)
    else:
        return

    # Reached only after a fast-path failure. The rebuild runs after the
    # `with LOCK` block has exited because it acquires LOCK itself.
    _write_session_index(updates=None)
|
||||
|
||||
|
||||
class Session:
|
||||
@@ -86,7 +128,7 @@ class Session:
|
||||
json.dumps(self.__dict__, ensure_ascii=False, indent=2),
|
||||
encoding='utf-8',
|
||||
)
|
||||
_write_session_index()
|
||||
_write_session_index(updates=[self])
|
||||
|
||||
@classmethod
|
||||
def load(cls, sid):
|
||||
|
||||
Reference in New Issue
Block a user