perf: TTL cache for model list + incremental session index (#780)

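Caches get_available_models() results behind a 60-second TTL to fix the AWS IMDS timeout on the model dropdown, and makes session index writes incremental instead of rebuilding the whole index on every save. Co-authored-by: starship-s <starship-s@users.noreply.github.com>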
2026-04-20 17:33:03 -07:00
parent a7e8b1ab83
commit b6d335feaa
6 changed files with 713 additions and 24 deletions
--- a/api/config.py
+++ b/api/config.py
@@ -10,6 +10,7 @@ Discovery order for all paths:
 """

 import collections
+import copy
 import json
 import logging
 import os
@@ -802,6 +803,26 @@ def set_hermes_default_model(model_id: str) -> dict:
    return get_available_models()


+# ── TTL cache for get_available_models() ─────────────────────────────────────
+_available_models_cache: dict | None = None
+_available_models_cache_ts: float = 0.0
+_AVAILABLE_MODELS_CACHE_TTL: float = 60.0  # seconds — refresh at most once per minute
+_available_models_cache_lock = threading.Lock()
+
+
+def invalidate_models_cache():
+    """Force the TTL cache for get_available_models() to be cleared.
+
+    Call this after modifying config.cfg in-memory (e.g. in tests) so
+    the next call to get_available_models() picks up the changes rather
+    than returning a stale cached result.
+    """
+    global _available_models_cache, _available_models_cache_ts
+    with _available_models_cache_lock:
+        _available_models_cache = None
+        _available_models_cache_ts = 0.0
+
+
 def get_available_models() -> dict:
    """
    Return available models grouped by provider.
@@ -821,12 +842,24 @@ def get_available_models() -> dict:
    # Reload config from disk if config.yaml has changed since last load.
    # This ensures CLI model changes are picked up on page refresh without
    # a server restart, while avoiding clearing in-memory mocks during tests. (#585)
-    try:
-        _current_mtime = Path(_get_config_path()).stat().st_mtime
-    except OSError:
-        _current_mtime = 0.0
-    if _current_mtime != _cfg_mtime:
-        reload_config()
+    # Must run BEFORE the TTL check so config edits within the 60s window are visible.
+    global _available_models_cache, _available_models_cache_ts
+    with _available_models_cache_lock:
+        try:
+            _current_mtime = Path(_get_config_path()).stat().st_mtime
+        except OSError:
+            _current_mtime = 0.0
+        # Note: env-var changes (e.g. API key rotation) are not detected by mtime;
+        # cache will be stale for up to TTL seconds in that case.
+        if _current_mtime != _cfg_mtime:
+            reload_config()
+            # Config changed — force cache invalidation
+            _available_models_cache = None
+            _available_models_cache_ts = 0.0
+        # Serve from TTL cache if fresh.
+        now = time.monotonic()
+        if _available_models_cache is not None and (now - _available_models_cache_ts) < _AVAILABLE_MODELS_CACHE_TTL:
+            return copy.deepcopy(_available_models_cache)
    active_provider = None
    default_model = get_effective_default_model(cfg)
    groups = []
@@ -1277,11 +1310,16 @@ def get_available_models() -> dict:
                    }
                )

-    return {
+    result = {
        "active_provider": active_provider,
        "default_model": default_model,
        "groups": groups,
    }
+    # Cache the result for TTL seconds
+    with _available_models_cache_lock:
+        _available_models_cache = result
+        _available_models_cache_ts = time.monotonic()
+    return copy.deepcopy(result)


 # ── Static file path ─────────────────────────────────────────────────────────
--- a/api/models.py
+++ b/api/models.py
@@ -4,6 +4,7 @@ Hermes Web UI -- Session model and in-memory session store.
 import collections
 import json
 import logging
+import os
 import time
 import uuid
 from pathlib import Path
@@ -19,22 +20,63 @@ from api.workspace import get_last_workspace
 logger = logging.getLogger(__name__)


-def _write_session_index():
-    """Rebuild the session index file for O(1) future reads."""
-    entries = []
-    for p in SESSION_DIR.glob('*.json'):
-        if p.name.startswith('_'): continue
-        try:
-            s = Session.load(p.stem)
-            if s: entries.append(s.compact())
-        except Exception:
-            logger.debug("Failed to load session from %s", p)
-    with LOCK:
-        for s in SESSIONS.values():
-            if not any(e['session_id'] == s.session_id for e in entries):
-                entries.append(s.compact())
-    entries.sort(key=lambda s: s['updated_at'], reverse=True)
-    SESSION_INDEX_FILE.write_text(json.dumps(entries, ensure_ascii=False, indent=2), encoding='utf-8')
+def _write_session_index(updates=None):
+    """Update the session index file.
+
+    When *updates* is provided (a list of Session objects whose compact
+    entries should be refreshed), only those entries are patched into the
+    existing index — no other session files are loaded.  When *updates* is
+    None or the index file is missing, a full rebuild runs (startup / first call).
+    """
+    # Full-rebuild path — used when no updates are given or the index doesn't exist yet.
+    if updates is None or not SESSION_INDEX_FILE.exists():
+        entries = []
+        for p in SESSION_DIR.glob('*.json'):
+            if p.name.startswith('_'): continue
+            try:
+                s = Session.load(p.stem)
+                if s: entries.append(s.compact())
+            except Exception:
+                logger.debug("Failed to load session from %s", p)
+        with LOCK:
+            for s in SESSIONS.values():
+                if not any(e['session_id'] == s.session_id for e in entries):
+                    entries.append(s.compact())
+            entries.sort(key=lambda s: s['updated_at'], reverse=True)
+            _tmp = SESSION_INDEX_FILE.with_suffix('.tmp')
+            _tmp.write_text(json.dumps(entries, ensure_ascii=False, indent=2), encoding='utf-8')
+            os.replace(_tmp, SESSION_INDEX_FILE)
+        return
+
+    # Fast path: patch existing index with updated sessions.
+    # This avoids loading every session file on every single save().
+    # LOCK covers the entire read-patch-write to prevent concurrent save() calls
+    # from both reading the same baseline and one losing its update.
+    _fallback = False
+    try:
+        with LOCK:
+            existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
+            # Build lookup of updated entries
+            updated_map = {s.session_id: s.compact() for s in updates}
+            existing_ids = {e.get('session_id') for e in existing}
+            # Add any updated entries not yet in the index
+            for sid, entry in updated_map.items():
+                if sid not in existing_ids:
+                    existing.append(entry)
+            # Replace matching entries in-place
+            for i, e in enumerate(existing):
+                sid = e.get('session_id')
+                if sid in updated_map:
+                    existing[i] = updated_map[sid]
+            existing.sort(key=lambda s: s.get('updated_at', 0), reverse=True)
+            _tmp = SESSION_INDEX_FILE.with_suffix('.tmp')
+            _tmp.write_text(json.dumps(existing, ensure_ascii=False, indent=2), encoding='utf-8')
+            os.replace(_tmp, SESSION_INDEX_FILE)
+    except Exception:
+        _fallback = True
+    if _fallback:
+        # Corrupt or missing index — fall back to full rebuild (called outside LOCK to avoid deadlock)
+        _write_session_index(updates=None)


 class Session:
@@ -86,7 +128,7 @@ class Session:
            json.dumps(self.__dict__, ensure_ascii=False, indent=2),
            encoding='utf-8',
        )
-        _write_session_index()
+        _write_session_index(updates=[self])

    @classmethod
    def load(cls, sid):