perf: TTL cache for model list + incremental session index (#780)

Fixes AWS IMDS timeout on model dropdown. Incremental index writes. Co-authored-by: starship-s <starship-s@users.noreply.github.com>
2026-04-20 17:33:03 -07:00
parent a7e8b1ab83
commit b6d335feaa
6 changed files with 713 additions and 24 deletions
--- a/tests/test_ttl_cache.py
+++ b/tests/test_ttl_cache.py
@@ -0,0 +1,226 @@
+"""
+Tests for the TTL cache in api/config.py — get_available_models().
+
+Validates:
+  - Cache hit within TTL window
+  - TTL expiry triggers re-scan
+  - Config mtime change invalidates cache before TTL check
+  - copy.deepcopy() isolation (mutating returned dict doesn't pollute cache)
+  - invalidate_models_cache() direct invalidation
+"""
+import time
+from unittest.mock import patch
+
+import api.config as config
+
+
+def _reset_cache():
+    """Reset TTL cache globals to a clean state."""
+    config._available_models_cache = None
+    config._available_models_cache_ts = 0.0
+
+
+# ── 1. test_cache_hit_within_ttl ──────────────────────────────────────────
+
+def test_cache_hit_within_ttl():
+    """Call get_available_models() twice within the TTL window.
+    The second call should return cached data without re-scanning providers.
+    We verify this by patching reload_config (called when cache is cold)
+    and asserting it is only invoked once.
+    """
+    _reset_cache()
+    original_reload = config.reload_config
+
+    call_count = 0
+
+    def _counting_reload():
+        nonlocal call_count
+        call_count += 1
+        return original_reload()
+
+    with patch.object(config, "reload_config", wraps=original_reload, side_effect=_counting_reload):
+        saved_mtime = config._cfg_mtime
+        try:
+            # Force mtime mismatch so the first call triggers reload_config + cache fill
+            config._cfg_mtime = 0.0
+            result1 = config.get_available_models()
+            first_call_count = call_count
+
+            # Sync _cfg_mtime to the actual file so the second call doesn't
+            # re-trigger reload_config via mtime mismatch — we want it to hit the TTL cache.
+            try:
+                config._cfg_mtime = config.Path(config._get_config_path()).stat().st_mtime
+            except OSError:
+                config._cfg_mtime = 0.0
+
+            result2 = config.get_available_models()
+
+            # Both results should have the same structure
+            assert "groups" in result1
+            assert "groups" in result2
+
+            # reload_config should not have been called again for the second invocation
+            # (the TTL cache served it)
+            assert call_count == first_call_count, (
+                f"Expected no extra reload_config calls, but got "
+                f"{call_count - first_call_count} extra"
+            )
+        finally:
+            config._cfg_mtime = saved_mtime
+    _reset_cache()
+
+
+# ── 2. test_ttl_expiry ───────────────────────────────────────────────────
+
+def test_ttl_expiry():
+    """Populate the cache, then advance time.monotonic() past 60s.
+    The next call should re-scan (not serve from cache).
+    """
+    _reset_cache()
+
+    # Ensure _cfg_mtime matches file so mtime check doesn't invalidate
+    try:
+        config._cfg_mtime = config.Path(config._get_config_path()).stat().st_mtime
+    except OSError:
+        config._cfg_mtime = 0.0
+
+    # First call populates cache
+    result1 = config.get_available_models()
+    assert config._available_models_cache is not None, "Cache should be populated"
+
+    # Record the cache timestamp
+    cache_ts = config._available_models_cache_ts
+
+    # Advance time.monotonic() by more than the TTL
+    original_monotonic = time.monotonic
+    offset = config._AVAILABLE_MODELS_CACHE_TTL + 10.0  # 70s past the real monotonic
+
+    with patch.object(time, "monotonic", side_effect=lambda: original_monotonic() + offset):
+        result2 = config.get_available_models()
+
+    # The cache should have been refreshed — the timestamp must be newer
+    assert config._available_models_cache_ts > cache_ts, (
+        "Cache should have been refreshed after TTL expiry"
+    )
+
+    _reset_cache()
+
+
+# ── 3. test_mtime_invalidation ───────────────────────────────────────────
+
+def test_mtime_invalidation():
+    """Populate the cache, then change _cfg_mtime to simulate a config file
+    change on disk. The next call should invalidate the cache and re-scan.
+    """
+    _reset_cache()
+
+    # Ensure _cfg_mtime matches file so first call doesn't re-scan due to mtime
+    try:
+        real_mtime = config.Path(config._get_config_path()).stat().st_mtime
+    except OSError:
+        real_mtime = 0.0
+    config._cfg_mtime = real_mtime
+
+    # First call populates cache
+    result1 = config.get_available_models()
+    assert config._available_models_cache is not None
+
+    # Simulate config.yaml changed on disk by setting _cfg_mtime to 0
+    # (which won't match the actual file mtime)
+    config._cfg_mtime = 0.0
+
+    # The next call should detect mtime mismatch, reload, and invalidate cache
+    old_cache = config._available_models_cache
+    old_ts = config._available_models_cache_ts
+
+    result2 = config.get_available_models()
+
+    # Cache must have been refreshed — timestamp advanced since we reset it
+    # to 0.0 on invalidation.
+    assert config._available_models_cache_ts > 0.0, (
+        "Cache timestamp should be updated after invalidation + rebuild"
+    )
+
+    # Restore
+    config._cfg_mtime = real_mtime
+    _reset_cache()
+
+
+# ── 4. test_deepcopy_isolation ────────────────────────────────────────────
+
+def test_deepcopy_isolation():
+    """Mutating the returned dict from get_available_models() must not
+    affect the cache or subsequent return values.
+    """
+    _reset_cache()
+
+    # Ensure _cfg_mtime matches file so mtime check doesn't invalidate
+    try:
+        config._cfg_mtime = config.Path(config._get_config_path()).stat().st_mtime
+    except OSError:
+        config._cfg_mtime = 0.0
+
+    # First call populates cache
+    result1 = config.get_available_models()
+
+    # Mutate the returned dict
+    if result1["groups"]:
+        result1["groups"][0]["models"].clear()
+    result1["groups"].append({"provider": "FAKE", "models": [{"id": "fake-model"}]})
+    result1["active_provider"] = "HACKED"
+
+    # Second call should return an unmutated copy
+    result2 = config.get_available_models()
+
+    # The mutated keys must not appear in the second result
+    assert result2["active_provider"] != "HACKED", "Mutation leaked into cache"
+    assert not any(
+        g.get("provider") == "FAKE" for g in result2["groups"]
+    ), "Fake provider leaked into cache"
+
+    # If there were groups originally, the first group's models should not be empty
+    # (unless it genuinely had no models, which is unlikely)
+    if result1["groups"] and result2["groups"]:
+        # result1["groups"][0]["models"] was cleared, but result2 should be intact
+        assert len(result2["groups"][0].get("models", [])) > 0, (
+            "Mutation of result1 cleared models in result2 — deepcopy failed"
+        )
+
+    _reset_cache()
+
+
+# ── 5. test_invalidate_models_cache_direct ───────────────────────────────
+
+def test_invalidate_models_cache_direct():
+    """Call invalidate_models_cache() after populating the cache.
+    _AVAILABLE_MODELS_CACHE should be None and the next call should re-scan.
+    """
+    _reset_cache()
+
+    # Ensure _cfg_mtime matches file so mtime check doesn't invalidate
+    try:
+        config._cfg_mtime = config.Path(config._get_config_path()).stat().st_mtime
+    except OSError:
+        config._cfg_mtime = 0.0
+
+    # First call populates cache
+    result1 = config.get_available_models()
+    assert config._available_models_cache is not None, "Cache should be populated"
+    first_ts = config._available_models_cache_ts
+
+    # Directly invalidate
+    config.invalidate_models_cache()
+
+    # Cache must be cleared
+    assert config._available_models_cache is None, (
+        "invalidate_models_cache() should set _AVAILABLE_MODELS_CACHE to None"
+    )
+
+    # Next call should re-scan and produce a fresh cache
+    result2 = config.get_available_models()
+    assert config._available_models_cache is not None, "Cache should be re-populated"
+    assert config._available_models_cache_ts >= first_ts, (
+        "Cache timestamp should be updated after re-scan"
+    )
+
+    _reset_cache()