diff --git a/api/models.py b/api/models.py
index ef039e5..c70886d 100644
--- a/api/models.py
+++ b/api/models.py
@@ -20,6 +20,29 @@ from api.workspace import get_last_workspace
 logger = logging.getLogger(__name__)
 
 
+def _index_entry_exists(session_id: str, in_memory_ids=None) -> bool:
+    """Return True if an index entry still has backing state.
+
+    A session can legitimately exist either as a persisted JSON file or as an
+    in-memory Session object that has not been flushed yet. This helper is used
+    to prune stale `_index.json` rows left behind after session-id rotation or
+    file removal.
+
+    Callers that check many entries, or that already hold `LOCK`, should pass
+    `in_memory_ids` (a snapshot of the `SESSIONS` keys); when it is omitted the
+    snapshot is taken here under `LOCK` on every call.
+    """
+    if not session_id:
+        return False
+    if in_memory_ids is None:
+        with LOCK:
+            in_memory_ids = set(SESSIONS.keys())
+    if session_id in in_memory_ids:
+        return True
+    p = SESSION_DIR / f'{session_id}.json'
+    return p.exists()
+
+
 def _write_session_index(updates=None):
     """Update the session index file.
 
@@ -56,6 +79,11 @@ def _write_session_index(updates=None):
     try:
         with LOCK:
             existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
+            in_memory_ids = set(SESSIONS.keys())
+            existing = [
+                e for e in existing
+                if _index_entry_exists(e.get('session_id'), in_memory_ids=in_memory_ids)
+            ]
             # Build lookup of updated entries
             updated_map = {s.session_id: s.compact() for s in updates}
             existing_ids = {e.get('session_id') for e in existing}
@@ -212,6 +240,14 @@ def all_sessions():
     if SESSION_INDEX_FILE.exists():
         try:
             index = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
+            # Snapshot in-memory ids once so pruning does not re-acquire LOCK
+            # and rebuild the id set for every index row.
+            with LOCK:
+                in_memory_ids = set(SESSIONS.keys())
+            index = [
+                s for s in index
+                if _index_entry_exists(s.get('session_id'), in_memory_ids=in_memory_ids)
+            ]
             # Overlay any in-memory sessions that may be newer than the index
             index_map = {s['session_id']: s for s in index}
             with LOCK:
diff --git a/tests/test_session_index.py b/tests/test_session_index.py
index 92846b8..f4702ae 100644
--- a/tests/test_session_index.py
+++ b/tests/test_session_index.py
@@ -146,6 +146,38 @@ def test_new_session_appended_to_index():
     assert "sess_b" in ids
 
 
+def test_incremental_update_prunes_stale_entries():
+    """Ghost rows whose backing JSON file is gone must be dropped on the fast path.
+
+    This covers session-id rotation paths (e.g. compression) where the old id can
+    linger in `_index.json` after the file has been renamed.
+    """
+    index_file = models.SESSION_INDEX_FILE
+
+    stale = {
+        "session_id": "ghost_sid",
+        "title": "Ghost",
+        "updated_at": 150.0,
+        "workspace": "/tmp",
+        "model": "test",
+        "message_count": 1,
+        "created_at": 100.0,
+        "pinned": False,
+        "archived": False,
+    }
+    _write_index_file(index_file, [stale])
+
+    sA = _make_session("sess_a", "Alpha", updated_at=200.0)
+    sA.path.write_text(json.dumps(sA.__dict__, ensure_ascii=False, indent=2), encoding="utf-8")
+
+    _write_session_index(updates=[sA])
+
+    index = _read_index(index_file)
+    ids = {e["session_id"] for e in index}
+    assert "sess_a" in ids
+    assert "ghost_sid" not in ids, "stale entry with no backing file must be pruned"
+
+
 # ── 8. test_first_call_full_rebuild ──────────────────────────────────────
 
 def test_first_call_full_rebuild():
@@ -348,3 +380,33 @@ def test_deadlock_guard_on_fallback():
     # The index should still be valid after fallback
     index = _read_index(index_file)
     assert isinstance(index, list)
+
+
+def test_all_sessions_ignores_stale_index_entries():
+    """Reading via all_sessions() must not surface ghost rows from _index.json."""
+    index_file = models.SESSION_INDEX_FILE
+
+    valid_session = _make_session("sess_a", "Alpha", updated_at=200.0)
+    valid_session.path.write_text(
+        json.dumps(valid_session.__dict__, ensure_ascii=False, indent=2),
+        encoding="utf-8",
+    )
+
+    valid = valid_session.compact()
+    stale = {
+        "session_id": "ghost_sid",
+        "title": "Ghost",
+        "updated_at": 150.0,
+        "workspace": "/tmp",
+        "model": "test",
+        "message_count": 1,
+        "created_at": 100.0,
+        "pinned": False,
+        "archived": False,
+    }
+    _write_index_file(index_file, [stale, valid])
+
+    rows = models.all_sessions()
+    ids = {e["session_id"] for e in rows}
+    assert "sess_a" in ids
+    assert "ghost_sid" not in ids