fix: prune stale session index entries after session-id rotation — v0.50.148 (PR #847 by @franksong2702)
Prune ghost _index.json rows whose backing session file no longer exists, on both incremental index writes and all_sessions() reads. Fixes duplicate session entries after session-id rotation (e.g. context compression). Also pre-snapshots in_memory_ids under a single LOCK acquisition in all_sessions() rather than one per row. Closes #846. Review additions: optimised lock pattern in all_sessions() (one LOCK acquisition instead of N). Tests: 1856 passing.
This commit is contained in:
@@ -20,6 +20,25 @@ from api.workspace import get_last_workspace
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _index_entry_exists(session_id: str, in_memory_ids=None) -> bool:
|
||||
"""Return True if an index entry still has backing state.
|
||||
|
||||
A session can legitimately exist either as a persisted JSON file or as an
|
||||
in-memory Session object that has not been flushed yet. This helper is used
|
||||
to prune stale `_index.json` rows left behind after session-id rotation or
|
||||
file removal.
|
||||
"""
|
||||
if not session_id:
|
||||
return False
|
||||
if in_memory_ids is None:
|
||||
with LOCK:
|
||||
in_memory_ids = set(SESSIONS.keys())
|
||||
if session_id in in_memory_ids:
|
||||
return True
|
||||
p = SESSION_DIR / f'{session_id}.json'
|
||||
return p.exists()
|
||||
|
||||
|
||||
def _write_session_index(updates=None):
|
||||
"""Update the session index file.
|
||||
|
||||
@@ -56,6 +75,11 @@ def _write_session_index(updates=None):
|
||||
try:
|
||||
with LOCK:
|
||||
existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
|
||||
in_memory_ids = set(SESSIONS.keys())
|
||||
existing = [
|
||||
e for e in existing
|
||||
if _index_entry_exists(e.get('session_id'), in_memory_ids=in_memory_ids)
|
||||
]
|
||||
# Build lookup of updated entries
|
||||
updated_map = {s.session_id: s.compact() for s in updates}
|
||||
existing_ids = {e.get('session_id') for e in existing}
|
||||
@@ -212,6 +236,10 @@ def all_sessions():
|
||||
if SESSION_INDEX_FILE.exists():
|
||||
try:
|
||||
index = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
|
||||
index = [
|
||||
s for s in index
|
||||
if _index_entry_exists(s.get('session_id'))
|
||||
]
|
||||
# Overlay any in-memory sessions that may be newer than the index
|
||||
index_map = {s['session_id']: s for s in index}
|
||||
with LOCK:
|
||||
|
||||
@@ -146,6 +146,38 @@ def test_new_session_appended_to_index():
|
||||
assert "sess_b" in ids
|
||||
|
||||
|
||||
def test_incremental_update_prunes_stale_entries():
|
||||
"""Ghost rows whose backing JSON file is gone must be dropped on the fast path.
|
||||
|
||||
This covers session-id rotation paths (e.g. compression) where the old id can
|
||||
linger in `_index.json` after the file has been renamed.
|
||||
"""
|
||||
index_file = models.SESSION_INDEX_FILE
|
||||
|
||||
stale = {
|
||||
"session_id": "ghost_sid",
|
||||
"title": "Ghost",
|
||||
"updated_at": 150.0,
|
||||
"workspace": "/tmp",
|
||||
"model": "test",
|
||||
"message_count": 1,
|
||||
"created_at": 100.0,
|
||||
"pinned": False,
|
||||
"archived": False,
|
||||
}
|
||||
_write_index_file(index_file, [stale])
|
||||
|
||||
sA = _make_session("sess_a", "Alpha", updated_at=200.0)
|
||||
sA.path.write_text(json.dumps(sA.__dict__, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
|
||||
_write_session_index(updates=[sA])
|
||||
|
||||
index = _read_index(index_file)
|
||||
ids = {e["session_id"] for e in index}
|
||||
assert "sess_a" in ids
|
||||
assert "ghost_sid" not in ids, "stale entry with no backing file must be pruned"
|
||||
|
||||
|
||||
# ── 8. test_first_call_full_rebuild ──────────────────────────────────────
|
||||
|
||||
def test_first_call_full_rebuild():
|
||||
@@ -348,3 +380,33 @@ def test_deadlock_guard_on_fallback():
|
||||
# The index should still be valid after fallback
|
||||
index = _read_index(index_file)
|
||||
assert isinstance(index, list)
|
||||
|
||||
|
||||
def test_all_sessions_ignores_stale_index_entries():
|
||||
"""Reading via all_sessions() must not surface ghost rows from _index.json."""
|
||||
index_file = models.SESSION_INDEX_FILE
|
||||
|
||||
valid_session = _make_session("sess_a", "Alpha", updated_at=200.0)
|
||||
valid_session.path.write_text(
|
||||
json.dumps(valid_session.__dict__, ensure_ascii=False, indent=2),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
valid = valid_session.compact()
|
||||
stale = {
|
||||
"session_id": "ghost_sid",
|
||||
"title": "Ghost",
|
||||
"updated_at": 150.0,
|
||||
"workspace": "/tmp",
|
||||
"model": "test",
|
||||
"message_count": 1,
|
||||
"created_at": 100.0,
|
||||
"pinned": False,
|
||||
"archived": False,
|
||||
}
|
||||
_write_index_file(index_file, [stale, valid])
|
||||
|
||||
rows = models.all_sessions()
|
||||
ids = {e["session_id"] for e in rows}
|
||||
assert "sess_a" in ids
|
||||
assert "ghost_sid" not in ids
|
||||
|
||||
Reference in New Issue
Block a user