fix: prune stale session index entries after session-id rotation — v0.50.148 (PR #847 by @franksong2702)

Prune ghost _index.json rows whose backing session file no longer exists, on both incremental index writes and all_sessions() reads. Fixes duplicate session entries after session-id rotation (e.g. context compression). Also pre-snapshots in_memory_ids under a single LOCK acquisition in all_sessions() rather than one per row.

Closes #846.

Review additions: optimised lock pattern in all_sessions() (one LOCK acquisition instead of N). Tests: 1856 passing.
This commit is contained in:
Frank Song
2026-04-23 00:26:38 +08:00
committed by GitHub
parent 85434dd03c
commit 13dbd818c9
2 changed files with 90 additions and 0 deletions

View File

@@ -20,6 +20,25 @@ from api.workspace import get_last_workspace
logger = logging.getLogger(__name__)
def _index_entry_exists(session_id: str, in_memory_ids=None) -> bool:
"""Return True if an index entry still has backing state.
A session can legitimately exist either as a persisted JSON file or as an
in-memory Session object that has not been flushed yet. This helper is used
to prune stale `_index.json` rows left behind after session-id rotation or
file removal.
"""
if not session_id:
return False
if in_memory_ids is None:
with LOCK:
in_memory_ids = set(SESSIONS.keys())
if session_id in in_memory_ids:
return True
p = SESSION_DIR / f'{session_id}.json'
return p.exists()
def _write_session_index(updates=None):
"""Update the session index file.
@@ -56,6 +75,11 @@ def _write_session_index(updates=None):
try:
with LOCK:
existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
in_memory_ids = set(SESSIONS.keys())
existing = [
e for e in existing
if _index_entry_exists(e.get('session_id'), in_memory_ids=in_memory_ids)
]
# Build lookup of updated entries
updated_map = {s.session_id: s.compact() for s in updates}
existing_ids = {e.get('session_id') for e in existing}
@@ -212,6 +236,10 @@ def all_sessions():
if SESSION_INDEX_FILE.exists():
try:
index = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
index = [
s for s in index
if _index_entry_exists(s.get('session_id'))
]
# Overlay any in-memory sessions that may be newer than the index
index_map = {s['session_id']: s for s in index}
with LOCK:

View File

@@ -146,6 +146,38 @@ def test_new_session_appended_to_index():
assert "sess_b" in ids
def test_incremental_update_prunes_stale_entries():
    """The incremental index write must discard rows that lack a backing file.

    Session-id rotation (compression, for instance) can rename a session file
    while the previous id is still listed in `_index.json`; the next
    incremental update is expected to drop that orphaned row.
    """
    index_path = models.SESSION_INDEX_FILE

    # Seed the index with one row for which no session file is written here.
    ghost_row = {
        "session_id": "ghost_sid",
        "title": "Ghost",
        "updated_at": 150.0,
        "workspace": "/tmp",
        "model": "test",
        "message_count": 1,
        "created_at": 100.0,
        "pinned": False,
        "archived": False,
    }
    _write_index_file(index_path, [ghost_row])

    # A genuine session: persist its JSON file, then push it through the
    # incremental update path.
    live_session = _make_session("sess_a", "Alpha", updated_at=200.0)
    payload = json.dumps(live_session.__dict__, ensure_ascii=False, indent=2)
    live_session.path.write_text(payload, encoding="utf-8")
    _write_session_index(updates=[live_session])

    surviving_ids = {row["session_id"] for row in _read_index(index_path)}
    assert "sess_a" in surviving_ids
    assert "ghost_sid" not in surviving_ids, "stale entry with no backing file must be pruned"
# ── 8. test_first_call_full_rebuild ──────────────────────────────────────
def test_first_call_full_rebuild():
@@ -348,3 +380,33 @@ def test_deadlock_guard_on_fallback():
# The index should still be valid after fallback
index = _read_index(index_file)
assert isinstance(index, list)
def test_all_sessions_ignores_stale_index_entries():
    """all_sessions() must hide `_index.json` rows that have no backing session."""
    index_path = models.SESSION_INDEX_FILE

    # A real session with its JSON file on disk; its compact form is the
    # legitimate index row.
    live_session = _make_session("sess_a", "Alpha", updated_at=200.0)
    payload = json.dumps(live_session.__dict__, ensure_ascii=False, indent=2)
    live_session.path.write_text(payload, encoding="utf-8")
    live_row = live_session.compact()

    # An index row for which no session file is written in this test.
    ghost_row = {
        "session_id": "ghost_sid",
        "title": "Ghost",
        "updated_at": 150.0,
        "workspace": "/tmp",
        "model": "test",
        "message_count": 1,
        "created_at": 100.0,
        "pinned": False,
        "archived": False,
    }
    _write_index_file(index_path, [ghost_row, live_row])

    listed_ids = {row["session_id"] for row in models.all_sessions()}
    assert "sess_a" in listed_ids
    assert "ghost_sid" not in listed_ids