fix: prune stale session index entries after session-id rotation — v0.50.148 (PR #847 by @franksong2702)
Prune ghost _index.json rows whose backing session file no longer exists, on both incremental index writes and all_sessions() reads. Fixes duplicate session entries after session-id rotation (e.g. context compression). Also pre-snapshots in_memory_ids under a single LOCK acquisition in all_sessions() rather than one per row. Closes #846. Review additions: optimised lock pattern in all_sessions() (one LOCK acquisition instead of N). Tests: 1856 passing.
This commit is contained in:
@@ -20,6 +20,25 @@ from api.workspace import get_last_workspace
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _index_entry_exists(session_id: str, in_memory_ids=None) -> bool:
|
||||||
|
"""Return True if an index entry still has backing state.
|
||||||
|
|
||||||
|
A session can legitimately exist either as a persisted JSON file or as an
|
||||||
|
in-memory Session object that has not been flushed yet. This helper is used
|
||||||
|
to prune stale `_index.json` rows left behind after session-id rotation or
|
||||||
|
file removal.
|
||||||
|
"""
|
||||||
|
if not session_id:
|
||||||
|
return False
|
||||||
|
if in_memory_ids is None:
|
||||||
|
with LOCK:
|
||||||
|
in_memory_ids = set(SESSIONS.keys())
|
||||||
|
if session_id in in_memory_ids:
|
||||||
|
return True
|
||||||
|
p = SESSION_DIR / f'{session_id}.json'
|
||||||
|
return p.exists()
|
||||||
|
|
||||||
|
|
||||||
def _write_session_index(updates=None):
|
def _write_session_index(updates=None):
|
||||||
"""Update the session index file.
|
"""Update the session index file.
|
||||||
|
|
||||||
@@ -56,6 +75,11 @@ def _write_session_index(updates=None):
|
|||||||
try:
|
try:
|
||||||
with LOCK:
|
with LOCK:
|
||||||
existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
|
existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
|
||||||
|
in_memory_ids = set(SESSIONS.keys())
|
||||||
|
existing = [
|
||||||
|
e for e in existing
|
||||||
|
if _index_entry_exists(e.get('session_id'), in_memory_ids=in_memory_ids)
|
||||||
|
]
|
||||||
# Build lookup of updated entries
|
# Build lookup of updated entries
|
||||||
updated_map = {s.session_id: s.compact() for s in updates}
|
updated_map = {s.session_id: s.compact() for s in updates}
|
||||||
existing_ids = {e.get('session_id') for e in existing}
|
existing_ids = {e.get('session_id') for e in existing}
|
||||||
@@ -212,6 +236,10 @@ def all_sessions():
|
|||||||
if SESSION_INDEX_FILE.exists():
|
if SESSION_INDEX_FILE.exists():
|
||||||
try:
|
try:
|
||||||
index = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
|
index = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
|
||||||
|
index = [
|
||||||
|
s for s in index
|
||||||
|
if _index_entry_exists(s.get('session_id'))
|
||||||
|
]
|
||||||
# Overlay any in-memory sessions that may be newer than the index
|
# Overlay any in-memory sessions that may be newer than the index
|
||||||
index_map = {s['session_id']: s for s in index}
|
index_map = {s['session_id']: s for s in index}
|
||||||
with LOCK:
|
with LOCK:
|
||||||
|
|||||||
@@ -146,6 +146,38 @@ def test_new_session_appended_to_index():
|
|||||||
assert "sess_b" in ids
|
assert "sess_b" in ids
|
||||||
|
|
||||||
|
|
||||||
|
def test_incremental_update_prunes_stale_entries():
|
||||||
|
"""Ghost rows whose backing JSON file is gone must be dropped on the fast path.
|
||||||
|
|
||||||
|
This covers session-id rotation paths (e.g. compression) where the old id can
|
||||||
|
linger in `_index.json` after the file has been renamed.
|
||||||
|
"""
|
||||||
|
index_file = models.SESSION_INDEX_FILE
|
||||||
|
|
||||||
|
stale = {
|
||||||
|
"session_id": "ghost_sid",
|
||||||
|
"title": "Ghost",
|
||||||
|
"updated_at": 150.0,
|
||||||
|
"workspace": "/tmp",
|
||||||
|
"model": "test",
|
||||||
|
"message_count": 1,
|
||||||
|
"created_at": 100.0,
|
||||||
|
"pinned": False,
|
||||||
|
"archived": False,
|
||||||
|
}
|
||||||
|
_write_index_file(index_file, [stale])
|
||||||
|
|
||||||
|
sA = _make_session("sess_a", "Alpha", updated_at=200.0)
|
||||||
|
sA.path.write_text(json.dumps(sA.__dict__, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||||
|
|
||||||
|
_write_session_index(updates=[sA])
|
||||||
|
|
||||||
|
index = _read_index(index_file)
|
||||||
|
ids = {e["session_id"] for e in index}
|
||||||
|
assert "sess_a" in ids
|
||||||
|
assert "ghost_sid" not in ids, "stale entry with no backing file must be pruned"
|
||||||
|
|
||||||
|
|
||||||
# ── 8. test_first_call_full_rebuild ──────────────────────────────────────
|
# ── 8. test_first_call_full_rebuild ──────────────────────────────────────
|
||||||
|
|
||||||
def test_first_call_full_rebuild():
|
def test_first_call_full_rebuild():
|
||||||
@@ -348,3 +380,33 @@ def test_deadlock_guard_on_fallback():
|
|||||||
# The index should still be valid after fallback
|
# The index should still be valid after fallback
|
||||||
index = _read_index(index_file)
|
index = _read_index(index_file)
|
||||||
assert isinstance(index, list)
|
assert isinstance(index, list)
|
||||||
|
|
||||||
|
|
||||||
|
def test_all_sessions_ignores_stale_index_entries():
|
||||||
|
"""Reading via all_sessions() must not surface ghost rows from _index.json."""
|
||||||
|
index_file = models.SESSION_INDEX_FILE
|
||||||
|
|
||||||
|
valid_session = _make_session("sess_a", "Alpha", updated_at=200.0)
|
||||||
|
valid_session.path.write_text(
|
||||||
|
json.dumps(valid_session.__dict__, ensure_ascii=False, indent=2),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
valid = valid_session.compact()
|
||||||
|
stale = {
|
||||||
|
"session_id": "ghost_sid",
|
||||||
|
"title": "Ghost",
|
||||||
|
"updated_at": 150.0,
|
||||||
|
"workspace": "/tmp",
|
||||||
|
"model": "test",
|
||||||
|
"message_count": 1,
|
||||||
|
"created_at": 100.0,
|
||||||
|
"pinned": False,
|
||||||
|
"archived": False,
|
||||||
|
}
|
||||||
|
_write_index_file(index_file, [stale, valid])
|
||||||
|
|
||||||
|
rows = models.all_sessions()
|
||||||
|
ids = {e["session_id"] for e in rows}
|
||||||
|
assert "sess_a" in ids
|
||||||
|
assert "ghost_sid" not in ids
|
||||||
|
|||||||
Reference in New Issue
Block a user