fix: harden session persistence and per-session lock handling during streaming (v0.50.175, #910)
Co-authored-by: starship-s Co-authored-by: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
This commit is contained in:
@@ -29,6 +29,11 @@
|
||||
workspace subtree) and never enumerate blocked system roots. (`api/routes.py`,
|
||||
`api/workspace.py`, `static/panels.js`, `static/style.css`) (partial for #616)
|
||||
|
||||
## [v0.50.175] — 2026-04-23
|
||||
|
||||
### Fixed
|
||||
- **Session persistence hardened against concurrent write races** — all session-mutation paths (streaming success/error/cancel, periodic checkpoint, background title update, retry/undo, and the HTTP endpoints for title/personality/workspace/clear/truncate/pin/archive/project) now hold a per-session `_agent_lock` during in-memory mutation and `Session.save()`. The checkpoint thread is stopped and joined before the final save, preventing stale-object clobbers. `Session.save()` now writes to a temporary file with a pid + thread-id suffix, fsyncs it, and atomically renames it into place. `_write_session_index()` gets a dedicated `_INDEX_WRITE_LOCK` so disk I/O runs outside the global `LOCK`, reducing head-of-line blocking. Context compression now runs the LLM call outside the lock with a stale-edit check (409) on write-back. (`api/streaming.py`, `api/models.py`, `api/routes.py`, `api/session_ops.py`, `api/config.py`) Closes #765. Co-authored by @starship-s.
|
||||
|
||||
## [v0.50.174] — 2026-04-23
|
||||
|
||||
### Fixed
|
||||
|
||||
@@ -1683,6 +1683,25 @@ SESSION_AGENT_LOCKS_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _get_session_agent_lock(session_id: str) -> threading.Lock:
|
||||
"""Return the per-session Lock used to serialize all Session mutations.
|
||||
|
||||
Lock lifecycle invariant:
|
||||
- A Lock is created lazily on first access and lives in SESSION_AGENT_LOCKS
|
||||
for the lifetime of the session.
|
||||
- The entry is pruned in /api/session/delete (under SESSION_AGENT_LOCKS_LOCK)
|
||||
so deleted sessions don't leak a Lock forever.
|
||||
- During context compression the agent may rotate session_id. The
|
||||
streaming thread migrates the lock entry atomically under
|
||||
SESSION_AGENT_LOCKS_LOCK: it aliases the new session_id to the *same*
|
||||
Lock object and pops the old-id entry (see streaming.py compression
|
||||
block). This ensures that subsequent callers using the new ID still
|
||||
acquire the same Lock, while the old-id entry is removed to prevent a
|
||||
leak. The streaming thread already holds the Lock during this
|
||||
migration, so the reference stays alive even after the dict entry is
|
||||
removed.
|
||||
- Lock contract: hold for the in-memory mutation + s.save() only; never
|
||||
across network I/O (LLM calls, HTTP requests).
|
||||
"""
|
||||
with SESSION_AGENT_LOCKS_LOCK:
|
||||
if session_id not in SESSION_AGENT_LOCKS:
|
||||
SESSION_AGENT_LOCKS[session_id] = threading.Lock()
|
||||
|
||||
208
api/models.py
208
api/models.py
@@ -1,10 +1,9 @@
|
||||
"""
|
||||
Hermes Web UI -- Session model and in-memory session store.
|
||||
"""
|
||||
"""Hermes Web UI -- Session model and in-memory session store."""
|
||||
import collections
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
@@ -19,6 +18,46 @@ from api.workspace import get_last_workspace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stale temp-file cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
# Both Session.save() and _write_session_index() use the atomic-write pattern:
|
||||
# write to <path>.tmp.<pid>.<tid> → os.replace() to final path
|
||||
# If the process crashes between write and replace the .tmp file is left
|
||||
# behind. Because the name embeds pid + tid, leftover files can never be
|
||||
# reused by a different process/thread, so they are safe to remove on the
|
||||
# next startup. _cleanup_stale_tmp_files() is called from the full-rebuild
|
||||
# path of _write_session_index (i.e. at first index access / startup) and
|
||||
# removes any *.tmp.* file whose mtime is older than one hour.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_STALE_TMP_AGE_SECONDS = 3600 # 1 hour
|
||||
|
||||
# Serializes index writers so concurrent Session.save() calls cannot race on
|
||||
# stale baselines while still allowing LOCK to be released before disk I/O.
|
||||
_INDEX_WRITE_LOCK = threading.RLock()
|
||||
|
||||
|
||||
def _cleanup_stale_tmp_files() -> None:
    """Best-effort removal of stale ``*.tmp.*`` files from SESSION_DIR.

    Only files whose mtime is older than ``_STALE_TMP_AGE_SECONDS`` are
    removed, so that in-flight writes from a long-running sibling process
    are not disturbed.

    Errors are logged at debug level and swallowed — this sweep must never
    prevent startup or fail the index rebuild that triggers it.
    """
    cutoff = time.time() - _STALE_TMP_AGE_SECONDS
    try:
        for p in SESSION_DIR.glob('*.tmp.*'):
            try:
                if p.stat().st_mtime < cutoff:
                    p.unlink(missing_ok=True)
                    logger.debug("Cleaned up stale tmp file: %s", p.name)
            except OSError as exc:
                # Per-file failure (vanished between glob and stat, permission
                # denied, matched a directory, ...): record it and keep going.
                logger.debug("Could not clean up tmp file %s: %s", p.name, exc)
    except Exception as exc:
        # The sweep itself failed (e.g. SESSION_DIR is not usable yet).
        # That is fine for a best-effort cleanup, but leave a trace.
        logger.debug("Stale tmp-file sweep skipped: %s", exc)
|
||||
|
||||
|
||||
def _index_entry_exists(session_id: str, in_memory_ids=None) -> bool:
|
||||
"""Return True if an index entry still has backing state.
|
||||
@@ -46,58 +85,101 @@ def _write_session_index(updates=None):
|
||||
entries should be refreshed), this does a targeted in-place update of
|
||||
the existing index — O(1) for single-session changes. When *updates*
|
||||
is None, a full rebuild is performed (used on startup / first call).
|
||||
"""
|
||||
# Lazy full-rebuild path — used when index doesn't exist yet.
|
||||
if updates is None or not SESSION_INDEX_FILE.exists():
|
||||
entries = []
|
||||
for p in SESSION_DIR.glob('*.json'):
|
||||
if p.name.startswith('_'): continue
|
||||
try:
|
||||
s = Session.load(p.stem)
|
||||
if s: entries.append(s.compact())
|
||||
except Exception:
|
||||
logger.debug("Failed to load session from %s", p)
|
||||
with LOCK:
|
||||
for s in SESSIONS.values():
|
||||
if not any(e['session_id'] == s.session_id for e in entries):
|
||||
entries.append(s.compact())
|
||||
entries.sort(key=lambda s: s['updated_at'], reverse=True)
|
||||
_tmp = SESSION_INDEX_FILE.with_suffix('.tmp')
|
||||
_tmp.write_text(json.dumps(entries, ensure_ascii=False, indent=2), encoding='utf-8')
|
||||
os.replace(_tmp, SESSION_INDEX_FILE)
|
||||
return
|
||||
|
||||
# Fast path: patch existing index with updated sessions.
|
||||
# This avoids loading every session file on every single save().
|
||||
# LOCK covers the entire read-patch-write to prevent concurrent save() calls
|
||||
# from both reading the same baseline and one losing its update.
|
||||
_fallback = False
|
||||
try:
|
||||
with LOCK:
|
||||
existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
|
||||
in_memory_ids = set(SESSIONS.keys())
|
||||
existing = [
|
||||
e for e in existing
|
||||
if _index_entry_exists(e.get('session_id'), in_memory_ids=in_memory_ids)
|
||||
]
|
||||
# Build lookup of updated entries
|
||||
updated_map = {s.session_id: s.compact() for s in updates}
|
||||
existing_ids = {e.get('session_id') for e in existing}
|
||||
# Add any updated entries not yet in the index
|
||||
for sid, entry in updated_map.items():
|
||||
if sid not in existing_ids:
|
||||
existing.append(entry)
|
||||
# Replace matching entries in-place
|
||||
for i, e in enumerate(existing):
|
||||
sid = e.get('session_id')
|
||||
if sid in updated_map:
|
||||
existing[i] = updated_map[sid]
|
||||
existing.sort(key=lambda s: s.get('updated_at', 0), reverse=True)
|
||||
_tmp = SESSION_INDEX_FILE.with_suffix('.tmp')
|
||||
_tmp.write_text(json.dumps(existing, ensure_ascii=False, indent=2), encoding='utf-8')
|
||||
os.replace(_tmp, SESSION_INDEX_FILE)
|
||||
except Exception:
|
||||
_fallback = True
|
||||
LOCK protects in-memory state snapshots and payload construction only;
|
||||
disk I/O (write/flush/fsync/replace) always runs outside LOCK.
|
||||
"""
|
||||
_tmp = SESSION_INDEX_FILE.with_suffix(f'.tmp.{os.getpid()}.{threading.current_thread().ident}')
|
||||
|
||||
with _INDEX_WRITE_LOCK:
|
||||
# Lazy full-rebuild path — used when index doesn't exist yet.
|
||||
if updates is None or not SESSION_INDEX_FILE.exists():
|
||||
_cleanup_stale_tmp_files() # best-effort sweep on startup / first call
|
||||
entries = []
|
||||
for p in SESSION_DIR.glob('*.json'):
|
||||
if p.name.startswith('_'):
|
||||
continue
|
||||
try:
|
||||
s = Session.load(p.stem)
|
||||
if s:
|
||||
entries.append(s.compact())
|
||||
except Exception:
|
||||
logger.debug("Failed to load session from %s", p)
|
||||
|
||||
with LOCK:
|
||||
existing_ids = {e.get('session_id') for e in entries}
|
||||
for s in SESSIONS.values():
|
||||
if s.session_id not in existing_ids:
|
||||
entries.append(s.compact())
|
||||
entries.sort(key=lambda s: s.get('updated_at', 0), reverse=True)
|
||||
_payload = json.dumps(entries, ensure_ascii=False, indent=2)
|
||||
|
||||
try:
|
||||
with open(_tmp, 'w', encoding='utf-8') as f:
|
||||
f.write(_payload)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(_tmp, SESSION_INDEX_FILE)
|
||||
except Exception:
|
||||
# Best-effort cleanup of stale tmp on failure
|
||||
try:
|
||||
_tmp.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
return
|
||||
|
||||
# Fast path: patch existing index with updated sessions.
|
||||
# This avoids loading every session file on every single save().
|
||||
_fallback = False
|
||||
try:
|
||||
with LOCK:
|
||||
existing = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8'))
|
||||
in_memory_ids = set(SESSIONS.keys())
|
||||
|
||||
# Avoid N filesystem exists() checks under LOCK by collecting
|
||||
# on-disk IDs once.
|
||||
on_disk_ids = {
|
||||
p.stem
|
||||
for p in SESSION_DIR.glob('*.json')
|
||||
if not p.name.startswith('_')
|
||||
}
|
||||
|
||||
existing = [
|
||||
e for e in existing
|
||||
if (e.get('session_id') in in_memory_ids or e.get('session_id') in on_disk_ids)
|
||||
]
|
||||
|
||||
# Build lookup of updated entries
|
||||
updated_map = {s.session_id: s.compact() for s in updates}
|
||||
existing_ids = {e.get('session_id') for e in existing}
|
||||
# Add any updated entries not yet in the index
|
||||
for sid, entry in updated_map.items():
|
||||
if sid not in existing_ids:
|
||||
existing.append(entry)
|
||||
# Replace matching entries in-place
|
||||
for i, e in enumerate(existing):
|
||||
sid = e.get('session_id')
|
||||
if sid in updated_map:
|
||||
existing[i] = updated_map[sid]
|
||||
existing.sort(key=lambda s: s.get('updated_at', 0), reverse=True)
|
||||
_payload = json.dumps(existing, ensure_ascii=False, indent=2)
|
||||
|
||||
try:
|
||||
with open(_tmp, 'w', encoding='utf-8') as f:
|
||||
f.write(_payload)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(_tmp, SESSION_INDEX_FILE)
|
||||
except Exception:
|
||||
try:
|
||||
_tmp.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
except Exception:
|
||||
_fallback = True
|
||||
|
||||
if _fallback:
|
||||
# Corrupt or missing index — fall back to full rebuild (called outside LOCK to avoid deadlock)
|
||||
_write_session_index(updates=None)
|
||||
@@ -157,10 +239,20 @@ class Session:
|
||||
def save(self, touch_updated_at: bool = True, skip_index: bool = False) -> None:
    """Persist this session to ``self.path`` atomically.

    The JSON payload is written to a sibling temp file whose suffix embeds
    the current pid and thread id, flushed and fsynced, and then moved over
    the final path with ``os.replace()``.  The session file is never written
    in place, so a crash mid-write cannot leave a truncated or partially
    written session file behind.

    Args:
        touch_updated_at: When true, set ``self.updated_at`` to the current
            time before serializing.
        skip_index: When true, do not refresh the session index after the
            file has been written.

    Raises:
        Exception: Any error from serializing or writing is re-raised after
            a best-effort removal of the temp file.
    """
    if touch_updated_at:
        self.updated_at = time.time()
    payload = json.dumps(self.__dict__, ensure_ascii=False, indent=2)
    # pid + thread id keep concurrent writers from sharing a temp file.
    tmp = self.path.with_suffix(f'.tmp.{os.getpid()}.{threading.current_thread().ident}')
    try:
        with open(tmp, 'w', encoding='utf-8') as f:
            f.write(payload)
            f.flush()
            os.fsync(f.fileno())
        # Replace only after the temp file is closed and durable.
        os.replace(tmp, self.path)
    except Exception:
        try:
            tmp.unlink(missing_ok=True)
        except Exception:
            pass  # cleanup is best-effort; surface the original error below
        raise
    if not skip_index:
        _write_session_index(updates=[self])
|
||||
|
||||
|
||||
153
api/routes.py
153
api/routes.py
@@ -66,6 +66,9 @@ from api.config import (
|
||||
MAX_FILE_BYTES,
|
||||
MAX_UPLOAD_BYTES,
|
||||
CHAT_LOCK,
|
||||
_get_session_agent_lock,
|
||||
SESSION_AGENT_LOCKS,
|
||||
SESSION_AGENT_LOCKS_LOCK,
|
||||
load_settings,
|
||||
save_settings,
|
||||
set_hermes_default_model,
|
||||
@@ -1049,8 +1052,9 @@ def handle_post(handler, parsed) -> bool:
|
||||
s = get_session(body["session_id"])
|
||||
except KeyError:
|
||||
return bad(handler, "Session not found", 404)
|
||||
s.title = str(body["title"]).strip()[:80] or "Untitled"
|
||||
s.save()
|
||||
with _get_session_agent_lock(body["session_id"]):
|
||||
s.title = str(body["title"]).strip()[:80] or "Untitled"
|
||||
s.save()
|
||||
return j(handler, {"session": s.compact()})
|
||||
|
||||
if parsed.path == "/api/personality/set":
|
||||
@@ -1093,8 +1097,9 @@ def handle_post(handler, parsed) -> bool:
|
||||
prompt = "\n".join(p for p in parts if p)
|
||||
else:
|
||||
prompt = str(value)
|
||||
s.personality = name if name else None
|
||||
s.save()
|
||||
with _get_session_agent_lock(sid):
|
||||
s.personality = name if name else None
|
||||
s.save()
|
||||
return j(handler, {"ok": True, "personality": s.personality, "prompt": prompt})
|
||||
|
||||
if parsed.path == "/api/session/update":
|
||||
@@ -1110,9 +1115,10 @@ def handle_post(handler, parsed) -> bool:
|
||||
new_ws = str(resolve_trusted_workspace(body.get("workspace", s.workspace)))
|
||||
except ValueError as e:
|
||||
return bad(handler, str(e))
|
||||
s.workspace = new_ws
|
||||
s.model = body.get("model", s.model)
|
||||
s.save()
|
||||
with _get_session_agent_lock(body["session_id"]):
|
||||
s.workspace = new_ws
|
||||
s.model = body.get("model", s.model)
|
||||
s.save()
|
||||
set_last_workspace(new_ws)
|
||||
return j(handler, {"session": s.compact() | {"messages": s.messages}})
|
||||
|
||||
@@ -1134,6 +1140,10 @@ def handle_post(handler, parsed) -> bool:
|
||||
p.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
logger.debug("Failed to unlink session file %s", p)
|
||||
# Prune the per-session agent lock so deleted sessions don't leak
|
||||
# Lock entries in SESSION_AGENT_LOCKS forever.
|
||||
with SESSION_AGENT_LOCKS_LOCK:
|
||||
SESSION_AGENT_LOCKS.pop(sid, None)
|
||||
try:
|
||||
SESSION_INDEX_FILE.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
@@ -1156,10 +1166,11 @@ def handle_post(handler, parsed) -> bool:
|
||||
s = get_session(body["session_id"])
|
||||
except KeyError:
|
||||
return bad(handler, "Session not found", 404)
|
||||
s.messages = []
|
||||
s.tool_calls = []
|
||||
s.title = "Untitled"
|
||||
s.save()
|
||||
with _get_session_agent_lock(body["session_id"]):
|
||||
s.messages = []
|
||||
s.tool_calls = []
|
||||
s.title = "Untitled"
|
||||
s.save()
|
||||
return j(handler, {"ok": True, "session": s.compact()})
|
||||
|
||||
if parsed.path == "/api/session/truncate":
|
||||
@@ -1174,8 +1185,9 @@ def handle_post(handler, parsed) -> bool:
|
||||
except KeyError:
|
||||
return bad(handler, "Session not found", 404)
|
||||
keep = int(body["keep_count"])
|
||||
s.messages = s.messages[:keep]
|
||||
s.save()
|
||||
with _get_session_agent_lock(body["session_id"]):
|
||||
s.messages = s.messages[:keep]
|
||||
s.save()
|
||||
return j(
|
||||
handler, {"ok": True, "session": s.compact() | {"messages": s.messages}}
|
||||
)
|
||||
@@ -1448,8 +1460,9 @@ def handle_post(handler, parsed) -> bool:
|
||||
s = get_session(body["session_id"])
|
||||
except KeyError:
|
||||
return bad(handler, "Session not found", 404)
|
||||
s.pinned = bool(body.get("pinned", True))
|
||||
s.save()
|
||||
with _get_session_agent_lock(body["session_id"]):
|
||||
s.pinned = bool(body.get("pinned", True))
|
||||
s.save()
|
||||
return j(handler, {"ok": True, "session": s.compact()})
|
||||
|
||||
# ── Session archive (POST) ──
|
||||
@@ -1462,8 +1475,9 @@ def handle_post(handler, parsed) -> bool:
|
||||
s = get_session(body["session_id"])
|
||||
except KeyError:
|
||||
return bad(handler, "Session not found", 404)
|
||||
s.archived = bool(body.get("archived", True))
|
||||
s.save()
|
||||
with _get_session_agent_lock(body["session_id"]):
|
||||
s.archived = bool(body.get("archived", True))
|
||||
s.save()
|
||||
return j(handler, {"ok": True, "session": s.compact()})
|
||||
|
||||
# ── Session move to project (POST) ──
|
||||
@@ -1476,8 +1490,9 @@ def handle_post(handler, parsed) -> bool:
|
||||
s = get_session(body["session_id"])
|
||||
except KeyError:
|
||||
return bad(handler, "Session not found", 404)
|
||||
s.project_id = body.get("project_id") or None
|
||||
s.save()
|
||||
with _get_session_agent_lock(body["session_id"]):
|
||||
s.project_id = body.get("project_id") or None
|
||||
s.save()
|
||||
return j(handler, {"ok": True, "session": s.compact()})
|
||||
|
||||
# ── Project CRUD (POST) ──
|
||||
@@ -2445,13 +2460,14 @@ def _handle_chat_start(handler, body):
|
||||
# Stale stream id from a previous run; clear and continue.
|
||||
s.active_stream_id = None
|
||||
stream_id = uuid.uuid4().hex
|
||||
s.workspace = workspace
|
||||
s.model = model
|
||||
s.active_stream_id = stream_id
|
||||
s.pending_user_message = msg
|
||||
s.pending_attachments = attachments
|
||||
s.pending_started_at = time.time()
|
||||
s.save()
|
||||
with _get_session_agent_lock(s.session_id):
|
||||
s.workspace = workspace
|
||||
s.model = model
|
||||
s.active_stream_id = stream_id
|
||||
s.pending_user_message = msg
|
||||
s.pending_attachments = attachments
|
||||
s.pending_started_at = time.time()
|
||||
s.save()
|
||||
set_last_workspace(workspace)
|
||||
q = queue.Queue()
|
||||
with STREAMS_LOCK:
|
||||
@@ -2470,15 +2486,14 @@ def _handle_chat_start(handler, body):
|
||||
|
||||
def _handle_chat_sync(handler, body):
|
||||
"""Fallback synchronous chat endpoint (POST /api/chat). Not used by frontend."""
|
||||
from api.config import _get_session_agent_lock
|
||||
|
||||
s = get_session(body["session_id"])
|
||||
msg = str(body.get("message", "")).strip()
|
||||
if not msg:
|
||||
return j(handler, {"error": "empty message"}, status=400)
|
||||
workspace = Path(body.get("workspace") or s.workspace).expanduser().resolve()
|
||||
s.workspace = str(workspace)
|
||||
s.model = body.get("model") or s.model
|
||||
with _get_session_agent_lock(s.session_id):
|
||||
s.workspace = str(workspace)
|
||||
s.model = body.get("model") or s.model
|
||||
from api.streaming import _ENV_LOCK
|
||||
|
||||
with _ENV_LOCK:
|
||||
@@ -2559,14 +2574,15 @@ def _handle_chat_sync(handler, body):
|
||||
os.environ.pop("HERMES_SESSION_KEY", None)
|
||||
else:
|
||||
os.environ["HERMES_SESSION_KEY"] = old_session_key
|
||||
s.messages = _restore_reasoning_metadata(
|
||||
_previous_messages,
|
||||
result.get("messages") or s.messages,
|
||||
)
|
||||
# Only auto-generate title when still default; preserves user renames
|
||||
if s.title == "Untitled":
|
||||
s.title = title_from(s.messages, s.title)
|
||||
s.save()
|
||||
with _get_session_agent_lock(s.session_id):
|
||||
s.messages = _restore_reasoning_metadata(
|
||||
_previous_messages,
|
||||
result.get("messages") or s.messages,
|
||||
)
|
||||
# Only auto-generate title when still default; preserves user renames
|
||||
if s.title == "Untitled":
|
||||
s.title = title_from(s.messages, s.title)
|
||||
s.save()
|
||||
# Sync to state.db for /insights (opt-in setting)
|
||||
try:
|
||||
if load_settings().get("sync_to_insights"):
|
||||
@@ -3094,33 +3110,42 @@ def _handle_session_compress(handler, body):
|
||||
if not resolved_api_key:
|
||||
return bad(handler, "No provider configured -- cannot compress.")
|
||||
|
||||
with _cfg._get_session_agent_lock(sid):
|
||||
original_messages = list(messages)
|
||||
approx_tokens = _estimate_messages_tokens_rough(original_messages)
|
||||
# Compute compression *outside* the lock — the LLM round-trip can take
|
||||
# many seconds and we must not block cancel_stream or other writers.
|
||||
# Lock contract: hold for the in-memory mutation only, never across
|
||||
# network I/O.
|
||||
original_messages = list(messages)
|
||||
approx_tokens = _estimate_messages_tokens_rough(original_messages)
|
||||
|
||||
agent = _run_agent.AIAgent(
|
||||
model=resolved_model,
|
||||
provider=resolved_provider,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
platform="cli",
|
||||
quiet_mode=True,
|
||||
enabled_toolsets=_resolve_cli_toolsets(),
|
||||
session_id=sid,
|
||||
)
|
||||
compressed = agent.context_compressor.compress(
|
||||
original_messages,
|
||||
current_tokens=approx_tokens,
|
||||
focus_topic=focus_topic,
|
||||
)
|
||||
new_tokens = _estimate_messages_tokens_rough(compressed)
|
||||
summary = _summarize_manual_compression(
|
||||
original_messages,
|
||||
compressed,
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
focus_topic=focus_topic,
|
||||
)
|
||||
agent = _run_agent.AIAgent(
|
||||
model=resolved_model,
|
||||
provider=resolved_provider,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
platform="cli",
|
||||
quiet_mode=True,
|
||||
enabled_toolsets=_resolve_cli_toolsets(),
|
||||
session_id=sid,
|
||||
)
|
||||
compressed = agent.context_compressor.compress(
|
||||
original_messages,
|
||||
current_tokens=approx_tokens,
|
||||
focus_topic=focus_topic,
|
||||
)
|
||||
new_tokens = _estimate_messages_tokens_rough(compressed)
|
||||
summary = _summarize_manual_compression(
|
||||
original_messages,
|
||||
compressed,
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
focus_topic=focus_topic,
|
||||
)
|
||||
|
||||
with _cfg._get_session_agent_lock(sid):
|
||||
# Re-read messages to detect concurrent edits during the LLM call.
|
||||
# If the history changed, the compression result is stale — abort.
|
||||
if _sanitize_messages_for_api(s.messages) != original_messages:
|
||||
return bad(handler, "Session was modified during compression; please retry.", 409)
|
||||
|
||||
s.messages = compressed
|
||||
s.tool_calls = []
|
||||
|
||||
@@ -9,7 +9,7 @@ from __future__ import annotations
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from api.config import LOCK
|
||||
from api.config import LOCK, _get_session_agent_lock
|
||||
from api.models import get_session, SESSIONS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -27,38 +27,43 @@ def retry_last(session_id: str) -> dict[str, Any]:
|
||||
KeyError: session not found
|
||||
ValueError: no user message in transcript
|
||||
"""
|
||||
# get_session() and Session.save() both acquire the module-level LOCK
|
||||
# internally (the latter via _write_session_index()), and LOCK is a
|
||||
# non-reentrant threading.Lock — so they MUST be called outside our
|
||||
# own `with LOCK:` block to avoid self-deadlocking.
|
||||
#
|
||||
# The race we close is the read-modify-write of s.messages: two
|
||||
# concurrent /api/session/retry calls could otherwise both compute the
|
||||
# same last_user_idx from the same history and double-truncate. We
|
||||
# serialize just the in-memory mutation; persistence happens outside
|
||||
# the lock and is naturally last-write-wins on a consistent state.
|
||||
#
|
||||
# Stale-object guard: on a cache miss, two concurrent get_session()
|
||||
# calls can each load and cache a *different* Session instance for the
|
||||
# same session_id (the second store_clobbers the first). Re-bind to
|
||||
# the canonical cached instance inside the lock so the mutation lands
|
||||
# on the object the next reader will see, not a stale parallel copy.
|
||||
s = get_session(session_id) # raises KeyError if missing
|
||||
with LOCK:
|
||||
s = SESSIONS.get(session_id, s)
|
||||
history = s.messages or []
|
||||
last_user_idx = None
|
||||
for i in range(len(history) - 1, -1, -1):
|
||||
if history[i].get('role') == 'user':
|
||||
last_user_idx = i
|
||||
break
|
||||
if last_user_idx is None:
|
||||
raise ValueError('No previous message to retry.')
|
||||
# Acquire the per-session agent lock as the outermost lock so that the
|
||||
# read-modify-write of s.messages is serialised with the periodic
|
||||
# checkpoint thread, cancel_stream, and all other session writers.
|
||||
# Lock ordering: _agent_lock → LOCK → _write_session_index (LOCK).
|
||||
with _get_session_agent_lock(session_id):
|
||||
# get_session() and Session.save() both acquire the module-level LOCK
|
||||
# internally (the latter via _write_session_index()), and LOCK is a
|
||||
# non-reentrant threading.Lock — so they MUST be called outside our
|
||||
# own `with LOCK:` block to avoid self-deadlocking.
|
||||
#
|
||||
# The race we close is the read-modify-write of s.messages: two
|
||||
# concurrent /api/session/retry calls could otherwise both compute the
|
||||
# same last_user_idx from the same history and double-truncate. We
|
||||
# serialize just the in-memory mutation; persistence happens inside
|
||||
# the per-session lock so the checkpoint thread cannot race us.
|
||||
#
|
||||
# Stale-object guard: on a cache miss, two concurrent get_session()
|
||||
# calls can each load and cache a *different* Session instance for the
|
||||
# same session_id (the second store clobbers the first). Re-bind to
|
||||
# the canonical cached instance inside the lock so the mutation lands
|
||||
# on the object the next reader will see, not a stale parallel copy.
|
||||
s = get_session(session_id) # raises KeyError if missing
|
||||
with LOCK:
|
||||
s = SESSIONS.get(session_id, s)
|
||||
history = s.messages or []
|
||||
last_user_idx = None
|
||||
for i in range(len(history) - 1, -1, -1):
|
||||
if history[i].get('role') == 'user':
|
||||
last_user_idx = i
|
||||
break
|
||||
if last_user_idx is None:
|
||||
raise ValueError('No previous message to retry.')
|
||||
|
||||
last_user_text = _extract_text(history[last_user_idx].get('content', ''))
|
||||
removed_count = len(history) - last_user_idx
|
||||
s.messages = history[:last_user_idx]
|
||||
s.save()
|
||||
last_user_text = _extract_text(history[last_user_idx].get('content', ''))
|
||||
removed_count = len(history) - last_user_idx
|
||||
s.messages = history[:last_user_idx]
|
||||
s.save()
|
||||
return {'last_user_text': last_user_text, 'removed_count': removed_count}
|
||||
|
||||
|
||||
@@ -72,23 +77,28 @@ def undo_last(session_id: str) -> dict[str, Any]:
|
||||
KeyError: session not found
|
||||
ValueError: no user message in transcript
|
||||
"""
|
||||
s = get_session(session_id) # acquires LOCK transiently
|
||||
with LOCK:
|
||||
# Stale-object guard — see retry_last for the rationale.
|
||||
s = SESSIONS.get(session_id, s)
|
||||
history = s.messages or []
|
||||
last_user_idx = None
|
||||
for i in range(len(history) - 1, -1, -1):
|
||||
if history[i].get('role') == 'user':
|
||||
last_user_idx = i
|
||||
break
|
||||
if last_user_idx is None:
|
||||
raise ValueError('Nothing to undo.')
|
||||
# Acquire the per-session agent lock as the outermost lock so that the
|
||||
# read-modify-write of s.messages is serialised with the periodic
|
||||
# checkpoint thread, cancel_stream, and all other session writers.
|
||||
# Lock ordering: _agent_lock → LOCK → _write_session_index (LOCK).
|
||||
with _get_session_agent_lock(session_id):
|
||||
s = get_session(session_id) # acquires LOCK transiently
|
||||
with LOCK:
|
||||
# Stale-object guard — see retry_last for the rationale.
|
||||
s = SESSIONS.get(session_id, s)
|
||||
history = s.messages or []
|
||||
last_user_idx = None
|
||||
for i in range(len(history) - 1, -1, -1):
|
||||
if history[i].get('role') == 'user':
|
||||
last_user_idx = i
|
||||
break
|
||||
if last_user_idx is None:
|
||||
raise ValueError('Nothing to undo.')
|
||||
|
||||
removed_text = _extract_text(history[last_user_idx].get('content', ''))
|
||||
removed_count = len(history) - last_user_idx
|
||||
s.messages = history[:last_user_idx]
|
||||
s.save() # outside LOCK -- save() re-acquires LOCK via _write_session_index()
|
||||
removed_text = _extract_text(history[last_user_idx].get('content', ''))
|
||||
removed_count = len(history) - last_user_idx
|
||||
s.messages = history[:last_user_idx]
|
||||
s.save() # outside LOCK -- save() re-acquires LOCK via _write_session_index()
|
||||
preview = (removed_text[:40] + '...') if len(removed_text) > 40 else removed_text
|
||||
return {
|
||||
'removed_count': removed_count,
|
||||
|
||||
576
api/streaming.py
576
api/streaming.py
@@ -2,6 +2,7 @@
|
||||
Hermes Web UI -- SSE streaming engine and agent thread runner.
|
||||
Includes Sprint 10 cancel support via CANCEL_FLAGS.
|
||||
"""
|
||||
import contextlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -20,6 +21,7 @@ from api.config import (
|
||||
STREAMS, STREAMS_LOCK, CANCEL_FLAGS, AGENT_INSTANCES, STREAM_PARTIAL_TEXT,
|
||||
LOCK, SESSIONS, SESSION_DIR,
|
||||
_get_session_agent_lock, _set_thread_env, _clear_thread_env,
|
||||
SESSION_AGENT_LOCKS, SESSION_AGENT_LOCKS_LOCK,
|
||||
resolve_model_provider,
|
||||
)
|
||||
from api.helpers import redact_session_data
|
||||
@@ -534,18 +536,46 @@ def _run_background_title_update(session_id: str, user_text: str, assistant_text
|
||||
if next_title:
|
||||
logger.debug("Using local fallback for session title generation")
|
||||
source = 'fallback'
|
||||
if next_title and next_title != current:
|
||||
s.title = next_title
|
||||
s.llm_title_generated = True
|
||||
# Keep chronological ordering stable in the sidebar.
|
||||
s.save(touch_updated_at=False)
|
||||
wrote_title = False
|
||||
effective_title = current
|
||||
if next_title:
|
||||
# Hold _agent_lock only for in-memory mutation + save so title write
|
||||
# is serialized with checkpoint saves, cancel_stream, and other
|
||||
# session-mutating endpoints. The LLM round-trip above ran outside
|
||||
# the lock to avoid blocking other writers.
|
||||
with _get_session_agent_lock(session_id):
|
||||
# Stale-object guard: rebind to the canonical cached Session
|
||||
# instance under LOCK before checking whether a user rename
|
||||
# landed while the LLM title request was in-flight.
|
||||
with LOCK:
|
||||
s = SESSIONS.get(session_id, s)
|
||||
effective_title = str(s.title or '').strip()
|
||||
invalid_existing_now = _looks_invalid_generated_title(s.title)
|
||||
still_auto = (
|
||||
effective_title == placeholder_title
|
||||
or effective_title in ('Untitled', 'New Chat', '')
|
||||
or _is_provisional_title(effective_title, s.messages)
|
||||
or invalid_existing_now
|
||||
)
|
||||
if not still_auto:
|
||||
_put_title_status(put_event, session_id, 'skipped', 'manual_title', effective_title)
|
||||
return
|
||||
if next_title != effective_title:
|
||||
s.title = next_title
|
||||
s.llm_title_generated = True
|
||||
# Keep chronological ordering stable in the sidebar.
|
||||
s.save(touch_updated_at=False)
|
||||
effective_title = s.title
|
||||
wrote_title = True
|
||||
|
||||
if wrote_title:
|
||||
if source == 'fallback':
|
||||
_put_title_status(put_event, session_id, source, 'local_summary', s.title, raw_preview)
|
||||
_put_title_status(put_event, session_id, source, 'local_summary', effective_title, raw_preview)
|
||||
else:
|
||||
_put_title_status(put_event, session_id, source, llm_status, s.title, raw_preview)
|
||||
put_event('title', {'session_id': session_id, 'title': s.title})
|
||||
_put_title_status(put_event, session_id, source, llm_status, effective_title, raw_preview)
|
||||
put_event('title', {'session_id': session_id, 'title': effective_title})
|
||||
else:
|
||||
_put_title_status(put_event, session_id, 'skipped', source or 'unchanged', current, raw_preview)
|
||||
_put_title_status(put_event, session_id, 'skipped', source or 'unchanged', effective_title, raw_preview)
|
||||
finally:
|
||||
put_event('stream_end', {'session_id': session_id})
|
||||
|
||||
@@ -830,6 +860,8 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
# block can safely check `if _checkpoint_stop is not None` even when an
|
||||
# exception fires before the checkpoint thread is created (Issue #765).
|
||||
_checkpoint_stop = None
|
||||
_ckpt_thread = None
|
||||
_agent_lock = None
|
||||
try:
|
||||
s = get_session(session_id)
|
||||
s.workspace = str(Path(workspace).expanduser().resolve())
|
||||
@@ -974,6 +1006,11 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
_reasoning_text += str(text)
|
||||
put('reasoning', {'text': str(text)})
|
||||
|
||||
# Pre-initialise the activity counter here so on_tool (which
|
||||
# closes over it) never captures an unbound name even if this
|
||||
# block is reordered later (Issue #765).
|
||||
_checkpoint_activity = [0]
|
||||
|
||||
def on_tool(*cb_args, **cb_kwargs):
|
||||
event_type = None
|
||||
name = None
|
||||
@@ -1224,7 +1261,7 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
# response — better than a silent loss of the entire conversation turn.
|
||||
# The final s.save() at task completion handles the full session update + index.
|
||||
# (_checkpoint_stop is pre-initialised at the top of the outer try.)
|
||||
_checkpoint_activity = [0]
|
||||
# (_checkpoint_activity is already initialised before on_tool().)
|
||||
|
||||
def _periodic_checkpoint():
|
||||
last_saved_activity = 0
|
||||
@@ -1232,7 +1269,8 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
try:
|
||||
cur = _checkpoint_activity[0]
|
||||
if cur > last_saved_activity:
|
||||
s.save(skip_index=True)
|
||||
with _agent_lock:
|
||||
s.save(skip_index=True)
|
||||
last_saved_activity = cur
|
||||
except Exception as e:
|
||||
logger.debug("Periodic checkpoint save failed: %s", e)
|
||||
@@ -1251,193 +1289,214 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
task_id=session_id,
|
||||
persist_user_message=msg_text,
|
||||
)
|
||||
s.messages = _restore_reasoning_metadata(
|
||||
_previous_messages,
|
||||
result.get('messages') or s.messages,
|
||||
)
|
||||
# Strip XML tool-call blocks from assistant message content.
|
||||
# DeepSeek and some other providers emit <function_calls>...</function_calls>
|
||||
# in the raw response text; this must be removed before the content is
|
||||
# saved to the session and displayed in the chat bubble. (#702)
|
||||
for _m in s.messages:
|
||||
if isinstance(_m, dict) and _m.get('role') == 'assistant':
|
||||
_raw_content = _m.get('content')
|
||||
if isinstance(_raw_content, str):
|
||||
_cleaned = _strip_xml_tool_calls(_raw_content)
|
||||
if _cleaned != _raw_content:
|
||||
_m['content'] = _cleaned
|
||||
elif isinstance(_raw_content, list):
|
||||
for _part in _raw_content:
|
||||
if isinstance(_part, dict) and isinstance(_part.get('text'), str):
|
||||
_part['text'] = _strip_xml_tool_calls(_part['text'])
|
||||
if _checkpoint_stop is not None:
|
||||
_checkpoint_stop.set()
|
||||
if _ckpt_thread is not None:
|
||||
_ckpt_thread.join(timeout=15)
|
||||
with _agent_lock:
|
||||
s.messages = _restore_reasoning_metadata(
|
||||
_previous_messages,
|
||||
result.get('messages') or s.messages,
|
||||
)
|
||||
# Strip XML tool-call blocks from assistant message content.
|
||||
# DeepSeek and some other providers emit <function_calls>...</function_calls>
|
||||
# in the raw response text; this must be removed before the content is
|
||||
# saved to the session and displayed in the chat bubble. (#702)
|
||||
for _m in s.messages:
|
||||
if isinstance(_m, dict) and _m.get('role') == 'assistant':
|
||||
_raw_content = _m.get('content')
|
||||
if isinstance(_raw_content, str):
|
||||
_cleaned = _strip_xml_tool_calls(_raw_content)
|
||||
if _cleaned != _raw_content:
|
||||
_m['content'] = _cleaned
|
||||
elif isinstance(_raw_content, list):
|
||||
for _part in _raw_content:
|
||||
if isinstance(_part, dict) and isinstance(_part.get('text'), str):
|
||||
_part['text'] = _strip_xml_tool_calls(_part['text'])
|
||||
|
||||
# ── Detect silent agent failure (no assistant reply produced) ──
|
||||
# When the agent catches an auth/network error internally it may return
|
||||
# an empty final_response without raising — the stream would end with
|
||||
# a done event containing zero assistant messages, leaving the user with
|
||||
# no feedback. Emit an apperror so the client shows an inline error.
|
||||
_assistant_added = any(
|
||||
m.get('role') == 'assistant' and str(m.get('content') or '').strip()
|
||||
for m in (result.get('messages') or [])
|
||||
)
|
||||
# _token_sent tracks whether on_token() was called (any streamed text)
|
||||
if not _assistant_added and not _token_sent:
|
||||
_last_err = getattr(agent, '_last_error', None) or result.get('error') or ''
|
||||
_err_str = str(_last_err) if _last_err else ''
|
||||
_err_lower = _err_str.lower()
|
||||
_is_quota = (
|
||||
'insufficient credit' in _err_lower
|
||||
or 'credit balance' in _err_lower
|
||||
or 'credits exhausted' in _err_lower
|
||||
or 'quota_exceeded' in _err_lower
|
||||
or 'quota exceeded' in _err_lower
|
||||
or 'exceeded your current quota' in _err_lower
|
||||
# ── Detect silent agent failure (no assistant reply produced) ──
|
||||
# When the agent catches an auth/network error internally it may return
|
||||
# an empty final_response without raising — the stream would end with
|
||||
# a done event containing zero assistant messages, leaving the user with
|
||||
# no feedback. Emit an apperror so the client shows an inline error.
|
||||
_assistant_added = any(
|
||||
m.get('role') == 'assistant' and str(m.get('content') or '').strip()
|
||||
for m in (result.get('messages') or [])
|
||||
)
|
||||
_is_auth = (
|
||||
not _is_quota and (
|
||||
'401' in _err_str
|
||||
or (_last_err and 'AuthenticationError' in type(_last_err).__name__)
|
||||
or 'authentication' in _err_lower
|
||||
or 'unauthorized' in _err_lower
|
||||
or 'invalid api key' in _err_lower
|
||||
or 'invalid_api_key' in _err_lower
|
||||
# _token_sent tracks whether on_token() was called (any streamed text)
|
||||
if not _assistant_added and not _token_sent:
|
||||
_last_err = getattr(agent, '_last_error', None) or result.get('error') or ''
|
||||
_err_str = str(_last_err) if _last_err else ''
|
||||
_err_lower = _err_str.lower()
|
||||
_is_quota = (
|
||||
'insufficient credit' in _err_lower
|
||||
or 'credit balance' in _err_lower
|
||||
or 'credits exhausted' in _err_lower
|
||||
or 'quota_exceeded' in _err_lower
|
||||
or 'quota exceeded' in _err_lower
|
||||
or 'exceeded your current quota' in _err_lower
|
||||
)
|
||||
_is_auth = (
|
||||
not _is_quota and (
|
||||
'401' in _err_str
|
||||
or (_last_err and 'AuthenticationError' in type(_last_err).__name__)
|
||||
or 'authentication' in _err_lower
|
||||
or 'unauthorized' in _err_lower
|
||||
or 'invalid api key' in _err_lower
|
||||
or 'invalid_api_key' in _err_lower
|
||||
)
|
||||
)
|
||||
if _is_quota:
|
||||
_err_label = 'Out of credits'
|
||||
_err_type = 'quota_exhausted'
|
||||
_err_hint = 'Your provider account is out of credits. Top up your balance or switch providers via `hermes model`.'
|
||||
elif _is_auth:
|
||||
_err_label = 'Authentication failed'
|
||||
_err_type = 'auth_mismatch'
|
||||
_err_hint = (
|
||||
'The selected model may not be supported by your configured provider or '
|
||||
'your API key is invalid. Run `hermes model` in your terminal to '
|
||||
'update credentials, then restart the WebUI.'
|
||||
)
|
||||
else:
|
||||
_err_label = 'No response received'
|
||||
_err_type = 'no_response'
|
||||
_err_hint = 'Verify your API key is valid and the selected model is available for your account.'
|
||||
put('apperror', {
|
||||
'message': _err_str or f'{_err_label}.',
|
||||
'type': _err_type,
|
||||
'hint': _err_hint,
|
||||
})
|
||||
# Clear stream/pending state so the session does not appear
|
||||
# "agent_running" on reload after a silent failure.
|
||||
# Persist the error so it survives page reload.
|
||||
# _error=True ensures _sanitize_messages_for_api excludes it from
|
||||
# subsequent API calls so the LLM never sees its own error as prior context.
|
||||
s.active_stream_id = None
|
||||
s.pending_user_message = None
|
||||
s.pending_attachments = []
|
||||
s.pending_started_at = None
|
||||
s.messages.append({
|
||||
'role': 'assistant',
|
||||
'content': f'**{_err_label}:** {_err_str or _err_label}\n\n*{_err_hint}*',
|
||||
'timestamp': int(time.time()),
|
||||
'_error': True,
|
||||
})
|
||||
try:
|
||||
s.save()
|
||||
except Exception:
|
||||
pass
|
||||
return # apperror already closes the stream on the client side
|
||||
|
||||
# ── Handle context compression side effects ──
|
||||
# If compression fired inside run_conversation, the agent may have
|
||||
# rotated its session_id. Detect and fix the mismatch so the WebUI
|
||||
# continues writing to the correct session file.
|
||||
#
|
||||
# Lock migration: when session_id rotates, we alias the new ID to
|
||||
# the *same* Lock object under SESSION_AGENT_LOCKS so that
|
||||
# subsequent callers using _get_session_agent_lock(new_sid) get the
|
||||
# same Lock the streaming thread is already holding. We then pop
|
||||
# the old-id entry to prevent a leak. This is safe because we
|
||||
# already hold _agent_lock (the Lock object itself), so the
|
||||
# reference stays alive even after the dict entry is removed.
|
||||
# Concurrent readers that already looked up the old ID will still
|
||||
# see the same Lock object until they release it.
|
||||
_agent_sid = getattr(agent, 'session_id', None)
|
||||
_compressed = False
|
||||
if _agent_sid and _agent_sid != session_id:
|
||||
old_sid = session_id
|
||||
new_sid = _agent_sid
|
||||
# Rename the session file
|
||||
old_path = SESSION_DIR / f'{old_sid}.json'
|
||||
new_path = SESSION_DIR / f'{new_sid}.json'
|
||||
s.session_id = new_sid
|
||||
with LOCK:
|
||||
if old_sid in SESSIONS:
|
||||
SESSIONS[new_sid] = SESSIONS.pop(old_sid)
|
||||
# Migrate the per-session lock: alias new_sid to the held
|
||||
# _agent_lock reference directly (not via old_sid lookup),
|
||||
# then remove the old_sid entry to prevent a leak.
|
||||
with SESSION_AGENT_LOCKS_LOCK:
|
||||
SESSION_AGENT_LOCKS[new_sid] = _agent_lock
|
||||
SESSION_AGENT_LOCKS.pop(old_sid, None)
|
||||
if old_path.exists() and not new_path.exists():
|
||||
try:
|
||||
old_path.rename(new_path)
|
||||
except OSError:
|
||||
logger.debug("Failed to rename session file during compression")
|
||||
_compressed = True
|
||||
# Also detect compression via the result dict or compressor state
|
||||
if not _compressed:
|
||||
_compressor = getattr(agent, 'context_compressor', None)
|
||||
if _compressor and getattr(_compressor, 'compression_count', 0) > 0:
|
||||
_compressed = True
|
||||
# Notify the frontend that compression happened
|
||||
if _compressed:
|
||||
put('compressed', {
|
||||
'message': 'Context auto-compressed to continue the conversation',
|
||||
})
|
||||
|
||||
# Stamp 'timestamp' on any messages that don't have one yet
|
||||
_now = time.time()
|
||||
for _m in s.messages:
|
||||
if isinstance(_m, dict) and not _m.get('timestamp') and not _m.get('_ts'):
|
||||
_m['timestamp'] = int(_now)
|
||||
# Only auto-generate title when still default; preserves user renames
|
||||
if s.title == 'Untitled' or s.title == 'New Chat' or not s.title:
|
||||
s.title = title_from(s.messages, s.title)
|
||||
_looks_default = (s.title == 'Untitled' or s.title == 'New Chat' or not s.title)
|
||||
_looks_provisional = _is_provisional_title(s.title, s.messages)
|
||||
_invalid_existing_title = _looks_invalid_generated_title(s.title)
|
||||
_should_bg_title = (
|
||||
(_looks_default or _looks_provisional or _invalid_existing_title)
|
||||
and (not getattr(s, 'llm_title_generated', False) or _invalid_existing_title)
|
||||
)
|
||||
if _is_quota:
|
||||
_err_label = 'Out of credits'
|
||||
_err_type = 'quota_exhausted'
|
||||
_err_hint = 'Your provider account is out of credits. Top up your balance or switch providers via `hermes model`.'
|
||||
elif _is_auth:
|
||||
_err_label = 'Authentication failed'
|
||||
_err_type = 'auth_mismatch'
|
||||
_err_hint = (
|
||||
'The selected model may not be supported by your configured provider or '
|
||||
'your API key is invalid. Run `hermes model` in your terminal to '
|
||||
'update credentials, then restart the WebUI.'
|
||||
)
|
||||
else:
|
||||
_err_label = 'No response received'
|
||||
_err_type = 'no_response'
|
||||
_err_hint = 'Verify your API key is valid and the selected model is available for your account.'
|
||||
put('apperror', {
|
||||
'message': _err_str or f'{_err_label}.',
|
||||
'type': _err_type,
|
||||
'hint': _err_hint,
|
||||
})
|
||||
# Clear stream/pending state so the session does not appear
|
||||
# "agent_running" on reload after a silent failure.
|
||||
_u0 = ''
|
||||
_a0 = ''
|
||||
if _should_bg_title:
|
||||
_u0, _a0 = _first_exchange_snippets(s.messages)
|
||||
# Read token/cost usage from the agent object (if available)
|
||||
input_tokens = getattr(agent, 'session_prompt_tokens', 0) or 0
|
||||
output_tokens = getattr(agent, 'session_completion_tokens', 0) or 0
|
||||
estimated_cost = getattr(agent, 'session_estimated_cost_usd', None)
|
||||
s.input_tokens = (s.input_tokens or 0) + input_tokens
|
||||
s.output_tokens = (s.output_tokens or 0) + output_tokens
|
||||
if estimated_cost:
|
||||
s.estimated_cost = (s.estimated_cost or 0) + estimated_cost
|
||||
# Persist tool-call summaries even when the final message history only
|
||||
# kept bare tool rows and omitted explicit assistant tool_call IDs.
|
||||
tool_calls = _extract_tool_calls_from_messages(
|
||||
s.messages,
|
||||
live_tool_calls=_live_tool_calls,
|
||||
)
|
||||
s.tool_calls = tool_calls
|
||||
s.active_stream_id = None
|
||||
s.pending_user_message = None
|
||||
s.pending_attachments = []
|
||||
s.pending_started_at = None
|
||||
# Persist the error so it survives page reload.
|
||||
# _error=True ensures _sanitize_messages_for_api excludes it from
|
||||
# subsequent API calls so the LLM never sees its own error as prior context.
|
||||
s.messages.append({
|
||||
'role': 'assistant',
|
||||
'content': f'**{_err_label}:** {_err_str or _err_label}\n\n*{_err_hint}*',
|
||||
'timestamp': int(time.time()),
|
||||
'_error': True,
|
||||
})
|
||||
try:
|
||||
s.save()
|
||||
except Exception:
|
||||
pass
|
||||
return # apperror already closes the stream on the client side
|
||||
|
||||
# ── Handle context compression side effects ──
|
||||
# If compression fired inside run_conversation, the agent may have
|
||||
# rotated its session_id. Detect and fix the mismatch so the WebUI
|
||||
# continues writing to the correct session file.
|
||||
_agent_sid = getattr(agent, 'session_id', None)
|
||||
_compressed = False
|
||||
if _agent_sid and _agent_sid != session_id:
|
||||
old_sid = session_id
|
||||
new_sid = _agent_sid
|
||||
# Rename the session file
|
||||
old_path = SESSION_DIR / f'{old_sid}.json'
|
||||
new_path = SESSION_DIR / f'{new_sid}.json'
|
||||
s.session_id = new_sid
|
||||
with LOCK:
|
||||
if old_sid in SESSIONS:
|
||||
SESSIONS[new_sid] = SESSIONS.pop(old_sid)
|
||||
if old_path.exists() and not new_path.exists():
|
||||
try:
|
||||
old_path.rename(new_path)
|
||||
except OSError:
|
||||
logger.debug("Failed to rename session file during compression")
|
||||
_compressed = True
|
||||
# Also detect compression via the result dict or compressor state
|
||||
if not _compressed:
|
||||
_compressor = getattr(agent, 'context_compressor', None)
|
||||
if _compressor and getattr(_compressor, 'compression_count', 0) > 0:
|
||||
_compressed = True
|
||||
# Notify the frontend that compression happened
|
||||
if _compressed:
|
||||
put('compressed', {
|
||||
'message': 'Context auto-compressed to continue the conversation',
|
||||
})
|
||||
|
||||
# Stamp 'timestamp' on any messages that don't have one yet
|
||||
_now = time.time()
|
||||
for _m in s.messages:
|
||||
if isinstance(_m, dict) and not _m.get('timestamp') and not _m.get('_ts'):
|
||||
_m['timestamp'] = int(_now)
|
||||
# Only auto-generate title when still default; preserves user renames
|
||||
if s.title == 'Untitled' or s.title == 'New Chat' or not s.title:
|
||||
s.title = title_from(s.messages, s.title)
|
||||
_looks_default = (s.title == 'Untitled' or s.title == 'New Chat' or not s.title)
|
||||
_looks_provisional = _is_provisional_title(s.title, s.messages)
|
||||
_invalid_existing_title = _looks_invalid_generated_title(s.title)
|
||||
_should_bg_title = (
|
||||
(_looks_default or _looks_provisional or _invalid_existing_title)
|
||||
and (not getattr(s, 'llm_title_generated', False) or _invalid_existing_title)
|
||||
)
|
||||
_u0 = ''
|
||||
_a0 = ''
|
||||
if _should_bg_title:
|
||||
_u0, _a0 = _first_exchange_snippets(s.messages)
|
||||
# Read token/cost usage from the agent object (if available)
|
||||
input_tokens = getattr(agent, 'session_prompt_tokens', 0) or 0
|
||||
output_tokens = getattr(agent, 'session_completion_tokens', 0) or 0
|
||||
estimated_cost = getattr(agent, 'session_estimated_cost_usd', None)
|
||||
s.input_tokens = (s.input_tokens or 0) + input_tokens
|
||||
s.output_tokens = (s.output_tokens or 0) + output_tokens
|
||||
if estimated_cost:
|
||||
s.estimated_cost = (s.estimated_cost or 0) + estimated_cost
|
||||
# Persist tool-call summaries even when the final message history only
|
||||
# kept bare tool rows and omitted explicit assistant tool_call IDs.
|
||||
tool_calls = _extract_tool_calls_from_messages(
|
||||
s.messages,
|
||||
live_tool_calls=_live_tool_calls,
|
||||
)
|
||||
s.tool_calls = tool_calls
|
||||
s.active_stream_id = None
|
||||
s.pending_user_message = None
|
||||
s.pending_attachments = []
|
||||
s.pending_started_at = None
|
||||
# Tag the matching user message with attachment filenames for display on reload
|
||||
# Only tag a user message whose content relates to this turn's text
|
||||
# (msg_text is the full message including the [Attached files: ...] suffix)
|
||||
if attachments:
|
||||
for m in reversed(s.messages):
|
||||
if m.get('role') == 'user':
|
||||
content = str(m.get('content', ''))
|
||||
# Match if content is part of the sent message or vice-versa
|
||||
base_text = msg_text.split('\n\n[Attached files:')[0].strip() if '\n\n[Attached files:' in msg_text else msg_text
|
||||
if base_text[:60] in content or content[:60] in msg_text:
|
||||
m['attachments'] = attachments
|
||||
# Tag the matching user message with attachment filenames for display on reload
|
||||
# Only tag a user message whose content relates to this turn's text
|
||||
# (msg_text is the full message including the [Attached files: ...] suffix)
|
||||
if attachments:
|
||||
for m in reversed(s.messages):
|
||||
if m.get('role') == 'user':
|
||||
content = str(m.get('content', ''))
|
||||
# Match if content is part of the sent message or vice-versa
|
||||
base_text = msg_text.split('\n\n[Attached files:')[0].strip() if '\n\n[Attached files:' in msg_text else msg_text
|
||||
if base_text[:60] in content or content[:60] in msg_text:
|
||||
m['attachments'] = attachments
|
||||
break
|
||||
# Persist reasoning trace in the session so it survives reload.
|
||||
# Must run BEFORE s.save() — otherwise the mutation lives only in
|
||||
# memory until the next turn's save, and the last-turn thinking card
|
||||
# is lost when the user reloads immediately after a response.
|
||||
if _reasoning_text and s.messages:
|
||||
for _rm in reversed(s.messages):
|
||||
if isinstance(_rm, dict) and _rm.get('role') == 'assistant':
|
||||
_rm['reasoning'] = _reasoning_text
|
||||
break
|
||||
# Persist reasoning trace in the session so it survives reload.
|
||||
# Must run BEFORE s.save() — otherwise the mutation lives only in
|
||||
# memory until the next turn's save, and the last-turn thinking card
|
||||
# is lost when the user reloads immediately after a response.
|
||||
if _reasoning_text and s.messages:
|
||||
for _rm in reversed(s.messages):
|
||||
if isinstance(_rm, dict) and _rm.get('role') == 'assistant':
|
||||
_rm['reasoning'] = _reasoning_text
|
||||
break
|
||||
s.save()
|
||||
s.save()
|
||||
# Sync to state.db for /insights (opt-in setting)
|
||||
try:
|
||||
from api.config import load_settings as _load_settings
|
||||
@@ -1543,23 +1602,29 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
else:
|
||||
_exc_label, _exc_type, _exc_hint = 'Error', 'error', ''
|
||||
if s is not None:
|
||||
s.active_stream_id = None
|
||||
s.pending_user_message = None
|
||||
s.pending_attachments = []
|
||||
s.pending_started_at = None
|
||||
if _checkpoint_stop is not None:
|
||||
_checkpoint_stop.set()
|
||||
if _ckpt_thread is not None:
|
||||
_ckpt_thread.join(timeout=15)
|
||||
# Persist the error so it survives page reload.
|
||||
# _error=True ensures _sanitize_messages_for_api excludes it from subsequent
|
||||
# API calls so the LLM never sees its own error as prior context on the next turn.
|
||||
s.messages.append({
|
||||
'role': 'assistant',
|
||||
'content': f'**{_exc_label}:** {err_str}' + (f'\n\n*{_exc_hint}*' if _exc_hint else ''),
|
||||
'timestamp': int(time.time()),
|
||||
'_error': True,
|
||||
})
|
||||
try:
|
||||
s.save()
|
||||
except Exception:
|
||||
pass
|
||||
_lock_ctx = _agent_lock if _agent_lock is not None else contextlib.nullcontext()
|
||||
with _lock_ctx:
|
||||
s.active_stream_id = None
|
||||
s.pending_user_message = None
|
||||
s.pending_attachments = []
|
||||
s.pending_started_at = None
|
||||
s.messages.append({
|
||||
'role': 'assistant',
|
||||
'content': f'**{_exc_label}:** {err_str}' + (f'\n\n*{_exc_hint}*' if _exc_hint else ''),
|
||||
'timestamp': int(time.time()),
|
||||
'_error': True,
|
||||
})
|
||||
try:
|
||||
s.save()
|
||||
except Exception:
|
||||
pass
|
||||
_apperror_payload: dict = {'message': err_str, 'type': _exc_type}
|
||||
if _exc_hint:
|
||||
_apperror_payload['hint'] = _exc_hint
|
||||
@@ -1568,6 +1633,8 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
# Stop periodic checkpoint thread if it was started (Issue #765)
|
||||
if _checkpoint_stop is not None:
|
||||
_checkpoint_stop.set()
|
||||
if _ckpt_thread is not None:
|
||||
_ckpt_thread.join(timeout=15)
|
||||
_clear_thread_env() # TD1: always clear thread-local context
|
||||
with STREAMS_LOCK:
|
||||
STREAMS.pop(stream_id, None)
|
||||
@@ -1662,55 +1729,60 @@ def cancel_stream(stream_id: str) -> bool:
|
||||
_cancel_partial_text = STREAM_PARTIAL_TEXT.get(stream_id, '')
|
||||
|
||||
# Session cleanup outside STREAMS_LOCK to preserve lock ordering.
|
||||
# Acquire the per-session _agent_lock too, mirroring every other session
|
||||
# writer (streaming success/error paths, periodic checkpoint, POST endpoints)
|
||||
# so the cancel-path mutation races neither the checkpoint thread nor
|
||||
# concurrent undo/retry calls.
|
||||
if _cancel_session_id:
|
||||
try:
|
||||
_cs = get_session(_cancel_session_id)
|
||||
_cs.active_stream_id = None
|
||||
_cs.pending_user_message = None
|
||||
_cs.pending_attachments = []
|
||||
_cs.pending_started_at = None
|
||||
# Persist any partial assistant text that was streamed before cancel (#893).
|
||||
# Preserving partial content means the user sees what the agent had
|
||||
# produced rather than losing it entirely. The marker is _partial=True
|
||||
# (for session/UI identification) — NOT _error=True — so the partial
|
||||
# content IS kept in the history sent to the agent on the next user
|
||||
# message, letting the model continue from where it was cut off.
|
||||
# See the inner comment on the append call below for the rationale.
|
||||
partial_text = _cancel_partial_text.strip() if _cancel_partial_text else ''
|
||||
if partial_text:
|
||||
import re as _re
|
||||
# Strip thinking/reasoning markup from partial content before saving.
|
||||
# First pass: remove complete <think>...</think> and <thinking>...</thinking> blocks.
|
||||
_stripped = _re.sub(r'<think(?:ing)?\b[^>]*>.*?</think(?:ing)?>',
|
||||
'', partial_text,
|
||||
flags=_re.DOTALL | _re.IGNORECASE).strip()
|
||||
# Second pass: strip trailing UNCLOSED think/thinking block (the common
|
||||
# cancel case — user stops mid-reasoning before the close tag appears).
|
||||
_stripped = _re.sub(r'<think(?:ing)?\b[^>]*>.*',
|
||||
'', _stripped,
|
||||
flags=_re.DOTALL | _re.IGNORECASE).strip()
|
||||
if _stripped:
|
||||
# Mark _partial=True for session/UI identification only.
|
||||
# Deliberately NOT _error=True — the partial content is real model
|
||||
# output and should be visible in conversation history so the model
|
||||
# can continue from it on the next turn (#893).
|
||||
_cs.messages.append({
|
||||
'role': 'assistant',
|
||||
'content': _stripped,
|
||||
'_partial': True,
|
||||
'timestamp': int(time.time()),
|
||||
})
|
||||
# Cancel marker — flagged _error=True so it is stripped from conversation
|
||||
# history on the next turn (prevents model from seeing "Task cancelled."
|
||||
# as a prior assistant reply).
|
||||
_cs.messages.append({
|
||||
'role': 'assistant',
|
||||
'content': '*Task cancelled.*',
|
||||
'_error': True,
|
||||
'timestamp': int(time.time()),
|
||||
})
|
||||
_cs.save()
|
||||
except Exception:
|
||||
logger.debug("Failed to clear session state on cancel for %s", _cancel_session_id)
|
||||
with _get_session_agent_lock(_cancel_session_id):
|
||||
try:
|
||||
_cs = get_session(_cancel_session_id)
|
||||
_cs.active_stream_id = None
|
||||
_cs.pending_user_message = None
|
||||
_cs.pending_attachments = []
|
||||
_cs.pending_started_at = None
|
||||
# Persist any partial assistant text that was streamed before cancel (#893).
|
||||
# Preserving partial content means the user sees what the agent had
|
||||
# produced rather than losing it entirely. The marker is _partial=True
|
||||
# (for session/UI identification only) — NOT _error=True — so the partial
|
||||
# content IS kept in the history sent to the agent on the next user
|
||||
# message, letting the model continue from where it was cut off.
|
||||
# See the inner comment on the append call below for the rationale.
|
||||
partial_text = _cancel_partial_text.strip() if _cancel_partial_text else ''
|
||||
if partial_text:
|
||||
import re as _re
|
||||
# Strip thinking/reasoning markup from partial content before saving.
|
||||
# First pass: remove complete <thinking>...</thinking> blocks.
|
||||
_stripped = _re.sub(r'<think(?:ing)?\b[^>]*>.*?</think(?:ing)?>',
|
||||
'', partial_text,
|
||||
flags=_re.DOTALL | _re.IGNORECASE).strip()
|
||||
# Second pass: strip trailing UNCLOSED think/thinking block (the common
|
||||
# cancel case — user stops mid-reasoning before the close tag appears).
|
||||
_stripped = _re.sub(r'<think(?:ing)?\b[^>]*>.*',
|
||||
'', _stripped,
|
||||
flags=_re.DOTALL | _re.IGNORECASE).strip()
|
||||
if _stripped:
|
||||
# Mark _partial=True for session/UI identification only.
|
||||
# Deliberately NOT _error=True — the partial content is real model
|
||||
# output and should be visible in conversation history so the model
|
||||
# can continue from it on the next turn (#893).
|
||||
_cs.messages.append({
|
||||
'role': 'assistant',
|
||||
'content': _stripped,
|
||||
'_partial': True,
|
||||
'timestamp': int(time.time()),
|
||||
})
|
||||
# Cancel marker — flagged _error=True so it is stripped from conversation
|
||||
# history on the next turn (prevents model from seeing "Task cancelled."
|
||||
# as a prior assistant reply).
|
||||
_cs.messages.append({
|
||||
'role': 'assistant',
|
||||
'content': '*Task cancelled.*',
|
||||
'_error': True,
|
||||
'timestamp': int(time.time()),
|
||||
})
|
||||
_cs.save()
|
||||
except Exception:
|
||||
logger.debug("Failed to clear session state on cancel for %s", _cancel_session_id)
|
||||
|
||||
return True
|
||||
|
||||
@@ -255,13 +255,101 @@ class TestPeriodicCheckpoint:
|
||||
assert data["updated_at"] > ts_before, "Checkpoint should update updated_at"
|
||||
|
||||
|
||||
class TestCheckpointVariableLifecycle:
|
||||
"""Regression guard: the outer `finally` must not UnboundLocalError when an
|
||||
exception fires before the checkpoint thread is created. _checkpoint_stop
|
||||
is initialised to None at the very top of the outer try block so the
|
||||
finally's `if _checkpoint_stop is not None` branch is always safe.
|
||||
class TestIssue765FollowupHardening:
|
||||
"""Regression tests for the follow-up hardening pass on Issue #765.
|
||||
|
||||
Includes the guard that the outer `finally` must not UnboundLocalError when
|
||||
an exception fires before the checkpoint thread is created.
|
||||
"""
|
||||
|
||||
def test_same_session_concurrent_saves_use_distinct_temp_files(self, monkeypatch):
|
||||
"""Two concurrent saves of the same session must not collide on one tmp path.
|
||||
|
||||
The key regression guard here is that each save call should reach os.replace()
|
||||
with a distinct source tmp path. With the old shared `<sid>.tmp` scheme, both
|
||||
threads would target the same path and the second replace would deterministically
|
||||
fail once the first consume/remove happened.
|
||||
"""
|
||||
s = _make_session("same_sid")
|
||||
s.save(skip_index=True) # seed the file on disk
|
||||
|
||||
original_replace = models.os.replace
|
||||
barrier = threading.Barrier(2)
|
||||
replace_sources = []
|
||||
errors = []
|
||||
|
||||
def _replace_with_barrier(src, dst):
|
||||
replace_sources.append(str(src))
|
||||
barrier.wait(timeout=5)
|
||||
return original_replace(src, dst)
|
||||
|
||||
monkeypatch.setattr(models.os, "replace", _replace_with_barrier)
|
||||
|
||||
def _save_worker():
|
||||
try:
|
||||
s.save(skip_index=True)
|
||||
except Exception as e:
|
||||
errors.append(e)
|
||||
|
||||
t1 = threading.Thread(target=_save_worker)
|
||||
t2 = threading.Thread(target=_save_worker)
|
||||
t1.start()
|
||||
t2.start()
|
||||
t1.join(timeout=5)
|
||||
t2.join(timeout=5)
|
||||
|
||||
assert not errors, f"Concurrent same-session saves should not fail: {errors}"
|
||||
assert len(replace_sources) == 2, f"Expected 2 replace calls, got {replace_sources}"
|
||||
assert len(set(replace_sources)) == 2, (
|
||||
"Concurrent same-session saves must use distinct temp files; "
|
||||
f"got {replace_sources}"
|
||||
)
|
||||
data = json.loads(s.path.read_text(encoding="utf-8"))
|
||||
assert data["session_id"] == "same_sid"
|
||||
|
||||
def test_success_path_joins_checkpoint_before_session_mutation(self):
|
||||
"""Static guard: success path must stop/join checkpoint thread before mutating.
|
||||
|
||||
This keeps the post-run_conversation session rewrite serialized relative to the
|
||||
periodic checkpoint worker.
|
||||
"""
|
||||
src = (Path(__file__).parent.parent / "api" / "streaming.py").read_text(
|
||||
encoding="utf-8"
|
||||
)
|
||||
stop_idx = src.find("if _checkpoint_stop is not None:\n _checkpoint_stop.set()")
|
||||
join_idx = src.find("if _ckpt_thread is not None:\n _ckpt_thread.join(timeout=15)")
|
||||
lock_idx = src.find("with _agent_lock:\n s.messages = _restore_reasoning_metadata(")
|
||||
save_idx = src.find("s.messages = _restore_reasoning_metadata(")
|
||||
|
||||
assert stop_idx != -1, "Success path must stop the checkpoint thread"
|
||||
assert join_idx != -1, "Success path must join the checkpoint thread"
|
||||
assert lock_idx != -1, "Success path must serialize mutation with _agent_lock"
|
||||
assert save_idx != -1, "Success path restore/mutation block not found"
|
||||
assert stop_idx < join_idx < lock_idx <= save_idx, (
|
||||
"Checkpoint stop/join must happen before the success-path session mutation block"
|
||||
)
|
||||
|
||||
def test_silent_failure_path_does_not_reacquire_agent_lock(self):
    """Silent-failure path must not nest `_agent_lock` inside the success lock.

    Reacquiring the same per-session lock inside the post-run_conversation block
    deadlocks because `_get_session_agent_lock()` returns a non-reentrant Lock.

    The guard scans api/streaming.py and fails if ANY `with _agent_lock:`
    appears after the outer success-path lock and between the silent-failure
    branch and the compression marker.
    """
    src = (Path(__file__).parent.parent / "api" / "streaming.py").read_text(
        encoding="utf-8"
    )
    # NOTE(review): str.find is whitespace-exact, so the spacing after "\n"
    # has to match the indentation used in streaming.py — confirm.
    outer_lock_idx = src.find("with _agent_lock:\n s.messages = _restore_reasoning_metadata(")
    silent_failure_idx = src.find("if not _assistant_added and not _token_sent:")
    compression_idx = src.find("# ── Handle context compression side effects ──")

    assert outer_lock_idx != -1, "Outer success-path _agent_lock block not found"
    assert silent_failure_idx != -1, "Silent-failure branch not found"
    assert compression_idx != -1, "Compression marker not found"

    # Search the whole silent-failure window rather than only the first
    # `with _agent_lock:` after the outer lock: an earlier, unrelated lock
    # block must not mask a nested one further down. Starting past the outer
    # lock keeps the outer block itself from being reported.
    window_start = max(silent_failure_idx, outer_lock_idx) + 1
    nested_lock_idx = src.find("with _agent_lock:", window_start, compression_idx)
    assert nested_lock_idx == -1, (
        "Silent-failure path must not reacquire _agent_lock inside the outer lock"
    )
|
||||
|
||||
def test_checkpoint_stop_initialised_before_any_raiseable_code(self):
|
||||
"""Static check: `_checkpoint_stop = None` must appear before any code
|
||||
that could raise inside _run_agent_streaming's outer try."""
|
||||
@@ -271,7 +359,11 @@ class TestCheckpointVariableLifecycle:
|
||||
lines = src.splitlines()
|
||||
try_line = next(
|
||||
i for i, ln in enumerate(lines, 1)
|
||||
if ln.rstrip().endswith("try:") and lines[i - 2].strip().startswith("_checkpoint_stop")
|
||||
if ln.rstrip().endswith("try:")
|
||||
and any(
|
||||
lines[j].strip().startswith("_checkpoint_stop = None")
|
||||
for j in range(max(0, i - 4), i - 1)
|
||||
)
|
||||
)
|
||||
# The assignment must precede the `try:` — not sit inside the nested
|
||||
# block where an earlier line could raise before it runs.
|
||||
@@ -302,3 +394,446 @@ class TestCheckpointVariableLifecycle:
|
||||
|
||||
with pytest.raises(ValueError, match="early failure"):
|
||||
mimic_run_agent_streaming()
|
||||
|
||||
def test_agent_lock_null_guard_in_except_block(self):
    """Static guard: streaming.py must fall back to a nullcontext for `_agent_lock`.

    The except path may run while `_agent_lock` is still None (get_session
    succeeded but _get_session_agent_lock was never called, or it raised), so
    entering `with _agent_lock:` unconditionally would raise AttributeError.
    This check is a plain substring scan of the whole file for the
    `contextlib.nullcontext()` fallback and the `_lock_ctx` variable.
    """
    streaming_path = Path(__file__).parent.parent / "api" / "streaming.py"
    src = streaming_path.read_text(encoding="utf-8")

    required_markers = (
        # The nullcontext fallback itself.
        (
            "contextlib.nullcontext()",
            "The except block must guard _agent_lock being None by falling "
            "back to contextlib.nullcontext() instead of unconditionally "
            "entering `with _agent_lock:`",
        ),
        # The guarded variable the except block is expected to enter.
        (
            "_lock_ctx",
            "The except block must assign _agent_lock / nullcontext to a "
            "variable and use it, not enter `with _agent_lock:` directly",
        ),
    )
    for marker, failure_message in required_markers:
        assert marker in src, failure_message
|
||||
|
||||
def test_periodic_checkpoint_uses_agent_lock(self):
    """Static guard: `_periodic_checkpoint` must save under `_agent_lock`.

    Looks at the 600 characters of api/streaming.py that start at the
    `_periodic_checkpoint` definition and requires a `with _agent_lock:`
    inside that window, so checkpoint saves cannot race other mutators.
    """
    streaming_path = Path(__file__).parent.parent / "api" / "streaming.py"
    src = streaming_path.read_text(encoding="utf-8")

    definition_at = src.find("def _periodic_checkpoint():")
    assert definition_at != -1, "_periodic_checkpoint function not found"

    # Fixed-size window standing in for the function body.
    body_window = src[definition_at:definition_at + 600]
    assert body_window.find("with _agent_lock:") != -1, (
        "_periodic_checkpoint must hold _agent_lock while calling s.save() "
        "to prevent race conditions with other session-mutating endpoints"
    )
|
||||
|
||||
def test_background_title_update_rebinds_to_canonical_session_instance(self):
    """Static guard against mutating a stale Session after the LLM round-trip.

    Within the 3200 characters following the `_run_background_title_update`
    definition in api/streaming.py, the source must take `LOCK` and rebind
    `s` via `SESSIONS.get(session_id, s)` before deciding whether a manual
    rename should block the generated title write.
    """
    streaming_path = Path(__file__).parent.parent / "api" / "streaming.py"
    src = streaming_path.read_text(encoding="utf-8")

    definition_at = src.find("def _run_background_title_update(")
    assert definition_at != -1, "_run_background_title_update not found"

    body_window = src[definition_at:definition_at + 3200]
    expectations = (
        (
            "with LOCK:",
            "_run_background_title_update must acquire LOCK before rebinding "
            "to canonical cached session instance",
        ),
        (
            "s = SESSIONS.get(session_id, s)",
            "_run_background_title_update must rebind to canonical cached "
            "session instance under LOCK",
        ),
    )
    for snippet, failure_message in expectations:
        assert snippet in body_window, failure_message
|
||||
|
||||
def test_cancel_stream_uses_agent_lock(self):
    """Static guard: cancel_stream's session cleanup must take the agent lock.

    Finds the "Session cleanup outside STREAMS_LOCK" marker after the
    `cancel_stream` definition in api/streaming.py and requires
    `_get_session_agent_lock` within the next 800 characters, so the cleanup
    cannot race checkpoint saves or other writers.
    """
    streaming_path = Path(__file__).parent.parent / "api" / "streaming.py"
    src = streaming_path.read_text(encoding="utf-8")

    definition_at = src.find("def cancel_stream(")
    assert definition_at != -1, "cancel_stream function not found"

    # Only look for the cleanup marker from the function definition onwards.
    cleanup_at = src.find("Session cleanup outside STREAMS_LOCK", definition_at)
    assert cleanup_at != -1, "Session cleanup comment not found in cancel_stream"

    cleanup_window = src[cleanup_at:cleanup_at + 800]
    assert cleanup_window.find("_get_session_agent_lock") != -1, (
        "cancel_stream must acquire _get_session_agent_lock during "
        "session cleanup to serialise with the checkpoint thread and "
        "other session-mutating endpoints"
    )
|
||||
|
||||
def test_session_ops_retry_undo_hold_agent_lock(self):
    """Static guard: retry_last and undo_last must run under the agent lock.

    Requires api/session_ops.py to mention `_get_session_agent_lock` and, in
    the 1200 characters following each function definition, to contain a
    `with _get_session_agent_lock` statement covering the read-modify-save
    cycle.
    """
    ops_path = Path(__file__).parent.parent / "api" / "session_ops.py"
    src = ops_path.read_text(encoding="utf-8")

    assert src.find("_get_session_agent_lock") != -1, (
        "session_ops must import _get_session_agent_lock"
    )

    for guarded_name in ("retry_last", "undo_last"):
        definition_at = src.find("def " + guarded_name + "(")
        assert definition_at != -1, f"{guarded_name} not found in session_ops.py"

        body_window = src[definition_at:definition_at + 1200]
        assert "with _get_session_agent_lock" in body_window, (
            f"{guarded_name} must wrap its read-modify-save cycle in "
            f"with _get_session_agent_lock(session_id)"
        )
|
||||
|
||||
def test_periodic_checkpoint_mutation_race_with_undo_last(self, tmp_path, monkeypatch):
    """Race a simulated _periodic_checkpoint against undo_last on one session.

    A background thread repeatedly saves the session under
    _get_session_agent_lock (mirroring production) and reads the file back
    while still holding the lock. The main thread calls undo_last and then
    appends/saves new messages under the same lock. Afterwards the on-disk
    JSON must be parseable, and every snapshot the checkpoint read back must
    equal one of the recorded in-memory states — never an interleaving of
    two different saves.

    Args:
        tmp_path: pytest fixture; holds the throwaway session directory.
        monkeypatch: pytest fixture; redirects models.SESSION_DIR and
            models.SESSION_INDEX_FILE to that directory.
    """

    def _copy_messages(messages):
        """Shallow-copy each message dict so later mutation cannot alter it."""
        return [dict(m) if isinstance(m, dict) else m for m in messages]

    def _role_content_view(messages):
        """Strip display-only metadata, keeping only role/content pairs."""
        return [
            {k: v for k, v in m.items() if k in ("role", "content")}
            if isinstance(m, dict) else m
            for m in messages
        ]

    session_dir = tmp_path / "sessions_undo_race"
    session_dir.mkdir()
    index_file = session_dir / "_index.json"
    monkeypatch.setattr(models, "SESSION_DIR", session_dir)
    monkeypatch.setattr(models, "SESSION_INDEX_FILE", index_file)
    models.SESSIONS.clear()
    try:
        s = Session(
            session_id="race_test",
            title="Race Test",
            messages=[
                {"role": "user", "content": "first"},
                {"role": "assistant", "content": "reply 1"},
                {"role": "user", "content": "second"},
                {"role": "assistant", "content": "reply 2"},
                {"role": "user", "content": "third"},
                {"role": "assistant", "content": "reply 3"},
            ],
        )
        s.save()
        models.SESSIONS[s.session_id] = s

        _checkpoint_stop = threading.Event()
        _checkpoint_activity = [0]
        errors = []
        # Every messages list the checkpoint thread read back from disk.
        checkpoint_snapshots = []
        _lock = threading.Lock()

        from api.config import _get_session_agent_lock
        _agent_lock = _get_session_agent_lock("race_test")

        def _periodic_checkpoint():
            last = 0
            while not _checkpoint_stop.wait(0.01):
                try:
                    cur = _checkpoint_activity[0]
                    if cur > last:
                        # Save and read back under the same lock so the
                        # snapshot is exactly what this save wrote. A
                        # read/parse failure here is the corruption this
                        # test exists to detect, so it is recorded in
                        # `errors` (outer handler) instead of being dropped.
                        with _agent_lock:
                            s.save(skip_index=True)
                            snap = json.loads(s.path.read_text(encoding="utf-8"))
                        with _lock:
                            checkpoint_snapshots.append(snap.get("messages"))
                        last = cur
                except Exception as e:
                    errors.append(e)

        t = threading.Thread(target=_periodic_checkpoint, daemon=True)
        t.start()

        from api.session_ops import undo_last
        # Every state the session is in at a point where a checkpoint might
        # observe it; the initial state is the first valid target.
        allowed_message_snapshots = [_copy_messages(s.messages)]
        try:
            for round_no in range(5):
                _checkpoint_activity[0] += 1
                time.sleep(0.02)
                try:
                    undo_last("race_test")
                except ValueError:
                    # undo_last signalling ValueError is tolerated; the race
                    # is still exercised by the append/save below.
                    pass
                # Post-undo state (before appending new messages) — the
                # checkpoint may observe this.
                allowed_message_snapshots.append(_copy_messages(s.messages))
                # Mutation + save happen under _agent_lock to mirror
                # production paths and keep the checkpoint from observing an
                # intermediate +1-message snapshot.
                with _agent_lock:
                    s.messages.append({"role": "user", "content": f"msg-{round_no}"})
                    s.messages.append({"role": "assistant", "content": f"ans-{round_no}"})
                    # Recorded *before* save so every checkpoint snapshot can
                    # be matched against one of these.
                    allowed_message_snapshots.append(_copy_messages(s.messages))
                    s.save()
        finally:
            # Always stop the worker, even if the loop above raised, so it
            # cannot keep writing into tmp_path after the test ends.
            _checkpoint_stop.set()
            t.join(timeout=2)

        assert not t.is_alive(), "Checkpoint thread did not stop within 2s"
        assert not errors, f"Checkpoint thread encountered errors: {errors}"
        # Verify the on-disk JSON is parseable
        data = json.loads(s.path.read_text(encoding="utf-8"))
        assert data["session_id"] == "race_test"
        # Messages must be a list (not corrupted by concurrent mutation)
        assert isinstance(data["messages"], list)

        # Contract assertion: every checkpoint snapshot's messages must equal
        # one of the allowed in-memory snapshots, never a mix of two saves.
        allowed_views = [
            _role_content_view(allowed) for allowed in allowed_message_snapshots
        ]
        for snap_msgs in checkpoint_snapshots:
            if snap_msgs is None:
                continue
            normalized = _role_content_view(snap_msgs)
            assert normalized in allowed_views, (
                f"Checkpoint snapshot {normalized!r} does not match any "
                f"allowed state — this indicates a serialization failure "
                f"(the _agent_lock is not preventing interleaved writes)."
            )
    finally:
        models.SESSIONS.clear()
|
||||
|
||||
def test_cancel_stream_concurrent_checkpoint_produces_valid_json(self, tmp_path, monkeypatch):
    """Race simulated cancel_stream cleanup against a checkpoint saver.

    A background thread repeatedly saves the session under
    _get_session_agent_lock (mirroring production) and reads the file back
    while still holding the lock. The main thread performs the cancel
    cleanup (clearing active_stream_id and the pending_* fields, then
    saving) under the same lock. Afterwards the on-disk JSON must be
    parseable with active_stream_id None, and every snapshot read back by
    the checkpoint must be internally consistent.

    Args:
        tmp_path: pytest fixture; holds the throwaway session directory.
        monkeypatch: pytest fixture; redirects models.SESSION_DIR and
            models.SESSION_INDEX_FILE to that directory.
    """
    session_dir = tmp_path / "sessions_cancel_race"
    session_dir.mkdir()
    index_file = session_dir / "_index.json"
    monkeypatch.setattr(models, "SESSION_DIR", session_dir)
    monkeypatch.setattr(models, "SESSION_INDEX_FILE", index_file)
    models.SESSIONS.clear()
    try:
        s = Session(
            session_id="cancel_race",
            title="Cancel Race Test",
            messages=[
                {"role": "user", "content": "hello"},
                {"role": "assistant", "content": "world"},
            ],
            active_stream_id="stream-abc",
        )
        s.save()
        models.SESSIONS[s.session_id] = s

        _checkpoint_stop = threading.Event()
        _checkpoint_activity = [0]
        errors = []
        # Every on-disk snapshot read back by the checkpoint thread.
        checkpoint_snapshots = []
        _snap_lock = threading.Lock()

        from api.config import _get_session_agent_lock
        _agent_lock = _get_session_agent_lock("cancel_race")

        def _periodic_checkpoint():
            last = 0
            while not _checkpoint_stop.wait(0.01):
                try:
                    cur = _checkpoint_activity[0]
                    if cur > last:
                        # Save and read back under the same lock so the
                        # snapshot is exactly what this save wrote. A
                        # read/parse failure is the corruption under test,
                        # so it reaches `errors` via the outer handler
                        # instead of being silently dropped.
                        with _agent_lock:
                            s.save(skip_index=True)
                            snap = json.loads(s.path.read_text(encoding="utf-8"))
                        with _snap_lock:
                            checkpoint_snapshots.append(snap)
                        last = cur
                except Exception as e:
                    errors.append(e)

        t = threading.Thread(target=_periodic_checkpoint, daemon=True)
        t.start()

        try:
            # Simulate cancel_stream session cleanup directly
            for _ in range(10):
                _checkpoint_activity[0] += 1
                time.sleep(0.01)
                with _get_session_agent_lock("cancel_race"):
                    s.active_stream_id = None
                    s.pending_user_message = None
                    s.pending_attachments = []
                    s.pending_started_at = None
                    s.save()
        finally:
            # Always stop the worker, even if the loop above raised, so it
            # cannot keep writing into tmp_path after the test ends.
            _checkpoint_stop.set()
            t.join(timeout=2)

        assert not t.is_alive(), "Checkpoint thread did not stop within 2s"
        assert not errors, f"Checkpoint thread encountered errors: {errors}"
        data = json.loads(s.path.read_text(encoding="utf-8"))
        assert data["session_id"] == "cancel_race"
        assert data["active_stream_id"] is None, (
            "active_stream_id must be None after cancel cleanup"
        )
        assert isinstance(data["messages"], list)

        # Contract assertion: both writers hold the same _agent_lock, so they
        # are serialized, but their order is nondeterministic. A snapshot
        # from before the cancel shows active_stream_id="stream-abc", one
        # from after shows None; each must be self-consistent, never a mix.
        for snap in checkpoint_snapshots:
            assert isinstance(snap.get("messages"), list), (
                "Checkpoint snapshot messages must be a list"
            )
            assert snap.get("active_stream_id") in ("stream-abc", None), (
                "Checkpoint snapshot active_stream_id must be either "
                "the initial value or None (serialized, not interleaved), "
                f"got {snap.get('active_stream_id')!r}"
            )
            # active_stream_id None means the cancel cleanup ran, so all four
            # cancel fields must have been cleared together.
            if snap.get("active_stream_id") is None:
                assert snap.get("pending_user_message") is None, (
                    "Snapshot with active_stream_id=None must also have "
                    "pending_user_message=None (atomic cancel cleanup "
                    "under _agent_lock)"
                )
                assert snap.get("pending_attachments") in ([], None), (
                    "Snapshot with active_stream_id=None must also have "
                    "empty pending_attachments (atomic cancel cleanup "
                    "under _agent_lock)"
                )
                assert snap.get("pending_started_at") is None, (
                    "Snapshot with active_stream_id=None must also have "
                    "pending_started_at=None (atomic cancel cleanup "
                    "under _agent_lock)"
                )
    finally:
        models.SESSIONS.clear()
|
||||
|
||||
def test_lock_identity_preserved_after_session_id_rotation(self):
    """When compression rotates session_id, the per-session lock must be
    aliased so that _get_session_agent_lock(new_sid) returns the *same*
    Lock object as _get_session_agent_lock(old_sid).

    This is a runtime simulation, not a source scan: it performs on the real
    SESSION_AGENT_LOCKS registry the migration that streaming.py is
    described as doing inside the compression rotation block. Registry
    entries created here are removed even when an assertion fails.
    """
    from api.config import (
        _get_session_agent_lock,
        SESSION_AGENT_LOCKS,
        SESSION_AGENT_LOCKS_LOCK,
    )
    old_sid = "pre-rotation-id"
    new_sid = "post-rotation-id"

    try:
        # Acquire the lock under the old ID
        old_lock = _get_session_agent_lock(old_sid)

        # Simulate the migration that streaming.py does during compression:
        # alias new_sid → held _agent_lock reference, then pop old_sid.
        _agent_lock = old_lock
        with SESSION_AGENT_LOCKS_LOCK:
            SESSION_AGENT_LOCKS[new_sid] = _agent_lock
            SESSION_AGENT_LOCKS.pop(old_sid, None)

        # Now looking up the new ID must return the exact same Lock object
        new_lock = _get_session_agent_lock(new_sid)
        assert new_lock is old_lock, (
            f"After rotation, _get_session_agent_lock({new_sid!r}) must "
            f"return the same Lock object as _get_session_agent_lock({old_sid!r}); "
            f"got {new_lock!r} vs {old_lock!r}"
        )

        # The old ID entry must no longer exist (it was popped)
        with SESSION_AGENT_LOCKS_LOCK:
            assert old_sid not in SESSION_AGENT_LOCKS, (
                f"Old session ID {old_sid!r} must be removed from "
                f"SESSION_AGENT_LOCKS after rotation"
            )
    finally:
        # Cleanup runs on failure too, so the module-level registry does not
        # carry these entries into other tests.
        with SESSION_AGENT_LOCKS_LOCK:
            SESSION_AGENT_LOCKS.pop(new_sid, None)
            SESSION_AGENT_LOCKS.pop(old_sid, None)
|
||||
|
||||
def test_lock_rotation_migration_survives_old_id_already_pruned(self):
    """Compression lock migration must not require old_sid to exist in dict.

    A concurrent /api/session/delete can prune old_sid before rotation code
    runs. The migration must still succeed by assigning the held _agent_lock
    reference directly. Registry entries created here are removed even when
    an assertion fails.
    """
    from api.config import (
        _get_session_agent_lock,
        SESSION_AGENT_LOCKS,
        SESSION_AGENT_LOCKS_LOCK,
    )
    old_sid = "pre-rotation-pruned"
    new_sid = "post-rotation-pruned"

    try:
        _agent_lock = _get_session_agent_lock(old_sid)
        with SESSION_AGENT_LOCKS_LOCK:
            SESSION_AGENT_LOCKS.pop(old_sid, None)  # simulate concurrent prune

        # Must not raise KeyError even though old_sid is absent.
        with SESSION_AGENT_LOCKS_LOCK:
            SESSION_AGENT_LOCKS[new_sid] = _agent_lock
            SESSION_AGENT_LOCKS.pop(old_sid, None)

        new_lock = _get_session_agent_lock(new_sid)
        assert new_lock is _agent_lock
    finally:
        # Cleanup runs on failure too, so the module-level registry does not
        # carry these entries into other tests.
        with SESSION_AGENT_LOCKS_LOCK:
            SESSION_AGENT_LOCKS.pop(new_sid, None)
            SESSION_AGENT_LOCKS.pop(old_sid, None)
|
||||
|
||||
@@ -382,6 +382,56 @@ def test_deadlock_guard_on_fallback():
|
||||
assert isinstance(index, list)
|
||||
|
||||
|
||||
def test_incremental_index_disk_io_runs_outside_lock(monkeypatch):
    """Fast-path disk I/O (fsync/replace) must run after releasing LOCK.

    Seeds the index with one session via a full rebuild, then performs an
    incremental update for that session while os.fsync (as seen through
    `models.os`) is wrapped to record whether `models.LOCK` is held at each
    call. At least one fsync must happen and none may occur under LOCK.

    Args:
        monkeypatch: pytest fixture used to wrap `models.os.fsync`.
    """
    sA = _make_session("sess_a", "Alpha", updated_at=100.0)
    sA.path.write_text(json.dumps(sA.__dict__, ensure_ascii=False, indent=2), encoding="utf-8")
    _write_session_index(updates=None)  # seed index

    sA.title = "Alpha V2"
    sA.updated_at = 200.0

    fsync_lock_states = []
    original_fsync = models.os.fsync

    def _observing_fsync(fd):
        # Record the lock state first, then delegate to the real fsync.
        fsync_lock_states.append(models.LOCK.locked())
        return original_fsync(fd)

    monkeypatch.setattr(models.os, "fsync", _observing_fsync)

    _write_session_index(updates=[sA])

    assert fsync_lock_states, "Expected at least one fsync call during index write"
    assert not any(fsync_lock_states), (
        "_write_session_index fast path must not hold LOCK during fsync/disk I/O"
    )
|
||||
|
||||
|
||||
def test_full_rebuild_index_disk_io_runs_outside_lock(monkeypatch):
    """Full-rebuild disk I/O (fsync/replace) must run after releasing LOCK.

    Writes one session file, wraps os.fsync (as seen through `models.os`) so
    each call records whether `models.LOCK` is held, and triggers a full
    index rebuild. At least one fsync must happen and none under LOCK.
    """
    session = _make_session("sess_a", "Alpha", updated_at=100.0)
    payload = json.dumps(session.__dict__, ensure_ascii=False, indent=2)
    session.path.write_text(payload, encoding="utf-8")

    real_fsync = models.os.fsync
    lock_held_at_fsync = []

    def _recording_fsync(fd):
        # Capture the lock state before handing off to the real fsync.
        lock_held_at_fsync.append(models.LOCK.locked())
        return real_fsync(fd)

    monkeypatch.setattr(models.os, "fsync", _recording_fsync)

    _write_session_index(updates=None)

    assert lock_held_at_fsync, "Expected at least one fsync call during index write"
    assert True not in [bool(held) for held in lock_held_at_fsync], (
        "_write_session_index full rebuild must not hold LOCK during fsync/disk I/O"
    )
|
||||
|
||||
|
||||
def test_all_sessions_ignores_stale_index_entries():
|
||||
"""Reading via all_sessions() must not surface ghost rows from _index.json."""
|
||||
index_file = models.SESSION_INDEX_FILE
|
||||
|
||||
@@ -164,7 +164,7 @@ class TestIssue495TitleStreaming(unittest.TestCase):
|
||||
# After the stream_end fix, title uses original session_id param (not s.session_id
|
||||
# which can be rotated during context compression — see #652 fix)
|
||||
self.assertIn(
|
||||
"put_event('title', {'session_id': session_id, 'title': s.title})",
|
||||
"put_event('title', {'session_id': session_id, 'title': effective_title})",
|
||||
STREAMING_PY,
|
||||
"streaming.py should emit a title SSE event when title is updated",
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user