fix(streaming): quota error detection, error persistence, stream_end session_id fix (#767)

- quota_exhausted error type: distinguishes credit exhaustion from rate limits
- Streaming errors persisted to session file so they survive page reload
- _error flag excludes persisted errors from subsequent LLM API calls
- stream_end and title SSE events use original session_id (not s.session_id which rotates during context compaction)

Fixes #739, #652, #653
This commit is contained in:
nesquena-hermes
2026-04-20 15:48:19 -07:00
committed by GitHub
parent 76e602af25
commit 765d8520d4
7 changed files with 278 additions and 70 deletions

View File

@@ -0,0 +1,124 @@
"""
Tests for streaming error handling fixes:
#739 — quota/credit exhaustion detected as distinct error type + persisted to session
#652 — context compaction session_id rotation: stream_end uses original session_id
#653 — bad tool call hang: same stream_end fix applies
All static tests (no live server required).
"""
import ast
import re
import pathlib
STREAMING = pathlib.Path(__file__).parent.parent / 'api' / 'streaming.py'
MESSAGES_JS = pathlib.Path(__file__).parent.parent / 'static' / 'messages.js'
streaming_src = STREAMING.read_text(encoding='utf-8')
messages_js_src = MESSAGES_JS.read_text(encoding='utf-8')
# ── #739: Quota exhaustion detection ─────────────────────────────────────────
class TestQuotaDetection:
"""Quota-exhausted errors must be classified separately from rate limits."""
def test_quota_patterns_present_in_silent_failure_path(self):
"""The silent-failure path checks for credit/quota strings."""
block = streaming_src
assert 'insufficient credit' in block
assert 'credit balance' in block
assert 'credits exhausted' in block
assert 'quota_exceeded' in block
assert 'exceeded your current quota' in block
def test_quota_type_emitted_as_quota_exhausted(self):
"""The apperror type is 'quota_exhausted', not 'error' or 'rate_limit'."""
assert "'quota_exhausted'" in streaming_src or '"quota_exhausted"' in streaming_src
def test_quota_checked_before_rate_limit(self):
"""Quota check must appear before the rate-limit check in the exception path.
OpenAI billing 429s overlap with rate-limit patterns."""
quota_pos = streaming_src.find('_exc_is_quota')
rate_pos = streaming_src.find('_exc_is_rate_limit')
assert quota_pos != -1, '_exc_is_quota not found in exception path'
assert rate_pos != -1, '_exc_is_rate_limit not found in exception path'
assert quota_pos < rate_pos, 'Quota check must appear before rate-limit check'
def test_rate_limit_excludes_quota(self):
"""Rate-limit detection must be guarded so quota errors don't also match."""
# The pattern: _exc_is_rate_limit = (not _exc_is_quota) and (...)
assert '(not _exc_is_quota)' in streaming_src
def test_js_quota_label_present(self):
"""messages.js renders a 'quota_exhausted' apperror with a distinct label."""
assert "quota_exhausted" in messages_js_src
assert "Out of credits" in messages_js_src
# ── #739: Error persistence across reload ─────────────────────────────────────
class TestErrorPersistence:
"""Errors must be saved to the session so they survive page reload."""
def test_silent_failure_appends_error_message(self):
"""Silent-failure path appends an _error-marked message before returning."""
# Must append to s.messages with _error key
assert "s.messages.append(" in streaming_src
assert "'_error': True" in streaming_src
def test_silent_failure_calls_save_before_return(self):
"""save() must be called after appending the error message."""
# Find the silent failure block area and verify save precedes return
pattern = re.compile(
r"s\.messages\.append\(.*?'_error': True.*?\).*?s\.save\(\).*?return",
re.DOTALL
)
assert pattern.search(streaming_src), \
"save() must be called after appending the error message in the silent-failure path"
def test_exception_path_appends_error_message(self):
"""Exception path also persists the error to the session."""
# Both paths should have _error persistence
count = streaming_src.count("'_error': True")
assert count >= 2, f"Expected at least 2 _error persistence sites, found {count}"
def test_sanitize_skips_error_messages(self):
"""_sanitize_messages_for_api must not send _error messages to the LLM."""
assert "msg.get('_error')" in streaming_src or 'msg.get("_error")' in streaming_src
# The skip must come before the role/tool filtering logic
error_skip_pos = streaming_src.find("msg.get('_error')")
tool_filter_pos = streaming_src.find("if role == 'tool':")
assert error_skip_pos < tool_filter_pos, \
"_error skip must appear before the tool-role filter in _sanitize_messages_for_api"
# ── #652/#653: Context compaction stream_end fix ──────────────────────────────
class TestStreamEndSessionId:
"""stream_end must use the original session_id param, not s.session_id."""
def test_non_bg_title_stream_end_uses_session_id_param(self):
"""When no background title is spawned, stream_end should use original session_id."""
# The fixed code: put('stream_end', {'session_id': session_id})
# Not: put('stream_end', {'session_id': s.session_id})
# Verify the pattern appears in the non-background-title branch
assert "put('stream_end', {'session_id': session_id})" in streaming_src
def test_background_title_thread_stream_end_uses_session_id_param(self):
"""Background title thread also emits stream_end with original session_id."""
# In _run_background_title_update: put_event('stream_end', {'session_id': session_id})
# The session_id param is passed from the caller with the original value
assert "put_event('stream_end', {'session_id': session_id})" in streaming_src
def test_s_session_id_not_used_in_stream_end(self):
"""s.session_id (which may be rotated after compaction) must not appear in stream_end."""
# Find all stream_end emissions and verify none use s.session_id
for match in re.finditer(r"put[_a-z]*\('stream_end',[^)]+\)", streaming_src):
assert 's.session_id' not in match.group(), \
f"stream_end uses s.session_id (may be rotated): {match.group()}"
def test_title_event_uses_original_session_id(self):
"""title event in background title thread uses original session_id, not s.session_id."""
# Client guard: if((d.session_id||activeSid)!==activeSid) return;
# So title must be emitted with the original id
assert "put_event('title', {'session_id': session_id," in streaming_src

View File

@@ -37,7 +37,8 @@ class TestSilentErrorDetection:
"""streaming.py must return after emitting apperror (not also emit done)."""
# The return statement must come after the put('apperror') for no_response
no_resp_pos = STREAMING_PY.find("'no_response'")
return_pos = STREAMING_PY.find("return # Don't emit done", no_resp_pos)
# Comment updated: "apperror already closes the stream on the client side"
return_pos = STREAMING_PY.find("return # apperror already closes the stream", no_resp_pos)
assert no_resp_pos != -1, "no_response type not found in streaming.py"
assert return_pos != -1, (
"streaming.py must return after emitting apperror to prevent also emitting done (#373)"

View File

@@ -37,30 +37,33 @@ class TestStreamingAuthErrorDetection:
)
def test_is_auth_error_flag_defined(self):
"""is_auth_error variable must exist in the error handler."""
"""auth error variable must exist in the error handler (exception path and silent-failure path)."""
src = _read("api/streaming.py")
assert "is_auth_error" in src, (
"is_auth_error flag not found in streaming.py"
# Variable renamed to _exc_is_auth in exception path, _is_auth in silent-failure path
assert "_exc_is_auth" in src or "_is_auth" in src, (
"auth error flag not found in streaming.py"
)
def test_auth_error_detects_401(self):
"""'401' must be part of the auth error detection logic."""
src = _read("api/streaming.py")
# Find the is_auth_error block
idx = src.find("is_auth_error")
# Variable renamed to _exc_is_auth in exception path, _is_auth in silent-failure path
idx = src.find("_exc_is_auth")
assert idx != -1
block = src[idx:idx + 400]
block = src[idx:idx + 500]
assert "'401'" in block or '"401"' in block, (
"'401' not in is_auth_error detection block"
"'401' not in auth error detection block"
)
def test_auth_error_detects_unauthorized(self):
"""'unauthorized' must be part of the auth error detection logic."""
src = _read("api/streaming.py")
idx = src.find("is_auth_error")
block = src[idx:idx + 400]
# Variable renamed to _exc_is_auth in exception path
idx = src.find("_exc_is_auth")
block = src[idx:idx + 500]
assert "unauthorized" in block.lower(), (
"'unauthorized' not in is_auth_error detection block"
"'unauthorized' not in auth error detection block"
)
def test_auth_error_hint_mentions_hermes_model(self):
@@ -77,11 +80,14 @@ class TestStreamingAuthErrorDetection:
def test_auth_error_does_not_catch_rate_limit(self):
"""Rate limit errors must not be reclassified as auth_mismatch."""
src = _read("api/streaming.py")
# is_rate_limit must come before is_auth_error in the elif chain
rl_idx = src.find("is_rate_limit")
ae_idx = src.find("is_auth_error")
# Variables renamed: _exc_is_rate_limit / _exc_is_auth in exception path
# Quota check comes first (before rate limit), then rate limit, then auth
rl_idx = src.find("_exc_is_rate_limit")
ae_idx = src.find("_exc_is_auth")
assert rl_idx != -1, "_exc_is_rate_limit not found in streaming.py exception path"
assert ae_idx != -1, "_exc_is_auth not found in streaming.py exception path"
assert rl_idx < ae_idx, (
"is_rate_limit check should precede is_auth_error"
"_exc_is_rate_limit check should precede _exc_is_auth — "
"rate limit errors must not be mistaken for auth errors"
)

View File

@@ -156,8 +156,10 @@ class TestIssue495TitleStreaming(unittest.TestCase):
)
def test_streaming_emits_title_sse_event(self):
# After the stream_end fix, title uses original session_id param (not s.session_id
# which can be rotated during context compression — see #652 fix)
self.assertIn(
"put_event('title', {'session_id': s.session_id, 'title': s.title})",
"put_event('title', {'session_id': session_id, 'title': s.title})",
STREAMING_PY,
"streaming.py should emit a title SSE event when title is updated",
)