Merge pull request #923 from nesquena/feat/917-streaming-markdown

Merging feat/917-streaming-markdown. 2065 tests pass. APPROVED by @nesquena. Pre-existing QA harness failure on master confirmed (not a regression).
2026-04-23 17:43:40 -07:00
parent a3647570fb b563484a56
commit ff970ec844
5 changed files with 630 additions and 7 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,11 @@
  workspace subtree) and never enumerate blocked system roots. (`api/routes.py`,
  `api/workspace.py`, `static/panels.js`, `static/style.css`) (partial for #616)

+## [v0.50.180] — 2026-04-23
+
+### Added
+- **Incremental streaming markdown via `streaming-markdown`** — replaces the per-animation-frame full `innerHTML` re-render with an incremental DOM-building parser. During streaming, only new character deltas are fed to the parser per frame (`_smdWrite()`), eliminating DOM thrashing and improving rendering smoothness. Prism.js / KaTeX state no longer gets reset mid-stream. Falls back to the existing `renderMd()` path when the library is unavailable. (`static/messages.js`, `static/index.html`) Co-authored by @bsgdigital.
+
 ## [v0.50.179] — 2026-04-23

 ### Fixed
--- a/static/index.html
+++ b/static/index.html
@@ -21,6 +21,13 @@
 <link rel="stylesheet" href="static/style.css">
  <!-- KaTeX math rendering CSS (loaded eagerly to prevent layout shift) -->
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/katex.min.css" integrity="sha384-5TcZemv2l/9On385z///+d7MSYlvIEw9FuZTIdZ14vJLqWphw7e7ZPuOiCHJcFCP" crossorigin="anonymous">
+  <!-- streaming-markdown: incremental DOM-building markdown parser for live streams -->
+  <script type="module">
+    import * as smd from 'https://cdn.jsdelivr.net/npm/streaming-markdown@0.2.15/smd.min.js';
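+    // NOTE: a static `import` cannot carry an `integrity` attribute, so this module is NOT SRI-verified here; only the version is pinned in the URL. Enforcing the hash below would require an import map `integrity` entry or a service worker.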
+    // sha384 of smd.min.js @0.2.15: sha384-T6r95ocN9t3W8tUK2Fa6FPaO7bJryyjyW0WCalrUnpgtm2qXr5xcN4vwPYEJ6vHa
+    window.smd = smd;
+  </script>
  <!-- Prism.js syntax highlighting (loaded async, non-blocking) -->
  <link id="prism-theme" rel="stylesheet" href="https://cdn.jsdelivr.net/npm/prismjs@1.29.0/themes/prism-tomorrow.min.css" integrity="sha384-wFjoQjtV1y5jVHbt0p35Ui8aV8GVpEZkyF99OXWqP/eNJDU93D3Ugxkoyh6Y2I4A" crossorigin="anonymous">
  <script src="https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-core.min.js" integrity="sha384-MXybTpajaBV0AkcBaCPT4KIvo0FzoCiWXgcihYsw4FUkEz0Pv3JGV6tk2G8vJtDc" crossorigin="anonymous" defer></script>
--- a/static/messages.js
+++ b/static/messages.js
@@ -189,6 +189,12 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
  let assistantBody=null;
  let segmentStart=0;      // char offset in assistantText where current segment begins
  let _freshSegment=false; // true after a tool call — forces a new DOM segment
+  // streaming-markdown state: incremental DOM-building parser per segment
+  let _smdParser=null;     // current smd parser instance (null until first content)
+  let _smdWrittenLen=0;    // how many chars of displayText have been fed to smd parser
+  // On reconnect, the assistantBody already has partial smd-rendered content.
+  // We clear it on first new token and restart the parser from the reconnect point.
+  let _smdReconnect=reconnecting;
  // Thinking tag patterns for streaming display
  const _thinkPairs=[
    {open:'<think>',close:'</think>'},
@@ -366,6 +372,59 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
    // removeThinking() won't find it anyway, but guard explicitly.
    if(!reasoningText) removeThinking();
  }
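+  // Helper: create (or recreate) the smd parser bound to a given DOM element and reset the written-length counter.
+  // Called lazily from _scheduleRender whenever no parser exists: on a segment's first render, and again after a tool call has ended the previous parser.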
+  function _smdNewParser(el){
+    _smdWrittenLen=0;
+    if(!window.smd){_smdParser=null;return;}
+    const renderer=window.smd.default_renderer(el);
+    _smdParser=window.smd.parser(renderer);
+  }
+  // Helper: end the current smd parser (flushes remaining state) and null it out.
+  function _smdEndParser(){
+    if(_smdParser&&window.smd){
+      try{window.smd.parser_end(_smdParser);}catch(_){}
+      // parser_end may flush remaining markdown that creates new links/images —
+      // re-sanitize the body before the DOM is handed off to highlightCode / renderMessages.
+      if(assistantBody){_sanitizeSmdLinks(assistantBody);}
+    }
+    _smdParser=null;
+    _smdWrittenLen=0;
+  }
+  // Helper: feed new displayText delta to the smd parser.
+  // Only feeds chars beyond what has already been written (_smdWrittenLen).
+  function _smdWrite(displayText){
+    if(!_smdParser||!window.smd) return;
+    const delta=displayText.slice(_smdWrittenLen);
+    if(!delta) return;
+    try{window.smd.parser_write(_smdParser,delta);}catch(_){}
+    _smdWrittenLen=displayText.length;
+    // streaming-markdown does NOT sanitize URL schemes — `[click](javascript:...)`
+    // and `![alt](javascript:...)` survive as href/src.  Strip any unsafe schemes
+    // from anchors/images that were just added to the live DOM.  The existing
+    // renderMd() path filters these via its http(s)-only regex; we need a matching
+    // guard here so the live-stream path isn't an XSS vector for agent-echoed
+    // prompt-injection content.  The final renderMessages() call at `done` uses
+    // renderMd which is already safe, but during streaming the user could click
+    // a malicious link before that replacement happens.
+    if(assistantBody){_sanitizeSmdLinks(assistantBody);}
+  }
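+  // Allowlist of URL prefixes for anchors and images rendered from agent-streamed markdown:
+  // http(s):, mailto:, tel:, and relative forms starting with `/`, `#`, `?` or `.` (a leading `/` also admits protocol-relative `//host`). Anything else is stripped. Intended to mirror what renderMd() emits.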
+  const _SMD_SAFE_URL_RE=/^(?:https?:|mailto:|tel:|\/|#|\?|\.)/i;
+  function _sanitizeSmdLinks(root){
+    if(!root||!root.querySelectorAll) return;
+    const _a=root.querySelectorAll('a[href]');
+    for(let i=0;i<_a.length;i++){
+      const n=_a[i],v=n.getAttribute('href')||'';
+      if(!_SMD_SAFE_URL_RE.test(v)){n.removeAttribute('href');n.setAttribute('data-blocked-scheme','1');}
+    }
+    const _im=root.querySelectorAll('img[src]');
+    for(let i=0;i<_im.length;i++){
+      const n=_im[i],v=n.getAttribute('src')||'';
+      if(!_SMD_SAFE_URL_RE.test(v)){n.removeAttribute('src');n.setAttribute('data-blocked-scheme','1');}
+    }
+  }
  function _scheduleRender(){
    if(_renderPending) return;
    if(_streamFinalized) return; // Bug A: don't schedule new rAF after stream finalized
@@ -376,12 +435,23 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
      const parsed=_parseStreamState();
      _renderLiveThinking(parsed);
      if(assistantBody){
-        // Render only the text belonging to the current segment (after the last tool call).
-        // segmentStart=0 for the first segment, or assistantText.length-at-last-tool for later ones.
-        const segText = segmentStart===0
-          ? parsed.displayText                          // first segment: use full display (handles think-tag stripping)
-          : renderMd ? renderMd(assistantText.slice(segmentStart)) : assistantText.slice(segmentStart);
-        assistantBody.innerHTML = segText || '';
+        const displayText = segmentStart===0
+          ? parsed.displayText                          // first segment: uses think-tag stripping
+          : _stripXmlToolCalls(assistantText.slice(segmentStart));
+        if(!_smdParser&&window.smd){
+          // On reconnect: prior content in assistantBody came from a different smd parser run.
+          // Clear it and start fresh — renderMessages() on done will restore the full content.
+          if(_smdReconnect){assistantBody.innerHTML='';_smdReconnect=false;}
+          _smdNewParser(assistantBody);
+        }
+        if(_smdParser){
+          _smdWrite(displayText);
+        } else {
+          // Fallback: window.smd is not available (module not loaded yet, or failed to load) — use the renderMd path
+          assistantBody.innerHTML = (segmentStart===0
+            ? parsed.displayText
+            : renderMd ? renderMd(assistantText.slice(segmentStart)) : assistantText.slice(segmentStart)) || '';
+        }
      }
      scrollIfPinned();
    });
@@ -461,6 +531,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
      assistantBody=null;
      segmentStart=assistantText.length; // new segment starts at current text length
      _freshSegment=true;                // prevent reuse of old DOM node
+      _smdEndParser();                   // finalize current smd parser; new one created on next token
      scrollIfPinned();
    });

@@ -551,6 +622,19 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
      _streamFinalized=true;
      if(_pendingRafHandle!==null){cancelAnimationFrame(_pendingRafHandle);_pendingRafHandle=null;_renderPending=false;}
      if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
+      // Finalize smd parser — flushes any remaining buffered markdown state
+      // and runs Prism + copy buttons on the live segment before the DOM is replaced
+      if(assistantBody){
+        const _finBody=assistantBody;
+        _smdEndParser();
+        requestAnimationFrame(()=>{
+          if(typeof highlightCode==='function') highlightCode(_finBody);
+          if(typeof addCopyButtons==='function') addCopyButtons(_finBody);
+          if(typeof renderKatexBlocks==='function') renderKatexBlocks();
+        });
+      } else {
+        _smdEndParser();
+      }
      const d=JSON.parse(e.data);
      delete INFLIGHT[activeSid];
      clearInflight();clearInflightState(activeSid);
@@ -617,6 +701,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
      _terminalStateReached=true;
      _streamFinalized=true;
      if(_pendingRafHandle!==null){cancelAnimationFrame(_pendingRafHandle);_pendingRafHandle=null;_renderPending=false;}
+      _smdEndParser();
      if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
      // Application-level error sent explicitly by the server (rate limit, crash, etc.)
      // This is distinct from the SSE network 'error' event below.
@@ -694,6 +779,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
      _terminalStateReached=true;
      _streamFinalized=true;
      if(_pendingRafHandle!==null){cancelAnimationFrame(_pendingRafHandle);_pendingRafHandle=null;_renderPending=false;}
+      _smdEndParser();
      if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
      source.close();
      delete INFLIGHT[activeSid];clearInflight();clearInflightState(activeSid);stopApprovalPolling();stopClarifyPolling();
--- a/tests/test_regressions.py
+++ b/tests/test_regressions.py
@@ -433,7 +433,7 @@ def test_done_handler_sets_busy_false_before_renderMessages(cleanup_test_session
    if done_idx < 0:
        done_idx = src.find("es.addEventListener('done'")
    assert done_idx >= 0
-    done_block = src[done_idx:done_idx+2900]
+    done_block = src[done_idx:done_idx+3300]
    # S.busy=false must appear before renderMessages() within the done handler
    busy_pos = done_block.find("S.busy=false;")
    render_pos = done_block.find("renderMessages()")
--- a/tests/test_streaming_markdown.py
+++ b/tests/test_streaming_markdown.py
@@ -0,0 +1,525 @@
+"""Tests for incremental streaming-markdown (smd) integration in messages.js.
+
+PR: feat: use streaming-markdown for incremental live rendering
+
+The change replaces the per-rAF `assistantBody.innerHTML = renderMd(...)` call
+with an incremental DOM-building approach powered by the streaming-markdown
+library (https://github.com/thetarnav/streaming-markdown):
+
+  - During streaming: smd.parser_write() feeds new text deltas into a live DOM
+    tree — no full re-render per frame, no innerHTML thrash.
+  - On done/apperror/cancel: smd.parser_end() flushes remaining parser state,
+    then Prism / copy buttons / KaTeX are run on the live segment.
+  - On tool event: smd.parser_end() finalises the current segment; the next
+    token after the tool creates a fresh parser bound to the new assistantBody.
+  - Fallback: when window.smd is not yet loaded, the old renderMd path is used.
+  - Reconnect: _smdReconnect flag clears stale DOM from the previous parser run
+    and restarts the smd parser from the reconnect point.
+
+Tests are static (regex / substring / brace-counting over the source text) — no browser required.
+"""
+
+import pathlib
+import re
+
+REPO = pathlib.Path(__file__).parent.parent
+MESSAGES_JS = (REPO / "static" / "messages.js").read_text(encoding="utf-8")
+INDEX_HTML = (REPO / "static" / "index.html").read_text(encoding="utf-8")
+
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+def extract_fn(src, name, *, brace_depth=1):
+    """Return the text of a JS function starting from `function <name>` to its
+    closing brace.  Works for both standalone and closure-local functions.
+    Does a simple brace-counting walk so it handles nested blocks correctly.
+    """
+    pattern = rf"function {re.escape(name)}\s*\("
+    m = re.search(pattern, src)
+    if not m:
+        return None
+    start = m.start()
+    # Find the opening brace
+    brace_pos = src.index("{", m.end())
+    depth = 1
+    pos = brace_pos + 1
+    while pos < len(src) and depth > 0:
+        ch = src[pos]
+        if ch == "{":
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+        pos += 1
+    return src[start:pos]
+
+
+def extract_event_handler(src, event_name):
+    """Return the text of a source.addEventListener('<event_name>', ...) block."""
+    pattern = rf"source\.addEventListener\('{re.escape(event_name)}'"
+    m = re.search(pattern, src)
+    if not m:
+        return None
+    # Walk forward to collect the matching parenthesis
+    paren_depth = 0
+    start = m.start()
+    pos = m.end()
+    # The regex match already consumed addEventListener's "(", so start at depth 1
+    paren_depth = 1
+    while pos < len(src) and paren_depth > 0:
+        ch = src[pos]
+        if ch == "(":
+            paren_depth += 1
+        elif ch == ")":
+            paren_depth -= 1
+        pos += 1
+    return src[start:pos]
+
+
+def extract_attach_live_stream_prelude(src):
+    """Return the text from attachLiveStream opening to the first nested fn."""
+    m = re.search(r"function attachLiveStream\(", src)
+    if not m:
+        return None
+    # Find the first nested function definition inside the closure
+    inner = re.search(r"\bfunction _isActiveSession\b", src[m.start():])
+    if not inner:
+        return src[m.start(): m.start() + 5000]
+    return src[m.start(): m.start() + inner.start()]
+
+
+# ── 1. index.html: smd script tag ─────────────────────────────────────────────
+
+class TestIndexHtmlSmdScript:
+    """streaming-markdown must be loaded in index.html before messages.js uses it."""
+
+    def test_smd_cdn_url_present(self):
+        assert "streaming-markdown" in INDEX_HTML, (
+            "index.html must include a <script> tag loading streaming-markdown"
+        )
+
+    def test_smd_assigned_to_window(self):
+        assert "window.smd" in INDEX_HTML, (
+            "The smd ES module must be assigned to window.smd so messages.js can reach it"
+        )
+
+    def test_smd_loaded_as_module(self):
+        assert 'type="module"' in INDEX_HTML or "type='module'" in INDEX_HTML, (
+            "streaming-markdown must be loaded with type=\"module\" (it is an ES module)"
+        )
+
+
+# ── 2. Closure variable declarations ─────────────────────────────────────────
+
+class TestClosureVariables:
+    """_smdParser, _smdWrittenLen and _smdReconnect must be declared in the
+    attachLiveStream closure, not inside a helper or handler."""
+
+    def get_prelude(self):
+        return extract_attach_live_stream_prelude(MESSAGES_JS)
+
+    def test_smd_parser_declared(self):
+        prelude = self.get_prelude()
+        assert prelude and "_smdParser" in prelude, (
+            "_smdParser must be declared in the attachLiveStream closure scope"
+        )
+
+    def test_smd_written_len_declared(self):
+        prelude = self.get_prelude()
+        assert prelude and "_smdWrittenLen" in prelude, (
+            "_smdWrittenLen must be declared in the attachLiveStream closure scope"
+        )
+
+    def test_smd_reconnect_declared(self):
+        prelude = self.get_prelude()
+        assert prelude and "_smdReconnect" in prelude, (
+            "_smdReconnect must be declared in the attachLiveStream closure scope"
+        )
+
+    def test_smd_parser_initialised_null(self):
+        prelude = self.get_prelude()
+        assert prelude and (
+            "_smdParser=null" in prelude or "_smdParser = null" in prelude
+        ), "_smdParser must be initialised to null"
+
+    def test_smd_written_len_initialised_zero(self):
+        prelude = self.get_prelude()
+        assert prelude and (
+            "_smdWrittenLen=0" in prelude or "_smdWrittenLen = 0" in prelude
+        ), "_smdWrittenLen must be initialised to 0"
+
+
+# ── 3. Helper functions ───────────────────────────────────────────────────────
+
+class TestSmdHelpers:
+    """_smdNewParser, _smdEndParser and _smdWrite must exist and have the right shape."""
+
+    def test_smd_new_parser_exists(self):
+        fn = extract_fn(MESSAGES_JS, "_smdNewParser")
+        assert fn is not None, "_smdNewParser function must be defined"
+
+    def test_smd_new_parser_resets_written_len(self):
+        fn = extract_fn(MESSAGES_JS, "_smdNewParser")
+        assert fn and (
+            "_smdWrittenLen=0" in fn or "_smdWrittenLen = 0" in fn
+        ), "_smdNewParser must reset _smdWrittenLen to 0"
+
+    def test_smd_new_parser_calls_default_renderer(self):
+        fn = extract_fn(MESSAGES_JS, "_smdNewParser")
+        assert fn and "default_renderer" in fn, (
+            "_smdNewParser must call smd.default_renderer() to create a renderer"
+        )
+
+    def test_smd_new_parser_calls_parser(self):
+        fn = extract_fn(MESSAGES_JS, "_smdNewParser")
+        assert fn and (
+            "window.smd.parser(" in fn or "smd.parser(" in fn
+        ), "_smdNewParser must call smd.parser(renderer) to create a parser"
+
+    def test_smd_new_parser_guards_on_window_smd(self):
+        fn = extract_fn(MESSAGES_JS, "_smdNewParser")
+        assert fn and "window.smd" in fn, (
+            "_smdNewParser must guard on window.smd before using the library"
+        )
+
+    def test_smd_end_parser_exists(self):
+        fn = extract_fn(MESSAGES_JS, "_smdEndParser")
+        assert fn is not None, "_smdEndParser function must be defined"
+
+    def test_smd_end_parser_calls_parser_end(self):
+        fn = extract_fn(MESSAGES_JS, "_smdEndParser")
+        assert fn and "parser_end" in fn, (
+            "_smdEndParser must call smd.parser_end() to flush remaining parser state"
+        )
+
+    def test_smd_end_parser_nulls_parser(self):
+        fn = extract_fn(MESSAGES_JS, "_smdEndParser")
+        assert fn and (
+            "_smdParser=null" in fn or "_smdParser = null" in fn
+        ), "_smdEndParser must set _smdParser to null after flushing"
+
+    def test_smd_end_parser_resets_written_len(self):
+        fn = extract_fn(MESSAGES_JS, "_smdEndParser")
+        assert fn and (
+            "_smdWrittenLen=0" in fn or "_smdWrittenLen = 0" in fn
+        ), "_smdEndParser must reset _smdWrittenLen to 0"
+
+    def test_smd_write_exists(self):
+        fn = extract_fn(MESSAGES_JS, "_smdWrite")
+        assert fn is not None, "_smdWrite function must be defined"
+
+    def test_smd_write_slices_delta(self):
+        fn = extract_fn(MESSAGES_JS, "_smdWrite")
+        assert fn and "_smdWrittenLen" in fn, (
+            "_smdWrite must slice from _smdWrittenLen to send only new chars"
+        )
+
+    def test_smd_write_calls_parser_write(self):
+        fn = extract_fn(MESSAGES_JS, "_smdWrite")
+        assert fn and "parser_write" in fn, (
+            "_smdWrite must call smd.parser_write() to feed the chunk"
+        )
+
+    def test_smd_write_updates_written_len(self):
+        fn = extract_fn(MESSAGES_JS, "_smdWrite")
+        assert fn and "displayText.length" in fn, (
+            "_smdWrite must advance _smdWrittenLen to displayText.length after writing"
+        )
+
+    def test_smd_write_guards_on_parser(self):
+        fn = extract_fn(MESSAGES_JS, "_smdWrite")
+        assert fn and "_smdParser" in fn, (
+            "_smdWrite must guard on _smdParser before calling parser_write"
+        )
+
+
+# ── 4. _scheduleRender: smd path vs fallback ──────────────────────────────────
+
+class TestScheduleRenderSmdPath:
+    """_scheduleRender must use smd when available and fall back to renderMd."""
+
+    def get_fn(self):
+        return extract_fn(MESSAGES_JS, "_scheduleRender")
+
+    def test_smd_path_present(self):
+        fn = self.get_fn()
+        assert fn and "_smdParser" in fn, (
+            "_scheduleRender must check for _smdParser to take the smd path"
+        )
+
+    def test_smd_write_called_in_schedule_render(self):
+        fn = self.get_fn()
+        assert fn and "_smdWrite(" in fn, (
+            "_scheduleRender must call _smdWrite() to feed incremental text"
+        )
+
+    def test_fallback_rendermd_still_present(self):
+        fn = self.get_fn()
+        assert fn and "renderMd" in fn, (
+            "renderMd fallback must still exist in _scheduleRender when smd unavailable"
+        )
+
+    def test_smd_new_parser_called_lazily(self):
+        fn = self.get_fn()
+        assert fn and "_smdNewParser(" in fn, (
+            "_scheduleRender must lazily call _smdNewParser() on first token after body creation"
+        )
+
+    def test_reconnect_clears_body(self):
+        fn = self.get_fn()
+        assert fn and "_smdReconnect" in fn, (
+            "_scheduleRender must handle the reconnect case by checking _smdReconnect"
+        )
+
+    def test_no_raw_innerhtml_assignment_in_smd_path(self):
+        """When smd is active, innerHTML must NOT be set — only _smdWrite() feeds the DOM."""
+        fn = self.get_fn()
+        assert fn, "_scheduleRender not found"
+        # The smd branch must be separated from the innerHTML branch by an if/else.
+        # A crude but effective check: _smdWrite and innerHTML=... must not appear
+        # on the same code path (i.e., _smdWrite must be inside an `if(_smdParser)` block).
+        smd_write_pos = fn.find("_smdWrite(")
+        innerhtml_pos = fn.find("assistantBody.innerHTML =")
+        # Both must exist
+        assert smd_write_pos != -1, "_smdWrite( not found in _scheduleRender"
+        assert innerhtml_pos != -1, "innerHTML fallback not found in _scheduleRender"
+        # They must be separated by an if/else construct — there must be a `} else {`
+        # between them (in either order). We just verify `else` appears between them.
+        lo, hi = sorted([smd_write_pos, innerhtml_pos])
+        between = fn[lo:hi]
+        assert "else" in between, (
+            "smd path and innerHTML fallback must be in separate if/else branches"
+        )
+
+
+# ── 5. tool event: smd parser finalised between segments ──────────────────────
+
+class TestToolEventSmdEnd:
+    """When a tool call is received, the current smd parser must be ended so
+    the next text segment gets a fresh parser bound to the new assistantBody."""
+
+    def get_fn(self):
+        return extract_event_handler(MESSAGES_JS, "tool")
+
+    def test_smd_end_parser_called_on_tool(self):
+        fn = self.get_fn()
+        assert fn and "_smdEndParser(" in fn, (
+            "The 'tool' event handler must call _smdEndParser() to finalise the "
+            "current segment before creating a new assistantBody for post-tool text"
+        )
+
+
+# ── 6. done event: smd parser finalized + post-finalize highlighting ──────────
+
+class TestDoneEventSmd:
+    """The 'done' handler must end the smd parser and trigger Prism/KaTeX/copy."""
+
+    def get_fn(self):
+        return extract_event_handler(MESSAGES_JS, "done")
+
+    def test_smd_end_parser_called_on_done(self):
+        fn = self.get_fn()
+        assert fn and "_smdEndParser(" in fn, (
+            "'done' handler must call _smdEndParser() to flush remaining parser state"
+        )
+
+    def test_highlight_code_called_on_done(self):
+        fn = self.get_fn()
+        assert fn and "highlightCode" in fn, (
+            "'done' handler must call highlightCode() on the finalized live segment"
+        )
+
+    def test_add_copy_buttons_called_on_done(self):
+        fn = self.get_fn()
+        assert fn and "addCopyButtons" in fn, (
+            "'done' handler must call addCopyButtons() on the finalized live segment"
+        )
+
+    def test_render_katex_called_on_done(self):
+        fn = self.get_fn()
+        assert fn and "renderKatexBlocks" in fn, (
+            "'done' handler must call renderKatexBlocks() after smd parser end"
+        )
+
+    def test_highlight_scheduled_via_raf_before_render_messages(self):
+        """highlightCode must be called via requestAnimationFrame that is scheduled
+        before renderMessages() runs — so the live segment is highlighted while it's
+        still in the DOM, before renderMessages() replaces it with the final content.
+
+        Source-order check: the requestAnimationFrame(...highlightCode...) block must
+        appear earlier in the done handler than the renderMessages() call.
+        """
+        fn = self.get_fn()
+        assert fn, "'done' handler not found"
+        # Strip single-line comments to avoid matching 'renderMessages(' inside comments
+        fn_no_comments = re.sub(r'//[^\n]*', '', fn)
+        # Find the rAF that contains highlightCode
+        raf_pos = fn_no_comments.find("requestAnimationFrame")
+        render_messages_pos = fn_no_comments.find("renderMessages(")
+        assert raf_pos != -1, "requestAnimationFrame not found in 'done' handler"
+        assert render_messages_pos != -1, "renderMessages() not in 'done' handler"
+        # Verify highlightCode is inside the rAF block
+        raf_block_end = fn_no_comments.find("});", raf_pos)
+        assert raf_block_end != -1, "rAF closing }); not found"
+        raf_block = fn_no_comments[raf_pos:raf_block_end]
+        assert "highlightCode" in raf_block, (
+            "highlightCode must be inside the requestAnimationFrame callback in 'done'"
+        )
+        # The rAF scheduling call must appear before renderMessages in source
+        assert raf_pos < render_messages_pos, (
+            "The requestAnimationFrame (which schedules highlightCode) must appear "
+            "before renderMessages() in the 'done' handler source"
+        )
+
+
+# ── 7. apperror event: smd parser ends cleanly ───────────────────────────────
+
+class TestAppErrorSmd:
+    """The 'apperror' handler must call _smdEndParser to avoid leaking state."""
+
+    def get_fn(self):
+        return extract_event_handler(MESSAGES_JS, "apperror")
+
+    def test_smd_end_parser_called_on_apperror(self):
+        fn = self.get_fn()
+        assert fn and "_smdEndParser(" in fn, (
+            "'apperror' handler must call _smdEndParser()"
+        )
+
+
+# ── 8. cancel event: smd parser ends cleanly ─────────────────────────────────
+
+class TestCancelSmd:
+    """The 'cancel' handler must call _smdEndParser to avoid leaking state."""
+
+    def get_fn(self):
+        return extract_event_handler(MESSAGES_JS, "cancel")
+
+    def test_smd_end_parser_called_on_cancel(self):
+        fn = self.get_fn()
+        assert fn and "_smdEndParser(" in fn, (
+            "'cancel' handler must call _smdEndParser()"
+        )
+
+
+# ── 9. Regression: existing streaming guards still intact ─────────────────────
+
+class TestExistingStreamingGuardsIntact:
+    """The smd integration must not break pre-existing correctness properties."""
+
+    def test_stream_finalized_still_guards_schedule_render(self):
+        fn = extract_fn(MESSAGES_JS, "_scheduleRender")
+        assert fn and "_streamFinalized" in fn, (
+            "_streamFinalized guard must still be present in _scheduleRender"
+        )
+
+    def test_done_still_sets_stream_finalized(self):
+        fn = extract_event_handler(MESSAGES_JS, "done")
+        assert fn and (
+            "_streamFinalized=true" in fn or "_streamFinalized = true" in fn
+        ), "'done' must still set _streamFinalized=true"
+
+    def test_apperror_still_sets_stream_finalized(self):
+        fn = extract_event_handler(MESSAGES_JS, "apperror")
+        assert fn and (
+            "_streamFinalized=true" in fn or "_streamFinalized = true" in fn
+        ), "'apperror' must still set _streamFinalized=true"
+
+    def test_cancel_still_sets_stream_finalized(self):
+        fn = extract_event_handler(MESSAGES_JS, "cancel")
+        assert fn and (
+            "_streamFinalized=true" in fn or "_streamFinalized = true" in fn
+        ), "'cancel' must still set _streamFinalized=true"
+
+    def test_wire_sse_does_not_reset_accumulators(self):
+        fn = extract_fn(MESSAGES_JS, "_wireSSE")
+        assert fn is not None, "_wireSSE not found"
+        assert "assistantText=''" not in fn and 'assistantText=""' not in fn, (
+            "_wireSSE must NOT reset assistantText on reconnect"
+        )
+
+    def test_segment_start_still_tracked(self):
+        src = MESSAGES_JS
+        assert "segmentStart=assistantText.length" in src or \
+               "segmentStart = assistantText.length" in src, (
+            "segmentStart must still be advanced on tool events"
+        )
+
+    def test_fresh_segment_flag_still_set_on_tool(self):
+        fn = extract_event_handler(MESSAGES_JS, "tool")
+        assert fn and (
+            "_freshSegment=true" in fn or "_freshSegment = true" in fn
+        ), "_freshSegment must still be set on tool events"
+
+
+# ── XSS: smd does NOT sanitize URL schemes — we must do it ourselves ──────────
+
+class TestSmdUrlSchemeSanitization:
+    """streaming-markdown@0.2.15 preserves `javascript:`, `vbscript:`, and dangerous
+    `data:` URLs in href/src attributes. Verified via Node + jsdom harness:
+
+        [click](javascript:alert(1))  →  <a href="javascript:alert(1">click</a>
+
+    The existing renderMd() path filters these via its http(s)-only regex. When
+    streaming with smd, we must walk the live DOM after each parser_write and
+    remove unsafe schemes, otherwise agent-echoed prompt-injection content
+    becomes a click-to-XSS vector in the webui origin.
+    """
+
+    def test_sanitize_helper_exists(self):
+        assert "_sanitizeSmdLinks" in MESSAGES_JS, (
+            "messages.js must define _sanitizeSmdLinks() to strip javascript:/data:/vbscript: "
+            "URLs from smd-rendered anchors and images (agent output is untrusted)"
+        )
+
+    def test_sanitize_uses_scheme_allowlist(self):
+        # The allowlist regex must permit the safe schemes that the legacy
+        # renderMd path emitted (http/https + relative/anchor paths + mailto/tel)
+        # and reject everything else — including javascript:, data:, vbscript:, file:.
+        assert "_SMD_SAFE_URL_RE" in MESSAGES_JS, (
+            "Expected a _SMD_SAFE_URL_RE regex defining the safe-scheme allowlist"
+        )
+        # Find the regex definition
+        import re as _re
+        m = _re.search(r"_SMD_SAFE_URL_RE\s*=\s*/([^/]+)/i?", MESSAGES_JS)
+        assert m, "_SMD_SAFE_URL_RE regex literal not found in messages.js"
+        pattern = m.group(1)
+        # Must mention https? and must NOT mention javascript/vbscript/data
+        assert "https?" in pattern, "allowlist must permit https?:"
+        for bad in ("javascript", "vbscript", "data:"):
+            assert bad not in pattern, (
+                f"allowlist must NOT mention {bad!r} — schemes are denied by default"
+            )
+
+    def test_sanitize_called_after_smd_write(self):
+        # _smdWrite must invoke _sanitizeSmdLinks on assistantBody after feeding the parser,
+        # so anchors/images created mid-stream get their javascript:/data:/vbscript:
+        # hrefs/srcs stripped before the user can click them.
+        fn = extract_fn(MESSAGES_JS, "_smdWrite")
+        assert fn, "_smdWrite function not found"
+        assert "_sanitizeSmdLinks" in fn, (
+            "_smdWrite must call _sanitizeSmdLinks(assistantBody) after parser_write "
+            "so unsafe URL schemes are stripped from newly-added anchors/images "
+            "before the user can click them"
+        )
+
+    def test_sanitize_called_at_parser_end(self):
+        # _smdEndParser flushes any remaining markdown — that flush can create new links,
+        # so we must re-sanitize before the DOM is handed off to highlightCode / renderMessages.
+        fn = extract_fn(MESSAGES_JS, "_smdEndParser")
+        assert fn, "_smdEndParser function not found"
+        assert "_sanitizeSmdLinks" in fn, (
+            "_smdEndParser must call _sanitizeSmdLinks(assistantBody) after parser_end "
+            "so any links flushed at end-of-stream are also scheme-sanitized"
+        )
+
+    def test_sanitize_strips_href_and_src(self):
+        # The sanitizer must guard BOTH <a href> and <img src> — smd uses the same
+        # href/src pipeline for markdown links and images respectively, and images
+        # with javascript: src (e.g., ![alt](javascript:...)) are equally risky.
+        fn = extract_fn(MESSAGES_JS, "_sanitizeSmdLinks")
+        assert fn, "_sanitizeSmdLinks function not found"
+        assert "a[href]" in fn, "_sanitizeSmdLinks must query for a[href]"
+        assert "img[src]" in fn, "_sanitizeSmdLinks must query for img[src]"
+        assert "removeAttribute" in fn, (
+            "_sanitizeSmdLinks must removeAttribute('href'/'src') on unsafe schemes"
+        )