diff --git a/static/messages.js b/static/messages.js
index 4c8c93b..470ef7b 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -384,6 +384,9 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
function _smdEndParser(){
if(_smdParser&&window.smd){
try{window.smd.parser_end(_smdParser);}catch(_){}
+ // parser_end may flush remaining markdown that creates new links/images —
+ // re-sanitize the body before the DOM is handed off to highlightCode / renderMessages.
+ if(assistantBody){_sanitizeSmdLinks(assistantBody);}
}
_smdParser=null;
_smdWrittenLen=0;
@@ -396,6 +399,31 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
if(!delta) return;
try{window.smd.parser_write(_smdParser,delta);}catch(_){}
_smdWrittenLen=displayText.length;
+ // streaming-markdown does NOT sanitize URL schemes — `[click](javascript:...)`
+ // and `<img src="javascript:...">` survive as href/src. Strip any unsafe schemes
+ // from anchors/images that were just added to the live DOM. The existing
+ // renderMd() path filters these via its http(s)-only regex; we need a matching
+ // guard here so the live-stream path isn't an XSS vector for agent-echoed
+ // prompt-injection content. The final renderMessages() call at `done` uses
+ // renderMd which is already safe, but during streaming the user could click
+ // a malicious link before that replacement happens.
+ if(assistantBody){_sanitizeSmdLinks(assistantBody);}
+ }
+ // Allowed URL prefixes for href/src on anchors and images rendered from agent-streamed markdown: http(s):, mailto:, tel:, and relative forms starting with / # ? or .
+ // Intended to mirror renderMd()'s allowlist. NOTE(review): mailto:/tel: go beyond the http(s)-only filter described for renderMd() above; confirm this is deliberate.
+ const _SMD_SAFE_URL_RE=/^(?:https?:|mailto:|tel:|\/|#|\?|\.)/i; // anchored at the start, case-insensitive; any value that does not match is treated as unsafe
+ function _sanitizeSmdLinks(root){ // strips disallowed href/src values from anchors and images under root, in place
+ if(!root||!root.querySelectorAll) return; // nothing to scan without a queryable root
+ const _a=root.querySelectorAll('a[href]'); // only anchors that still carry an href
+ for(let i=0;i<_a.length;i++){
+ const n=_a[i],v=n.getAttribute('href')||''; // raw attribute text; empty string when absent
+ if(!_SMD_SAFE_URL_RE.test(v)){n.removeAttribute('href');n.setAttribute('data-blocked-scheme','1');} // drop the URL and leave a marker attribute
+ }
+ const _im=root.querySelectorAll('img[src]'); // same treatment for images that still carry a src
+ for(let i=0;i<_im.length;i++){
+ const n=_im[i],v=n.getAttribute('src')||''; // raw attribute text; empty string when absent
+ if(!_SMD_SAFE_URL_RE.test(v)){n.removeAttribute('src');n.setAttribute('data-blocked-scheme','1');} // drop the URL and leave a marker attribute
+ }
}
function _scheduleRender(){
if(_renderPending) return;
diff --git a/tests/test_streaming_markdown.py b/tests/test_streaming_markdown.py
index 0e8aac0..c8c615c 100644
--- a/tests/test_streaming_markdown.py
+++ b/tests/test_streaming_markdown.py
@@ -449,3 +449,77 @@ class TestExistingStreamingGuardsIntact:
assert fn and (
"_freshSegment=true" in fn or "_freshSegment = true" in fn
), "_freshSegment must still be set on tool events"
+
+
+# ── XSS: smd does NOT sanitize URL schemes — we must do it ourselves ──────────
+
+class TestSmdUrlSchemeSanitization:
+ """streaming-markdown@0.2.15 preserves `javascript:`, `vbscript:`, and dangerous
+ `data:` URLs in href/src attributes. Verified via Node + jsdom harness:
+
+ [click](javascript:alert(1)) → click
+
+ The existing renderMd() path filters these via its http(s)-only regex. When
+ streaming with smd, we must walk the live DOM after each parser_write and
+ remove unsafe schemes, otherwise agent-echoed prompt-injection content
+ becomes a click-to-XSS vector in the webui origin.
+ """
+
+ def test_sanitize_helper_exists(self):
+ assert "_sanitizeSmdLinks" in MESSAGES_JS, (
+ "messages.js must define _sanitizeSmdLinks() to strip javascript:/data:/vbscript: "
+ "URLs from smd-rendered anchors and images (agent output is untrusted)"
+ )
+
+ def test_sanitize_uses_scheme_allowlist(self):
+ # The allowlist regex must permit the safe schemes that the legacy
+ # renderMd path emitted (http/https + relative/anchor paths + mailto/tel)
+ # and reject everything else — including javascript:, data:, vbscript:, file:.
+ assert "_SMD_SAFE_URL_RE" in MESSAGES_JS, (
+ "Expected a _SMD_SAFE_URL_RE regex defining the safe-scheme allowlist"
+ )
+ # Find the regex definition
+ import re as _re
+ m = _re.search(r"_SMD_SAFE_URL_RE\s*=\s*/([^/]+)/i?", MESSAGES_JS)
+ assert m, "_SMD_SAFE_URL_RE regex literal not found in messages.js"
+ pattern = m.group(1)
+ # Must mention https? and must NOT mention javascript/vbscript/data
+ assert "https?" in pattern, "allowlist must permit https?:"
+ for bad in ("javascript", "vbscript", "data:"):
+ assert bad not in pattern, (
+ f"allowlist must NOT mention {bad!r} — schemes are denied by default"
+ )
+
+ def test_sanitize_called_after_smd_write(self):
+ # _smdWrite must invoke _sanitizeSmdLinks on assistantBody after feeding the parser,
+ # so anchors/images created mid-stream get their javascript:/data:/vbscript:
+ # hrefs/srcs stripped before the user can click them.
+ fn = extract_fn(MESSAGES_JS, "_smdWrite")
+ assert fn, "_smdWrite function not found"
+ assert "_sanitizeSmdLinks" in fn, (
+ "_smdWrite must call _sanitizeSmdLinks(assistantBody) after parser_write "
+ "so unsafe URL schemes are stripped from newly-added anchors/images "
+ "before the user can click them"
+ )
+
+ def test_sanitize_called_at_parser_end(self):
+ # _smdEndParser flushes any remaining markdown — that flush can create new links,
+ # so we must re-sanitize before the DOM is handed off to highlightCode / renderMessages.
+ fn = extract_fn(MESSAGES_JS, "_smdEndParser")
+ assert fn, "_smdEndParser function not found"
+ assert "_sanitizeSmdLinks" in fn, (
+ "_smdEndParser must call _sanitizeSmdLinks(assistantBody) after parser_end "
+ "so any links flushed at end-of-stream are also scheme-sanitized"
+ )
+
+ def test_sanitize_strips_href_and_src(self):
+ # The sanitizer must guard BOTH and
— smd uses the same
+ # href/src pipeline for markdown links and images respectively, and images
+ # with javascript: src (e.g., ) are equally risky.
+ fn = extract_fn(MESSAGES_JS, "_sanitizeSmdLinks")
+ assert fn, "_sanitizeSmdLinks function not found"
+ assert "a[href]" in fn, "_sanitizeSmdLinks must query for a[href]"
+ assert "img[src]" in fn, "_sanitizeSmdLinks must query for img[src]"
+ assert "removeAttribute" in fn, (
+ "_sanitizeSmdLinks must removeAttribute('href'/'src') on unsafe schemes"
+ )