fix(smd): strip javascript:/data:/vbscript: URLs — smd does not sanitize schemes
streaming-markdown@0.2.15 preserves arbitrary URL schemes in href/src. Verified with a Node + jsdom harness:

    IN : [click](javascript:alert(1))
    OUT: <p><a href="javascript:alert(1">click</a>)</p>   ← XSS vector

Confirmed unsafe for: javascript:, vbscript:, data:text/html, file://.

The library uses only safe DOM primitives (createElement/appendChild/createTextNode — no innerHTML/eval), so <script> tags are escaped as text, but URL-scheme filtering is absent. The existing renderMd() path implicitly filtered to http(s) via its regex, so this is a regression the moment streaming markdown is enabled.

Attack path: agent echoes prompt-injection content containing a markdown link with a javascript: href → smd renders it live → user clicks during the streaming window → JS executes in the webui origin → session cookie, API calls, etc.

Fix: walk the live DOM after each parser_write (and again after parser_end) and remove href/src attributes whose value does not start with an allowlisted prefix (http:, https:, mailto:, tel:, or a relative/anchor reference beginning with /, #, ? or .). Blocked anchors keep their text content but lose href; blocked images lose src. Both are marked with data-blocked-scheme="1" for debugging.

Harness confirms all 10 tested cases behave correctly — javascript:, vbscript:, data:text/html, file:// all stripped; https://, /path, #anchor, mailto:, tel: all preserved.

Added 5 regression tests in TestSmdUrlSchemeSanitization that lock:
- the sanitize helper exists
- the allowlist regex permits https? and forbids javascript/vbscript/data:
- _smdWrite invokes sanitize after parser_write
- _smdEndParser invokes sanitize after parser_end
- the sanitizer covers both <a href> and <img src>

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -384,6 +384,9 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
|
|||||||
function _smdEndParser(){
|
function _smdEndParser(){
|
||||||
if(_smdParser&&window.smd){
|
if(_smdParser&&window.smd){
|
||||||
try{window.smd.parser_end(_smdParser);}catch(_){}
|
try{window.smd.parser_end(_smdParser);}catch(_){}
|
||||||
|
// parser_end may flush remaining markdown that creates new links/images —
|
||||||
|
// re-sanitize the body before the DOM is handed off to highlightCode / renderMessages.
|
||||||
|
if(assistantBody){_sanitizeSmdLinks(assistantBody);}
|
||||||
}
|
}
|
||||||
_smdParser=null;
|
_smdParser=null;
|
||||||
_smdWrittenLen=0;
|
_smdWrittenLen=0;
|
||||||
@@ -396,6 +399,31 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
|
|||||||
if(!delta) return;
|
if(!delta) return;
|
||||||
try{window.smd.parser_write(_smdParser,delta);}catch(_){}
|
try{window.smd.parser_write(_smdParser,delta);}catch(_){}
|
||||||
_smdWrittenLen=displayText.length;
|
_smdWrittenLen=displayText.length;
|
||||||
|
// streaming-markdown does NOT sanitize URL schemes — `[click](javascript:...)`
|
||||||
|
// and `![img](javascript:...)` survive as href/src. Strip any unsafe schemes
|
||||||
|
// from anchors/images that were just added to the live DOM. The existing
|
||||||
|
// renderMd() path filters these via its http(s)-only regex; we need a matching
|
||||||
|
// guard here so the live-stream path isn't an XSS vector for agent-echoed
|
||||||
|
// prompt-injection content. The final renderMessages() call at `done` uses
|
||||||
|
// renderMd which is already safe, but during streaming the user could click
|
||||||
|
// a malicious link before that replacement happens.
|
||||||
|
if(assistantBody){_sanitizeSmdLinks(assistantBody);}
|
||||||
|
}
|
||||||
|
// Prefixes that an href/src produced from agent-streamed markdown may start with:
// http:, https:, mailto:, tel: (case-insensitive), or a relative reference that
// begins with "/", "#", "?" or ".". The check is deny-by-default: any value that
// does not start with one of these — javascript:, data:, vbscript:, file:, a value
// with leading whitespace, or a bare relative path such as "docs/x" — is rejected.
const _SMD_SAFE_URL_RE=/^(?:https?:|mailto:|tel:|\/|#|\?|\.)/i;

/**
 * Remove unsafe URL attributes from anchors and images under `root`.
 *
 * Every `<a href>` and `<img src>` descendant whose attribute value fails
 * `_SMD_SAFE_URL_RE` loses that attribute and is tagged with
 * `data-blocked-scheme="1"`. The element itself (and an anchor's text) stays
 * in the DOM. Elements whose value passes the allowlist are left untouched.
 *
 * @param {Element|null|undefined} root  Container to scan; a falsy value or an
 *   object without `querySelectorAll` makes the call a no-op.
 * @returns {void}
 */
function _sanitizeSmdLinks(root){
  if(!root||!root.querySelectorAll) return;
  // [selector, attribute] pairs, processed in this order: anchors first, then images.
  const targets=[['a[href]','href'],['img[src]','src']];
  targets.forEach(function(target){
    const selector=target[0],attrName=target[1];
    const matches=root.querySelectorAll(selector);
    // forEach.call keeps this working for any array-like result, not just NodeList.
    Array.prototype.forEach.call(matches,function(node){
      const url=node.getAttribute(attrName)||'';
      if(_SMD_SAFE_URL_RE.test(url)) return;
      node.removeAttribute(attrName);
      node.setAttribute('data-blocked-scheme','1');
    });
  });
}
|
||||||
function _scheduleRender(){
|
function _scheduleRender(){
|
||||||
if(_renderPending) return;
|
if(_renderPending) return;
|
||||||
|
|||||||
@@ -449,3 +449,77 @@ class TestExistingStreamingGuardsIntact:
|
|||||||
assert fn and (
|
assert fn and (
|
||||||
"_freshSegment=true" in fn or "_freshSegment = true" in fn
|
"_freshSegment=true" in fn or "_freshSegment = true" in fn
|
||||||
), "_freshSegment must still be set on tool events"
|
), "_freshSegment must still be set on tool events"
|
||||||
|
|
||||||
|
|
||||||
|
# ── XSS: smd does NOT sanitize URL schemes — we must do it ourselves ──────────
|
||||||
|
|
||||||
|
class TestSmdUrlSchemeSanitization:
    """Lock in the URL-scheme sanitizer for the streaming-markdown (smd) path.

    streaming-markdown@0.2.15 preserves `javascript:`, `vbscript:`, and dangerous
    `data:` URLs in href/src attributes. Verified via Node + jsdom harness:

        [click](javascript:alert(1))  →  <a href="javascript:alert(1">click</a>

    The existing renderMd() path filters these via its http(s)-only regex. When
    streaming with smd, we must walk the live DOM after each parser_write and
    remove unsafe schemes, otherwise agent-echoed prompt-injection content
    becomes a click-to-XSS vector in the webui origin.

    These are source-level checks against the text of messages.js; they do not
    execute the JavaScript.
    """

    def test_sanitize_helper_exists(self):
        """messages.js must define (or at least reference) the sanitizer."""
        assert "_sanitizeSmdLinks" in MESSAGES_JS, (
            "messages.js must define _sanitizeSmdLinks() to strip javascript:/data:/vbscript: "
            "URLs from smd-rendered anchors and images (agent output is untrusted)"
        )

    def test_sanitize_uses_scheme_allowlist(self):
        """The allowlist regex must permit https? and name no dangerous scheme."""
        # The allowlist regex must permit the safe schemes that the legacy
        # renderMd path emitted (http/https + relative/anchor paths + mailto/tel)
        # and reject everything else — including javascript:, data:, vbscript:, file:.
        assert "_SMD_SAFE_URL_RE" in MESSAGES_JS, (
            "Expected a _SMD_SAFE_URL_RE regex defining the safe-scheme allowlist"
        )
        import re as _re

        # Capture the whole JS regex literal body. The body contains escaped
        # slashes (`\/`), so a naive `[^/]+` would stop at the first one and the
        # checks below would only ever see a truncated prefix of the allowlist.
        # `\\.` consumes any backslash-escaped character (including `\/`);
        # the character class consumes everything else up to the closing `/`.
        m = _re.search(
            r"_SMD_SAFE_URL_RE\s*=\s*/((?:\\.|[^/\\\n])+)/[a-z]*",
            MESSAGES_JS,
        )
        assert m, "_SMD_SAFE_URL_RE regex literal not found in messages.js"
        pattern = m.group(1)
        # Must mention https? and must NOT mention any dangerous scheme: the
        # sanitizer is deny-by-default, so unsafe schemes never belong in it.
        assert "https?" in pattern, "allowlist must permit https?:"
        for bad in ("javascript", "vbscript", "data:", "file:"):
            assert bad not in pattern, (
                f"allowlist must NOT mention {bad!r} — schemes are denied by default"
            )

    def test_sanitize_called_after_smd_write(self):
        """_smdWrite must sanitize right after feeding the parser."""
        # _smdWrite must invoke _sanitizeSmdLinks on assistantBody after feeding the parser,
        # so anchors/images created mid-stream get their javascript:/data:/vbscript:
        # hrefs/srcs stripped before the user can click them.
        fn = extract_fn(MESSAGES_JS, "_smdWrite")
        assert fn, "_smdWrite function not found"
        assert "_sanitizeSmdLinks" in fn, (
            "_smdWrite must call _sanitizeSmdLinks(assistantBody) after parser_write "
            "so unsafe URL schemes are stripped from newly-added anchors/images "
            "before the user can click them"
        )

    def test_sanitize_called_at_parser_end(self):
        """_smdEndParser must sanitize again after the final flush."""
        # _smdEndParser flushes any remaining markdown — that flush can create new links,
        # so we must re-sanitize before the DOM is handed off to highlightCode / renderMessages.
        fn = extract_fn(MESSAGES_JS, "_smdEndParser")
        assert fn, "_smdEndParser function not found"
        assert "_sanitizeSmdLinks" in fn, (
            "_smdEndParser must call _sanitizeSmdLinks(assistantBody) after parser_end "
            "so any links flushed at end-of-stream are also scheme-sanitized"
        )

    def test_sanitize_strips_href_and_src(self):
        """The sanitizer must cover both anchors and images."""
        # The sanitizer must guard BOTH <a href> and <img src> — smd uses the same
        # href/src pipeline for markdown links and images respectively, and images
        # with javascript: src (e.g., ![x](javascript:alert(1))) are equally risky.
        fn = extract_fn(MESSAGES_JS, "_sanitizeSmdLinks")
        assert fn, "_sanitizeSmdLinks function not found"
        assert "a[href]" in fn, "_sanitizeSmdLinks must query for a[href]"
        assert "img[src]" in fn, "_sanitizeSmdLinks must query for img[src]"
        assert "removeAttribute" in fn, (
            "_sanitizeSmdLinks must removeAttribute('href'/'src') on unsafe schemes"
        )
|
||||||
|
|||||||
Reference in New Issue
Block a user