fix: poll /health after update instead of blind setTimeout — v0.50.158 (closes #874)

Replaces blind setTimeout reload with /health polling loop. Banner shows restart status with manual Reload button. Works behind reverse proxies. 25 regression tests.
This commit is contained in:
nesquena-hermes
2026-04-22 17:51:12 -07:00
committed by GitHub
parent a72208eaf6
commit e3607855b1
3 changed files with 98 additions and 12 deletions

View File

@@ -1,5 +1,10 @@
# Hermes Web UI -- Changelog
## [v0.50.158] — 2026-04-23
### Fixed
- **Post-update page reload no longer races against server restart** — `applyUpdates()` and `forceUpdate()` now poll `/health` every 500ms (up to 15 seconds) instead of firing a blind 2500ms `setTimeout`. The existing reconnect banner shows "⏳ Restarting… please wait" during the poll window, giving users a visible status and a manual Reload button. If the server is still down after 15s, the banner message changes to prompt a manual reload. Fixes 502 errors seen when the server restart outpaces the fixed delay, especially behind reverse proxies. (`static/ui.js`) (closes #874)
## [v0.50.157] — 2026-04-22
### Fixed

View File

@@ -1076,6 +1076,10 @@ function dismissReconnect() {
clearInflight();
}
async function refreshSession() {
// When the banner is in post-update restart mode, the "Reload" button
// should do a full page reload — a session refresh would just 502 while
// the server is still restarting.
if (window._restartingForUpdate) { location.reload(); return; }
dismissReconnect();
if (!S.session) return;
try {
@@ -1127,10 +1131,10 @@ async function applyUpdates(){
return;
}
}
showToast('Updated! Restarting\u2026');
showToast('Update applied — restarting');
sessionStorage.removeItem('hermes-update-checked');
sessionStorage.removeItem('hermes-update-dismissed');
setTimeout(()=>location.reload(),2500);
_waitForServerThenReload();
}catch(e){
if(errEl){errEl.textContent='Update failed: '+e.message;errEl.style.display='block';}
else showToast('Update failed: '+e.message);
@@ -1174,16 +1178,49 @@ async function forceUpdate(btn){
btn.disabled=false;btn.textContent='Force update';
return;
}
showToast('Force updated! Restarting\u2026');
showToast('Force update applied — restarting');
sessionStorage.removeItem('hermes-update-checked');
sessionStorage.removeItem('hermes-update-dismissed');
setTimeout(()=>location.reload(),2500);
_waitForServerThenReload();
}catch(e){
if(errEl){errEl.textContent='Force update failed: '+e.message;errEl.style.display='block';}
btn.disabled=false;btn.textContent='Force update';
}
}
// Poll /health after an update-triggered restart, then reload. Replaces the
// blind setTimeout(reload, 2500) that race-lost against slow hardware or
// reverse proxies that 502 immediately when the upstream socket closes (#874).
async function _waitForServerThenReload(opts){
opts=opts||{};
const interval=opts.interval||500;
const maxMs=opts.maxMs||15000;
window._restartingForUpdate=true;
const msgEl=$('reconnectMsg');
const banner=$('reconnectBanner');
if(msgEl) msgEl.textContent='⏳ Restarting… please wait';
if(banner) banner.classList.add('visible');
const deadline=Date.now()+maxMs;
// Give the server a moment to actually begin its restart before the first
// probe — otherwise the old process may still respond ok on the first poll.
await new Promise(r=>setTimeout(r, interval));
while(Date.now()<deadline){
try{
const r=await fetch('/health',{cache:'no-store'});
if(r.ok){
let data={};
try{ data=await r.json(); }catch(_){}
if(data && data.status==='ok'){
location.reload();
return;
}
}
}catch(_){ /* socket closed during restart — retry */ }
await new Promise(r=>setTimeout(r, interval));
}
if(msgEl) msgEl.textContent='⚠️ Server is taking longer than expected — click Reload when ready';
}
function getPendingSessionMessage(session){
const text=String(session?.pending_user_message||'').trim();
if(!text) return null;

View File

@@ -251,23 +251,67 @@ class TestUiJsUpdateBanner:
m = re.search(r'function applyUpdates\b.*?\n\}', src, re.DOTALL)
assert m, "applyUpdates() not found"
fn = m.group(0)
assert 'Restarting' in fn, (
"success toast must say 'Restarting' (server self-restarts after update)"
assert 'restarting' in fn.lower(), (
"success toast must mention 'restarting' (server self-restarts after update)"
)
assert 'Reloading' not in fn, (
"success toast must not say 'Reloading' — server restarts, page reloads after"
)
def test_reload_timeout_at_least_2000ms(self):
"""Reload delay must be >= 2000 ms to give the server time to restart."""
def test_reload_uses_health_poll_not_blind_timeout(self):
"""applyUpdates must use _waitForServerThenReload() instead of a blind setTimeout.
A fixed setTimeout race-loses against slow hardware or reverse proxies
that return 502 immediately when the upstream socket is down.
The polling approach retries until /health responds OK.
"""
src = read('static/ui.js')
m = re.search(r'function applyUpdates\b.*?\n\}', src, re.DOTALL)
assert m, "applyUpdates() not found"
fn = m.group(0)
timeouts = re.findall(r'setTimeout\(.*?(\d+)\)', fn)
assert timeouts, "applyUpdates must have a setTimeout for page reload"
assert any(int(t) >= 2000 for t in timeouts), (
f"reload timeout must be >= 2000 ms to survive server restart; found: {timeouts}"
assert '_waitForServerThenReload' in fn, (
"applyUpdates() must call _waitForServerThenReload() instead of a blind "
"setTimeout reload — blind timeouts race-lose against slow restarts and "
"reverse proxies that 502 immediately on restart."
)
assert 'setTimeout(()=>location.reload' not in fn, (
"applyUpdates() must not use a fixed setTimeout reload — use _waitForServerThenReload()."
)
def test_wait_for_server_then_reload_is_defined(self):
"""_waitForServerThenReload() must actually exist — the original PR
referenced it from applyUpdates()/forceUpdate() without defining it,
which would have thrown ReferenceError on 'Update Now'."""
src = read('static/ui.js')
assert re.search(r'(async\s+)?function\s+_waitForServerThenReload\b', src), (
"_waitForServerThenReload() is called but not defined — this breaks "
"the Update Now flow entirely (ReferenceError at runtime)."
)
def test_wait_for_server_polls_health(self):
"""_waitForServerThenReload() must fetch /health to determine readiness."""
src = read('static/ui.js')
m = re.search(r'function\s+_waitForServerThenReload\b.*?\n\}', src, re.DOTALL)
assert m, "_waitForServerThenReload() not found"
fn = m.group(0)
assert '/health' in fn, (
"_waitForServerThenReload must poll /health to detect server readiness"
)
assert 'location.reload' in fn, (
"_waitForServerThenReload must call location.reload() once the server is ready"
)
def test_refresh_session_handles_restart_mode(self):
"""When _restartingForUpdate flag is set, refreshSession() must do a
full page reload rather than hit /api/session (which will 502 while
the server is down)."""
src = read('static/ui.js')
m = re.search(r'async function refreshSession\b.*?\n\}', src, re.DOTALL)
assert m, "refreshSession() not found"
fn = m.group(0)
assert '_restartingForUpdate' in fn and 'location.reload' in fn, (
"refreshSession() must check the restart flag and bypass /api/session "
"when the server is mid-restart."
)
def test_conflict_response_shows_force_button(self):