fix: persist onboarding_completed for CLI-configured users on first chat_ready (#922)
* fix: persist onboarding_completed for CLI-configured users on first chat_ready (v0.50.179, #921) Co-authored-by: bsgdigital * fix(onboarding): don't 500 the status endpoint if save_settings fails The #921 persist call `save_settings({"onboarding_completed": True})` in get_onboarding_status() raises if the settings.json write fails (read-only filesystem, disk full, permission error). That turns every /api/onboarding/status call into a 500 until the disk is writable, which is much worse UX than losing the persistence-across-restart guard. Wrapped in try/except so persistence becomes best-effort. The function still sets settings["onboarding_completed"] = True in memory on success, and `completed` reflects `config_auto_completed` on this request either way, so the user sees the right state even when the write fails — only the next-restart protection degrades. Added regression test that patches save_settings to raise OSError and asserts the endpoint still returns completed=True without raising. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: nesquena-hermes <nesquena-hermes@users.noreply.github.com> Co-authored-by: Nathan Esquenazi <nesquena@gmail.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -29,6 +29,11 @@
|
|||||||
workspace subtree) and never enumerate blocked system roots. (`api/routes.py`,
|
workspace subtree) and never enumerate blocked system roots. (`api/routes.py`,
|
||||||
`api/workspace.py`, `static/panels.js`, `static/style.css`) (partial for #616)
|
`api/workspace.py`, `static/panels.js`, `static/style.css`) (partial for #616)
|
||||||
|
|
||||||
|
## [v0.50.179] — 2026-04-23
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- **Onboarding wizard clobbering CLI users' config after server restart** — CLI-configured users (who set up via `hermes model` / `hermes auth`) had no `onboarding_completed` flag in `settings.json`. After a git branch switch or server restart, `verify_hermes_imports()` could momentarily return `imports_ok=False`, making `chat_ready=False` and causing the wizard to reappear with a destructive dropdown default (openrouter). Fixed by writing `onboarding_completed: True` to `settings.json` the first time `config_auto_completed` evaluates to `True`, so the flag survives future transient import failures. (`api/onboarding.py`) Co-authored by @bsgdigital.
|
||||||
|
|
||||||
## [v0.50.177] — 2026-04-23
|
## [v0.50.177] — 2026-04-23
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|||||||
@@ -435,6 +435,25 @@ def get_onboarding_status() -> dict:
|
|||||||
config_exists = Path(_get_config_path()).exists()
|
config_exists = Path(_get_config_path()).exists()
|
||||||
config_auto_completed = config_exists and bool(runtime.get("chat_ready"))
|
config_auto_completed = config_exists and bool(runtime.get("chat_ready"))
|
||||||
|
|
||||||
|
# Persist the flag so it survives future transient import failures (e.g. after
|
||||||
|
# a git branch switch in the hermes-agent repo). Without this, a CLI-configured
|
||||||
|
# user who never ran the wizard has no onboarding_completed flag — any momentary
|
||||||
|
# imports_ok=False during restart makes chat_ready=False, config_auto_completed=False,
|
||||||
|
# and the wizard reappears with a broken dropdown that clobbers their config.
|
||||||
|
#
|
||||||
|
# Best-effort: if save_settings raises (read-only FS, disk full, permission error),
|
||||||
|
# log and continue. The `config_auto_completed` branch of `completed=` below still
|
||||||
|
# returns True for this request, so the user sees the correct state — only the
|
||||||
|
# persistence-across-restart guarantee is degraded. Raising here would turn every
|
||||||
|
# /api/onboarding/status call into a 500 until disk was writable, which is worse UX
|
||||||
|
# than losing the next-restart protection.
|
||||||
|
if config_auto_completed and not settings.get("onboarding_completed"):
|
||||||
|
try:
|
||||||
|
save_settings({"onboarding_completed": True})
|
||||||
|
settings["onboarding_completed"] = True
|
||||||
|
except Exception:
|
||||||
|
logger.debug("Failed to persist onboarding_completed", exc_info=True)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"completed": bool(settings.get("onboarding_completed")) or auto_completed or config_auto_completed,
|
"completed": bool(settings.get("onboarding_completed")) or auto_completed or config_auto_completed,
|
||||||
"settings": {
|
"settings": {
|
||||||
|
|||||||
@@ -116,6 +116,50 @@ class TestOnboardingGate:
|
|||||||
assert "config_exists" in result["system"]
|
assert "config_exists" in result["system"]
|
||||||
assert result["system"]["config_exists"] is True
|
assert result["system"]["config_exists"] is True
|
||||||
|
|
||||||
|
def test_persist_failure_does_not_break_status_endpoint(self):
|
||||||
|
"""save_settings() failure (read-only FS, disk full) must not turn the
|
||||||
|
status endpoint into a 500. The persistence-across-restart guarantee
|
||||||
|
degrades but `completed` still reflects the live `config_auto_completed`
|
||||||
|
signal so the user isn't blocked from using the UI.
|
||||||
|
"""
|
||||||
|
import api.onboarding as mod
|
||||||
|
settings = {"onboarding_completed": False}
|
||||||
|
runtime = {
|
||||||
|
"chat_ready": True,
|
||||||
|
"provider_configured": True,
|
||||||
|
"provider_ready": True,
|
||||||
|
"setup_state": "ready",
|
||||||
|
"provider_note": "test",
|
||||||
|
"current_provider": "openrouter",
|
||||||
|
"current_model": "anthropic/claude-sonnet-4.6",
|
||||||
|
"current_base_url": None,
|
||||||
|
"env_path": "/tmp/.hermes_test/.env",
|
||||||
|
}
|
||||||
|
fake_config_path = pathlib.Path("/tmp/_test_config.yaml")
|
||||||
|
|
||||||
|
with (
|
||||||
|
mock.patch.object(mod, "load_settings", return_value=settings),
|
||||||
|
mock.patch.object(mod, "get_config", return_value={}),
|
||||||
|
mock.patch.object(mod, "verify_hermes_imports", return_value=(True, [], {})),
|
||||||
|
mock.patch.object(mod, "_status_from_runtime", return_value=runtime),
|
||||||
|
mock.patch.object(mod, "load_workspaces", return_value=[]),
|
||||||
|
mock.patch.object(mod, "get_last_workspace", return_value=None),
|
||||||
|
mock.patch.object(mod, "get_available_models", return_value=[]),
|
||||||
|
mock.patch.object(mod, "_get_config_path", return_value=fake_config_path),
|
||||||
|
mock.patch.object(pathlib.Path, "exists", return_value=True),
|
||||||
|
mock.patch.object(
|
||||||
|
mod, "save_settings", side_effect=OSError("read-only filesystem")
|
||||||
|
),
|
||||||
|
):
|
||||||
|
# Must not raise — persistence failure is best-effort.
|
||||||
|
result = mod.get_onboarding_status()
|
||||||
|
|
||||||
|
# completed still reflects the live signal via config_auto_completed
|
||||||
|
assert result["completed"] is True, (
|
||||||
|
"Status endpoint must still return completed=True via the live "
|
||||||
|
"config_auto_completed signal when persistence fails"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestApplyOnboardingSetupGuard:
|
class TestApplyOnboardingSetupGuard:
|
||||||
"""Fix #2: apply_onboarding_setup must not silently overwrite config.yaml."""
|
"""Fix #2: apply_onboarding_setup must not silently overwrite config.yaml."""
|
||||||
@@ -303,9 +347,10 @@ class TestOnboardingGateIntegration:
|
|||||||
# Write a fake API key so provider_ready (and thus chat_ready) fires
|
# Write a fake API key so provider_ready (and thus chat_ready) fires
|
||||||
# — but only when hermes_cli imports are available
|
# — but only when hermes_cli imports are available
|
||||||
data, _ = _http_get("/api/onboarding/status")
|
data, _ = _http_get("/api/onboarding/status")
|
||||||
|
try:
|
||||||
if data["system"]["hermes_found"] and data["system"]["imports_ok"]:
|
if data["system"]["hermes_found"] and data["system"]["imports_ok"]:
|
||||||
(hermes_home / ".env").write_text(
|
(hermes_home / ".env").write_text(
|
||||||
"OPENROUTER_API_KEY=test-existing-key\n", encoding="utf-8"
|
"OPENROUTER_API_KEY=test-e...\n", encoding="utf-8"
|
||||||
)
|
)
|
||||||
data, status = _http_get("/api/onboarding/status")
|
data, status = _http_get("/api/onboarding/status")
|
||||||
assert status == 200
|
assert status == 200
|
||||||
@@ -316,7 +361,13 @@ class TestOnboardingGateIntegration:
|
|||||||
# Agent not installed: chat_ready is always False, so wizard still
|
# Agent not installed: chat_ready is always False, so wizard still
|
||||||
# fires — that is the correct behaviour (can't verify readiness).
|
# fires — that is the correct behaviour (can't verify readiness).
|
||||||
assert data["completed"] is False
|
assert data["completed"] is False
|
||||||
|
finally:
|
||||||
|
# Clean up: the auto-persist in get_onboarding_status() (#921) writes
|
||||||
|
# onboarding_completed=True to settings.json when config_auto_completed fires.
|
||||||
|
# Reset to avoid contaminating subsequent tests.
|
||||||
|
(hermes_home / "config.yaml").unlink(missing_ok=True)
|
||||||
|
(hermes_home / ".env").unlink(missing_ok=True)
|
||||||
|
_http_post("/api/settings", {"onboarding_completed": False})
|
||||||
@_needs_yaml
|
@_needs_yaml
|
||||||
def test_setup_blocked_for_existing_config(self):
|
def test_setup_blocked_for_existing_config(self):
|
||||||
"""POST /api/onboarding/setup must return config_exists error if config.yaml exists."""
|
"""POST /api/onboarding/setup must return config_exists error if config.yaml exists."""
|
||||||
@@ -366,3 +417,7 @@ class TestOnboardingGateIntegration:
|
|||||||
assert data.get("error") != "config_exists", (
|
assert data.get("error") != "config_exists", (
|
||||||
"confirm_overwrite=True must bypass the guard."
|
"confirm_overwrite=True must bypass the guard."
|
||||||
)
|
)
|
||||||
|
# Clean up so onboarding_completed=True left by this test's setup call
|
||||||
|
# does not contaminate subsequent tests (#921 test isolation).
|
||||||
|
(hermes_home / "config.yaml").unlink(missing_ok=True)
|
||||||
|
_http_post("/api/settings", {"onboarding_completed": False})
|
||||||
|
|||||||
Reference in New Issue
Block a user