From a3647570fb1aab31d172e34dc70958a009f78170 Mon Sep 17 00:00:00 2001 From: nesquena-hermes Date: Thu, 23 Apr 2026 15:46:02 -0700 Subject: [PATCH] fix: persist onboarding_completed for CLI-configured users on first chat_ready (#922) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: persist onboarding_completed for CLI-configured users on first chat_ready (v0.50.179, #921) Co-authored-by: bsgdigital * fix(onboarding): don't 500 the status endpoint if save_settings fails The #921 persist call `save_settings({"onboarding_completed": True})` in get_onboarding_status() raises if the settings.json write fails (read-only filesystem, disk full, permission error). That turns every /api/onboarding/status call into a 500 until the disk is writable, which is much worse UX than losing the persistence-across-restart guard. Wrapped in try/except so persistence becomes best-effort. The function still sets settings["onboarding_completed"] = True in memory on success, and `completed` reflects `config_auto_completed` on this request either way, so the user sees the right state even when the write fails — only the next-restart protection degrades. Added regression test that patches save_settings to raise OSError and asserts the endpoint still returns completed=True without raising. Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: nesquena-hermes Co-authored-by: Nathan Esquenazi Co-authored-by: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 5 ++ api/onboarding.py | 19 ++++++ tests/test_onboarding_existing_config.py | 83 ++++++++++++++++++++---- 3 files changed, 93 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b768977..d6f4662 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,11 @@ workspace subtree) and never enumerate blocked system roots. (`api/routes.py`, `api/workspace.py`, `static/panels.js`, `static/style.css`) (partial for #616) +## [v0.50.179] — 2026-04-23 + +### Fixed +- **Onboarding wizard clobbering CLI users' config after server restart** — CLI-configured users (who set up via `hermes model` / `hermes auth`) had no `onboarding_completed` flag in `settings.json`. After a git branch switch or server restart, `verify_hermes_imports()` could momentarily return `imports_ok=False`, making `chat_ready=False` and causing the wizard to reappear with a destructive dropdown default (openrouter). Fixed by writing `onboarding_completed: True` to `settings.json` the first time `config_auto_completed` evaluates to `True`, so the flag survives future transient import failures. (`api/onboarding.py`) Co-authored by @bsgdigital. + ## [v0.50.177] — 2026-04-23 ### Fixed diff --git a/api/onboarding.py b/api/onboarding.py index f58bf9c..d0ac572 100644 --- a/api/onboarding.py +++ b/api/onboarding.py @@ -435,6 +435,25 @@ def get_onboarding_status() -> dict: config_exists = Path(_get_config_path()).exists() config_auto_completed = config_exists and bool(runtime.get("chat_ready")) + # Persist the flag so it survives future transient import failures (e.g. after + # a git branch switch in the hermes-agent repo). Without this, a CLI-configured + # user who never ran the wizard has no onboarding_completed flag — any momentary + # imports_ok=False during restart makes chat_ready=False, config_auto_completed=False, + # and the wizard reappears with a broken dropdown that clobbers their config. + # + # Best-effort: if save_settings raises (read-only FS, disk full, permission error), + # log and continue. The `config_auto_completed` branch of `completed=` below still + # returns True for this request, so the user sees the correct state — only the + # persistence-across-restart guarantee is degraded. Raising here would turn every + # /api/onboarding/status call into a 500 until disk was writable, which is worse UX + # than losing the next-restart protection. + if config_auto_completed and not settings.get("onboarding_completed"): + try: + save_settings({"onboarding_completed": True}) + settings["onboarding_completed"] = True + except Exception: + logger.debug("Failed to persist onboarding_completed", exc_info=True) + return { "completed": bool(settings.get("onboarding_completed")) or auto_completed or config_auto_completed, "settings": { diff --git a/tests/test_onboarding_existing_config.py b/tests/test_onboarding_existing_config.py index 76d831b..423df84 100644 --- a/tests/test_onboarding_existing_config.py +++ b/tests/test_onboarding_existing_config.py @@ -116,6 +116,50 @@ class TestOnboardingGate: assert "config_exists" in result["system"] assert result["system"]["config_exists"] is True + def test_persist_failure_does_not_break_status_endpoint(self): + """save_settings() failure (read-only FS, disk full) must not turn the + status endpoint into a 500. The persistence-across-restart guarantee + degrades but `completed` still reflects the live `config_auto_completed` + signal so the user isn't blocked from using the UI. + """ + import api.onboarding as mod + settings = {"onboarding_completed": False} + runtime = { + "chat_ready": True, + "provider_configured": True, + "provider_ready": True, + "setup_state": "ready", + "provider_note": "test", + "current_provider": "openrouter", + "current_model": "anthropic/claude-sonnet-4.6", + "current_base_url": None, + "env_path": "/tmp/.hermes_test/.env", + } + fake_config_path = pathlib.Path("/tmp/_test_config.yaml") + + with ( + mock.patch.object(mod, "load_settings", return_value=settings), + mock.patch.object(mod, "get_config", return_value={}), + mock.patch.object(mod, "verify_hermes_imports", return_value=(True, [], {})), + mock.patch.object(mod, "_status_from_runtime", return_value=runtime), + mock.patch.object(mod, "load_workspaces", return_value=[]), + mock.patch.object(mod, "get_last_workspace", return_value=None), + mock.patch.object(mod, "get_available_models", return_value=[]), + mock.patch.object(mod, "_get_config_path", return_value=fake_config_path), + mock.patch.object(pathlib.Path, "exists", return_value=True), + mock.patch.object( + mod, "save_settings", side_effect=OSError("read-only filesystem") + ), + ): + # Must not raise — persistence failure is best-effort. + result = mod.get_onboarding_status() + + # completed still reflects the live signal via config_auto_completed + assert result["completed"] is True, ( + "Status endpoint must still return completed=True via the live " + "config_auto_completed signal when persistence fails" + ) + class TestApplyOnboardingSetupGuard: """Fix #2: apply_onboarding_setup must not silently overwrite config.yaml.""" @@ -303,20 +347,27 @@ class TestOnboardingGateIntegration: # Write a fake API key so provider_ready (and thus chat_ready) fires # — but only when hermes_cli imports are available data, _ = _http_get("/api/onboarding/status") - if data["system"]["hermes_found"] and data["system"]["imports_ok"]: - (hermes_home / ".env").write_text( - "OPENROUTER_API_KEY=test-existing-key\n", encoding="utf-8" - ) - data, status = _http_get("/api/onboarding/status") - assert status == 200 - assert data["completed"] is True, ( - "Existing config + chat_ready must auto-complete onboarding." - ) - else: - # Agent not installed: chat_ready is always False, so wizard still - # fires — that is the correct behaviour (can't verify readiness). - assert data["completed"] is False - + try: + if data["system"]["hermes_found"] and data["system"]["imports_ok"]: + (hermes_home / ".env").write_text( + "OPENROUTER_API_KEY=test-e...\n", encoding="utf-8" + ) + data, status = _http_get("/api/onboarding/status") + assert status == 200 + assert data["completed"] is True, ( + "Existing config + chat_ready must auto-complete onboarding." + ) + else: + # Agent not installed: chat_ready is always False, so wizard still + # fires — that is the correct behaviour (can't verify readiness). + assert data["completed"] is False + finally: + # Clean up: the auto-persist in get_onboarding_status() (#921) writes + # onboarding_completed=True to settings.json when config_auto_completed fires. + # Reset to avoid contaminating subsequent tests. + (hermes_home / "config.yaml").unlink(missing_ok=True) + (hermes_home / ".env").unlink(missing_ok=True) + _http_post("/api/settings", {"onboarding_completed": False}) @_needs_yaml def test_setup_blocked_for_existing_config(self): """POST /api/onboarding/setup must return config_exists error if config.yaml exists.""" @@ -366,3 +417,7 @@ class TestOnboardingGateIntegration: assert data.get("error") != "config_exists", ( "confirm_overwrite=True must bypass the guard." ) + # Clean up so onboarding_completed=True left by this test's setup call + # does not contaminate subsequent tests (#921 test isolation). + (hermes_home / "config.yaml").unlink(missing_ok=True) + _http_post("/api/settings", {"onboarding_completed": False})