fix(desktop): surface expired login instead of silent "Starting" daemon (MUL-2973)

When the local daemon's cached PAT is expired/revoked, the daemon 401s during
startup and exits before it serves /health. The desktop polled /health forever
and kept reporting "starting", so the runtime sat at "Starting…" with no hint
that re-login was the fix (GitHub #3512).

Detect this in the layer that owns the daemon's credential: when a start has
not reached "running" after a 10-second grace period, probe the daemon's token
once against GET /api/me. A 401 (or a missing token) surfaces a new
"auth_expired" daemon state; a 2xx means the token is fine (the failure is not
auth-related); a network error, or any other status, stays inconclusive — so a
network blip is never misclassified as an expired login.

The desktop then shows a "Sign-in expired · Sign in again" prompt on the
runtimes card and a banner in Daemon settings. The action drops the stale
cached PAT, re-mints a fresh one from the current session, and restarts the
daemon; if minting also 401s (the session token is dead) it falls back to the
standard re-login flow. No daemon/CLI behavior change.

Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
J
2026-06-04 12:47:16 +08:00
parent 5eba94ee25
commit ca04db5a82
12 changed files with 363 additions and 6 deletions

View File

@@ -0,0 +1,34 @@
import { describe, expect, it } from "vitest";
import { classifyAuthProbe } from "./daemon-auth-probe";
// Behavioural contract for classifyAuthProbe: only an explicit 401 or a missing
// credential may be reported as "auth_expired"; everything that is not a 2xx
// stays inconclusive.
describe("classifyAuthProbe", () => {
  it("treats a 401 as expired login", () => {
    expect(classifyAuthProbe({ status: 401 })).toBe("auth_expired");
  });

  it("treats a missing token as expired login", () => {
    expect(classifyAuthProbe({ noToken: true })).toBe("auth_expired");
  });

  it("treats a 2xx as a valid token (failure is non-auth)", () => {
    expect(classifyAuthProbe({ status: 200 })).toBe("ok");
    expect(classifyAuthProbe({ status: 204 })).toBe("ok");
  });

  // Pin both edges of the success window so an off-by-one in the range check
  // cannot slip through.
  it("accepts only statuses inside the 200-299 window as a valid token", () => {
    expect(classifyAuthProbe({ status: 199 })).toBe("unknown");
    expect(classifyAuthProbe({ status: 299 })).toBe("ok");
    expect(classifyAuthProbe({ status: 300 })).toBe("unknown");
  });

  // The headline guard: a network failure must never be reported as an auth
  // problem — the daemon is just as unreachable for non-auth reasons.
  it("does NOT classify a network error as expired login", () => {
    expect(classifyAuthProbe({ networkError: true })).toBe("unknown");
  });

  // Same guard, stronger form: even if a 401 is supplied alongside the network
  // error flag, the network error decides the verdict.
  it("lets a network error outrank a 401 status", () => {
    expect(classifyAuthProbe({ networkError: true, status: 401 })).toBe("unknown");
  });

  it("leaves 5xx and other statuses inconclusive", () => {
    expect(classifyAuthProbe({ status: 500 })).toBe("unknown");
    expect(classifyAuthProbe({ status: 503 })).toBe("unknown");
    expect(classifyAuthProbe({ status: 403 })).toBe("unknown");
  });

  it("is inconclusive when nothing is known", () => {
    expect(classifyAuthProbe({})).toBe("unknown");
  });
});

View File

@@ -0,0 +1,40 @@
/**
* Pure classification for the daemon auth probe. Kept free of Electron imports
* so it can be unit-tested in jsdom.
*
* When the local daemon fails to reach "running" shortly after a start, the
* main process probes the daemon's token against the backend (GET /api/me) to
* tell "the daemon can't authenticate" apart from "the daemon is slow / the
* network is down / it crashed for another reason". Misclassifying a network
* blip as an auth failure would be worse than the original silent-Starting bug,
* so the rules below are deliberately conservative: only an explicit 401 (or a
* missing credential) is treated as auth-expired.
*/
/** What the token probe observed; every field is optional because only one usually applies. */
export interface AuthProbeOutcome {
  /** Status code of the probe response, present only when a response arrived. */
  status?: number;
  /** Set when the daemon profile holds no token, so no request could be made. */
  noToken?: boolean;
  /** Set when the probe request itself failed (timeout, refused connection, DNS, TLS). */
  networkError?: boolean;
}

/** Verdict on the token: rejected / absent, accepted, or impossible to tell. */
export type AuthProbeResult = "auth_expired" | "ok" | "unknown";

/**
 * Turns a probe observation into a verdict about the daemon's token.
 *
 * Signals are evaluated in a fixed priority order: a missing token, then a
 * network error, then the HTTP status.
 *
 * @param outcome - What the probe observed.
 * @returns "auth_expired" for a missing token or a 401, "ok" for any 2xx, and
 *   "unknown" for a network error, any other status, or no information at all.
 */
export function classifyAuthProbe(outcome: AuthProbeOutcome): AuthProbeResult {
  const { noToken, networkError, status } = outcome;

  // Nothing to validate: the only way forward is signing in.
  if (noToken) return "auth_expired";

  // The server was never reached, so nothing can be said about the token.
  // Reporting "unknown" keeps the caller from prompting for a re-login.
  if (networkError) return "unknown";

  // No response status recorded: still nothing to judge.
  if (status === undefined) return "unknown";

  // An explicit rejection of the credential.
  if (status === 401) return "auth_expired";

  // Any 2xx proves the token was accepted; all remaining statuses (5xx, 403,
  // redirects, ...) say nothing reliable about its validity.
  const accepted = status >= 200 && status < 300;
  return accepted ? "ok" : "unknown";
}

View File

@@ -19,12 +19,18 @@ import { homedir, hostname } from "os";
import type { DaemonStatus, DaemonPrefs } from "../shared/daemon-types";
import { ensureManagedCli, managedCliPath } from "./cli-bootstrap";
import { decideVersionAction } from "./version-decision";
import { classifyAuthProbe, type AuthProbeResult } from "./daemon-auth-probe";
const DEFAULT_HEALTH_PORT = 19514;
const POLL_INTERVAL_MS = 5_000;
const PREFS_PATH = join(homedir(), ".multica", "desktop_prefs.json");
const LOG_TAIL_RETRY_MS = 2_000;
const LOG_TAIL_MAX_RETRIES = 5;
// How long a start may sit in "starting" (with no /health) before we probe the
// token to find out whether login expired. The daemon's own startup can legitimately
// take a while (it renews the PAT and lists workspaces before serving /health), so we
// wait past the common case to avoid probing healthy-but-slow starts.
const AUTH_PROBE_GRACE_MS = 10_000;
const DEFAULT_PREFS: DaemonPrefs = { autoStart: true, autoStop: false };
@@ -48,6 +54,15 @@ let pendingVersionRestart = false;
let targetApiBaseUrl: string | null = null;
let activeProfile: ActiveProfile | null = null;
// Auth-probe state for the current start attempt. When a start fails to reach
// "running", we probe the daemon's token once (after AUTH_PROBE_GRACE_MS) to
// decide whether the cause is an expired/invalid login. `authExpired` is sticky
// until the next start attempt or a successful /health, so the UI keeps showing
// the re-login prompt instead of flapping back to "starting". See #3512.
let startingSince: number | null = null;
let authProbeDone = false;
let authExpired = false;
// Serialize all writes to any profile config file. Multiple paths
// (syncToken, resolveActiveProfile, clearToken, watch/unwatch handlers)
// may try to write concurrently; chaining them avoids interleaved writes
@@ -161,6 +176,36 @@ async function fetchHealthAtPort(
}
}
/**
 * Validates the daemon profile's token against the backend to find out whether
 * a stuck start is an auth problem. Hits the same endpoint `multica auth status`
 * uses (GET /api/me) with the token read from the profile's config, so the
 * verdict matches what the daemon itself would get from the server.
 *
 * Only the HTTP status is inspected (never the body) so a future change to the
 * /api/me response shape can't break this — a 401 means the token is rejected,
 * a 2xx means it's fine, and a thrown request means the network is the problem,
 * not auth. See classifyAuthProbe for the full rule set.
 *
 * @param profile - Name of the daemon profile whose config holds the token.
 * @returns "unknown" when no target API URL is set or the request fails,
 *   otherwise the classifyAuthProbe verdict for the observed outcome.
 */
async function probeTokenValidity(profile: string): Promise<AuthProbeResult> {
  // Capture the target once: the module-level value may be reassigned while we
  // await below, and the probe must stay bound to the URL it was started for.
  const baseUrl = targetApiBaseUrl;
  if (!baseUrl) return "unknown";

  const cfg = await readProfileConfig(profile);
  const token = typeof cfg.token === "string" ? cfg.token : "";
  if (!token) return classifyAuthProbe({ noToken: true });

  // Abort the request after 4 s so a hung connection reads as a network error
  // instead of stalling the caller.
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 4_000);
  try {
    const res = await fetch(`${baseUrl.replace(/\/+$/, "")}/api/me`, {
      headers: { Authorization: `Bearer ${token}` },
      signal: controller.signal,
    });
    return classifyAuthProbe({ status: res.status });
  } catch {
    // Any thrown request (abort, refused connection, DNS, TLS) is a network
    // problem and must never be reported as an auth failure.
    return classifyAuthProbe({ networkError: true });
  } finally {
    // Always release the timer — previously a rejected fetch left it pending.
    clearTimeout(timeout);
  }
}
// Desktop owns a dedicated CLI profile named after the target API host, so it
// never reads or writes the user's hand-configured profiles. Profile dir:
// ~/.multica/profiles/desktop-<host>/
@@ -249,12 +294,40 @@ async function fetchHealth(): Promise<DaemonStatus> {
const data = await fetchHealthAtPort(active.port);
if (!data || data.status !== "running") {
// A start that never reaches "running" is the symptom; an expired/invalid
// login is the most common cause and the one with no other signal (the
// daemon exits before it can serve /health, so we can't read the reason
// from it). Probe the token once per attempt, after a grace period, to
// surface a re-login prompt instead of spinning on "starting" forever.
if (
currentState === "starting" &&
!authExpired &&
!authProbeDone &&
startingSince !== null &&
Date.now() - startingSince >= AUTH_PROBE_GRACE_MS
) {
authProbeDone = true;
if ((await probeTokenValidity(active.name)) === "auth_expired") {
authExpired = true;
}
}
// Sticky: once login is known-expired, keep reporting it (even after
// currentState flips away from "starting") until the next start attempt or
// a successful /health clears the flag.
if (authExpired) {
return { state: "auth_expired", profile: active.name };
}
return {
state: currentState === "starting" ? "starting" : "stopped",
profile: active.name,
};
}
// A live, authenticated daemon clears any prior auth-failure verdict so the
// re-login prompt disappears once the user reconnects.
authExpired = false;
startingSince = null;
// Safety: if we have a target URL and the daemon on our port reports a
// different server_url, it's not "our" daemon — drop it and re-resolve.
if (
@@ -657,6 +730,10 @@ async function startDaemon(): Promise<{ success: boolean; error?: string }> {
}
currentState = "starting";
// Begin a fresh auth-probe window for this attempt.
startingSince = Date.now();
authProbeDone = false;
authExpired = false;
sendStatus({ state: "starting" });
const args = ["daemon", "start", ...profileArgs(active)];
@@ -689,6 +766,9 @@ async function stopDaemon(): Promise<{ success: boolean; error?: string }> {
const active = await ensureActiveProfile();
currentState = "stopping";
// An explicit stop is a clean reset — drop any pending auth-failure verdict.
authExpired = false;
startingSince = null;
sendStatus({ state: "stopping" });
const args = ["daemon", "stop", ...profileArgs(active)];

View File

@@ -74,7 +74,14 @@ interface DesktopAPI {
}
interface DaemonStatus {
state: "running" | "stopped" | "starting" | "stopping" | "installing_cli" | "cli_not_found";
state:
| "running"
| "stopped"
| "starting"
| "stopping"
| "installing_cli"
| "cli_not_found"
| "auth_expired";
pid?: number;
uptime?: string;
daemonId?: string;

View File

@@ -165,7 +165,14 @@ const desktopAPI = {
};
interface DaemonStatus {
state: "running" | "stopped" | "starting" | "stopping" | "installing_cli" | "cli_not_found";
state:
| "running"
| "stopped"
| "starting"
| "stopping"
| "installing_cli"
| "cli_not_found"
| "auth_expired";
pid?: number;
uptime?: string;
daemonId?: string;

View File

@@ -6,6 +6,7 @@ import {
RotateCw,
Activity,
ScrollText,
LogIn,
} from "lucide-react";
import { useQuery } from "@tanstack/react-query";
import { useWorkspaceId } from "@multica/core/hooks";
@@ -22,6 +23,7 @@ import {
} from "@multica/ui/components/ui/dialog";
import { toast } from "sonner";
import { DaemonPanel } from "./daemon-panel";
import { reauthenticateDaemon } from "../platform/daemon-reauth";
import type { DaemonStatus } from "../../../shared/daemon-types";
import { DAEMON_STATE_LABELS } from "../../../shared/daemon-types";
@@ -115,9 +117,18 @@ export function DaemonRuntimeActions() {
}
}, []);
// Runs the daemon re-authentication flow while showing the busy state on the
// "Sign in again" button.
const handleReauth = useCallback(async () => {
  setActionLoading(true);
  try {
    await reauthenticateDaemon();
  } finally {
    // onStatusChange resets actionLoading on the next status push; reset here
    // too in case reauth logged out (unmount) or produced no status change.
    // Doing it in `finally` also re-enables the button if the flow rejects,
    // instead of leaving it disabled for good.
    setActionLoading(false);
  }
}, []);
const isRunning = status.state === "running";
const isStopped = status.state === "stopped";
const isCliMissing = status.state === "cli_not_found";
const isAuthExpired = status.state === "auth_expired";
const isTransitioning =
status.state === "starting" || status.state === "stopping";
const isInstalling = status.state === "installing_cli";
@@ -175,6 +186,23 @@ export function DaemonRuntimeActions() {
</Button>
)}
{isAuthExpired && (
<>
<span className="inline-flex items-center gap-1.5 text-xs text-destructive">
<AlertCircle className="size-3.5 shrink-0" />
Sign-in expired
</span>
<Button size="sm" onClick={handleReauth} disabled={actionLoading}>
{actionLoading ? (
<Activity className="size-3.5 mr-1.5 animate-pulse" />
) : (
<LogIn className="size-3.5 mr-1.5" />
)}
Sign in again
</Button>
</>
)}
{(isTransitioning || isInstalling) && (
<Button size="sm" variant="outline" disabled>
<Activity className="size-3.5 mr-1.5 animate-pulse" />

View File

@@ -1,7 +1,9 @@
import { useState, useEffect, useCallback, type ReactNode } from "react";
import { AlertCircle, LogIn } from "lucide-react";
import { Button } from "@multica/ui/components/ui/button";
import { Switch } from "@multica/ui/components/ui/switch";
import { cn } from "@multica/ui/lib/utils";
import { reauthenticateDaemon } from "../platform/daemon-reauth";
import type { DaemonPrefs, DaemonStatus } from "../../../shared/daemon-types";
import {
DAEMON_STATE_COLORS,
@@ -61,6 +63,7 @@ export function DaemonSettingsTab() {
const [cliInstalled, setCliInstalled] = useState<boolean | null>(null);
const [saving, setSaving] = useState(false);
const [status, setStatus] = useState<DaemonStatus>({ state: "stopped" });
const [reauthLoading, setReauthLoading] = useState(false);
useEffect(() => {
window.daemonAPI.getPrefs().then(setPrefs);
@@ -69,6 +72,12 @@ export function DaemonSettingsTab() {
return window.daemonAPI.onStatusChange(setStatus);
}, []);
// Runs the daemon re-authentication flow while the banner's "Sign in again"
// button is disabled.
const handleReauth = useCallback(async () => {
  setReauthLoading(true);
  try {
    await reauthenticateDaemon();
  } finally {
    // Re-enable the button even when the flow rejects; otherwise a single
    // failure would leave it disabled until the tab remounts.
    setReauthLoading(false);
  }
}, []);
const updatePref = useCallback(
async (key: keyof DaemonPrefs, value: boolean) => {
setSaving(true);
@@ -86,6 +95,30 @@ export function DaemonSettingsTab() {
Configure how the local agent daemon behaves with the desktop app.
</p>
{status.state === "auth_expired" && (
<div className="mt-4 flex items-start gap-3 rounded-lg border border-destructive/40 bg-destructive/5 px-4 py-3">
<AlertCircle className="mt-0.5 size-4 shrink-0 text-destructive" />
<div className="min-w-0 flex-1">
<p className="text-sm font-medium text-destructive">
Sign-in expired
</p>
<p className="mt-0.5 text-sm text-muted-foreground">
The local daemon couldn&apos;t authenticate, so this device
can&apos;t take tasks. Sign in again to restore it.
</p>
</div>
<Button
size="sm"
className="shrink-0"
onClick={handleReauth}
disabled={reauthLoading}
>
<LogIn className="size-3.5 mr-1.5" />
Sign in again
</Button>
</div>
)}
<div className="mt-6 divide-y">
<SettingRow
label="Auto-start on launch"

View File

@@ -11,7 +11,14 @@ import type { AgentRuntime } from "@multica/core/types";
* to the desktop preload typings (which live in apps/desktop/src/preload).
*/
interface DaemonStatusLike {
state: "running" | "stopped" | "starting" | "stopping" | "installing_cli" | "cli_not_found";
state:
| "running"
| "stopped"
| "starting"
| "stopping"
| "installing_cli"
| "cli_not_found"
| "auth_expired";
daemonId?: string;
}
@@ -25,7 +32,11 @@ interface DaemonStatusLike {
* within 75s.
*/
function mergeDaemonStatus(rt: AgentRuntime, status: DaemonStatusLike): AgentRuntime {
if (status.state === "stopped" || status.state === "stopping") {
if (
status.state === "stopped" ||
status.state === "stopping" ||
status.state === "auth_expired"
) {
return { ...rt, status: "offline" };
}
if (status.state === "running") {

View File

@@ -0,0 +1,69 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Spies shared between the module mock below and the assertions. They are
// created through vi.hoisted() so they already exist when the vi.mock()
// factory runs (Vitest hoists vi.mock() calls above the imports).
const { mockGetState, logout } = vi.hoisted(() => ({
mockGetState: vi.fn(),
logout: vi.fn(),
}));
// Stub the auth store down to getState(); each test decides what it returns
// through mockGetState.
vi.mock("@multica/core/auth", () => ({
useAuthStore: { getState: mockGetState },
}));
import { reauthenticateDaemon } from "./daemon-reauth";
// Stand-in for the preload-exposed window.daemonAPI, limited to the three
// calls reauthenticateDaemon makes.
const daemonAPI = {
clearToken: vi.fn(),
syncToken: vi.fn(),
restart: vi.fn(),
};
// Reset to a "signed in, everything succeeds" baseline before every test:
// no recorded calls, no session token in localStorage, all daemon calls
// resolving, and a signed-in user in the auth store.
beforeEach(() => {
vi.clearAllMocks();
localStorage.clear();
daemonAPI.clearToken.mockResolvedValue(undefined);
daemonAPI.syncToken.mockResolvedValue(undefined);
daemonAPI.restart.mockResolvedValue({ success: true });
// Install the stub where the code under test looks it up.
(window as unknown as { daemonAPI: typeof daemonAPI }).daemonAPI = daemonAPI;
mockGetState.mockReturnValue({ user: { id: "user-1" }, logout });
});
// Covers the happy path (drop + re-mint + restart) and every branch that
// falls back to a full logout.
describe("reauthenticateDaemon", () => {
  // Puts a session token where reauthenticateDaemon reads it from.
  const seedSessionToken = (): void => {
    localStorage.setItem("multica_token", "jwt-abc");
  };

  it("re-mints a fresh PAT and restarts the daemon when signed in", async () => {
    seedSessionToken();

    await reauthenticateDaemon();

    expect(daemonAPI.clearToken).toHaveBeenCalledTimes(1);
    expect(daemonAPI.syncToken).toHaveBeenCalledWith("jwt-abc", "user-1");
    expect(daemonAPI.restart).toHaveBeenCalledTimes(1);
    expect(logout).toHaveBeenCalledTimes(0);
  });

  it("falls back to full logout when minting fails (session token is dead)", async () => {
    seedSessionToken();
    const mintFailure = new Error("mint PAT failed: 401");
    daemonAPI.syncToken.mockRejectedValueOnce(mintFailure);

    await reauthenticateDaemon();

    expect(logout).toHaveBeenCalledTimes(1);
    expect(daemonAPI.restart).toHaveBeenCalledTimes(0);
  });

  it("logs out without touching the daemon when there is no session token", async () => {
    // localStorage was cleared in beforeEach, so no token is present.
    await reauthenticateDaemon();

    expect(logout).toHaveBeenCalledTimes(1);
    expect(daemonAPI.clearToken).toHaveBeenCalledTimes(0);
    expect(daemonAPI.syncToken).toHaveBeenCalledTimes(0);
  });

  it("logs out when there is no signed-in user", async () => {
    seedSessionToken();
    mockGetState.mockReturnValue({ user: null, logout });

    await reauthenticateDaemon();

    expect(logout).toHaveBeenCalledTimes(1);
    expect(daemonAPI.clearToken).toHaveBeenCalledTimes(0);
  });
});

View File

@@ -0,0 +1,33 @@
import { useAuthStore } from "@multica/core/auth";
/**
 * Restores the local daemon's credentials after the daemon reported
 * "auth_expired" (see #3512).
 *
 * Happy path, in order: drop the cached daemon token, mint a new one from the
 * current session token and user id, then restart the daemon so it picks the
 * new credential up.
 *
 * Fallback: when there is no signed-in user or no session token, or when any
 * step of the happy path throws, the auth store's `logout()` is invoked
 * instead — the standard re-login flow.
 *
 * @returns A promise that settles once the daemon was restarted or the logout
 *   fallback was triggered.
 */
export async function reauthenticateDaemon(): Promise<void> {
  // Standard re-login fallback; always reads the store's current logout action.
  const fallBackToLogin = (): void => {
    useAuthStore.getState().logout();
  };

  const { user } = useAuthStore.getState();
  const sessionToken = localStorage.getItem("multica_token");

  // Without both a user and a session token there is nothing to mint from.
  if (!(user && sessionToken)) {
    fallBackToLogin();
    return;
  }

  try {
    // Order matters: the stale token goes first, the fresh one is stored
    // next, and only then is the daemon restarted to load it.
    await window.daemonAPI.clearToken();
    await window.daemonAPI.syncToken(sessionToken, user.id);
    await window.daemonAPI.restart();
  } catch {
    // Session token is also invalid (mint returned 401) — full re-login.
    fallBackToLogin();
  }
}

View File

@@ -4,7 +4,11 @@ export type DaemonState =
| "starting"
| "stopping"
| "installing_cli"
| "cli_not_found";
| "cli_not_found"
// The daemon can't start because the server rejected its credentials (the
// cached PAT expired / was revoked, or the session token is dead). Without
// this, an auth failure silently sticks at "starting" forever — see #3512.
| "auth_expired";
export interface DaemonStatus {
state: DaemonState;
@@ -32,6 +36,7 @@ export const DAEMON_STATE_COLORS: Record<DaemonState, string> = {
stopping: "bg-amber-500 animate-pulse",
installing_cli: "bg-sky-500 animate-pulse",
cli_not_found: "bg-red-500",
auth_expired: "bg-red-500",
};
export const DAEMON_STATE_LABELS: Record<DaemonState, string> = {
@@ -41,6 +46,7 @@ export const DAEMON_STATE_LABELS: Record<DaemonState, string> = {
stopping: "Stopping…",
installing_cli: "Setting up…",
cli_not_found: "Setup Failed",
auth_expired: "Sign-in required",
};
export function formatUptime(uptime?: string): string {
@@ -81,5 +87,7 @@ export function daemonStateDescription(state: DaemonState, runtimeCount: number)
return "Setting up the runtime for the first time. Only happens once.";
case "cli_not_found":
return "Setup failed · couldn't download the runtime. Check your network.";
case "auth_expired":
return "Sign-in expired · sign in again to bring this device back online.";
}
}

View File

@@ -11,7 +11,14 @@ export interface LocalDaemonStatus {
}
interface DaemonStatusLike {
state: "running" | "stopped" | "starting" | "stopping" | "installing_cli" | "cli_not_found";
state:
| "running"
| "stopped"
| "starting"
| "stopping"
| "installing_cli"
| "cli_not_found"
| "auth_expired";
daemonId?: string;
deviceName?: string;
}