feat(chat): presence v4 — status pill, failure bubble, elapsed timing (#1856)

A complete UX upgrade for chat sending → receiving → recovering.

* StatusPill replaces the orphan spinner — stage-aware copy ("Reading files · 12s", "Searching the web · 14s", "Typing · 24s"), shimmer text, monotonic timer, derived effective status, > 60s warning tone, > 5min cancel button.
* WS writethrough on task:queued / task:dispatch / task:cancelled so pendingTask cache stays in sync with the daemon state machine without invalidate-refetch latency. broadcastTaskDispatch now includes chat_session_id when the task is for a chat session — the existing payload only carried it on the generic task: events, leaving the pill stuck at "Queued" until completion.
* Failure fallback — FailTask writes a chat_message tagged with failure_reason (mirrors the issue path's system comment, gated on retried==nil). Front-end renders an inline note ("Connection failed", with a Show details collapsible) instead of the previous black hole.
* Elapsed timing — chat_message.elapsed_ms persists task.completed_at - task.created_at on success/failure rows. UI shows "Replied in 38s" / "Failed after 12s" beneath assistant bubbles. Format helper shared between StatusPill and the persisted caption so the live timer and final reading never disagree.
* Optimistic burst rebalanced — pendingTask seed + created_at moved before the HTTP roundtrip so the pill appears the instant the user hits send; handleStop is fire-and-forget so cancel feels immediate (server confirmation arrives via task:cancelled WS).
* Presence integration — chat avatars use ActorAvatar (status dot + hover card); OfflineBanner above the input on offline/unstable; SessionDropdown shows per-row in-flight/unread pip plus a cross-session aggregate pip on the closed trigger.
* Editor blur on send so the caret stops competing with the StatusPill / streaming reply for the user's attention.
* Chat panel isOpen now persists globally; defaults to OPEN for new users (storage key absence) so the feature is discoverable. Existing users' prior choice is respected.
* DB: migrations 062 (failure_reason) + 063 (elapsed_ms), both ADD COLUMN NULL — fast, non-blocking, backwards compatible.
* WS: task:failed chat path now invalidates chatKeys.messages — fixes a pre-existing bug where the failure bubble required a page refresh to appear.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-17 11:48:42 +02:00 · 2026-04-29 18:29:46 +08:00
parent 1fd583ef65
commit 4ad0a0b847
27 changed files with 936 additions and 94 deletions
--- a/packages/core/chat/store.ts
+++ b/packages/core/chat/store.ts
@@ -16,6 +16,14 @@ const CHAT_HEIGHT_KEY = "multica:chat:height";
 const CHAT_EXPANDED_KEY = "multica:chat:expanded";
 /** Focus mode is a personal preference — global across workspaces/sessions. */
 const FOCUS_MODE_KEY = "multica:chat:focusMode";
+/**
+ * Open/closed preference, persisted globally (not per-workspace) — most users
+ * have one habitual chat-panel preference across workspaces. Missing key =
+ * new user (or cleared storage); default to OPEN so the chat is discoverable.
+ * Once the user toggles even once, their explicit choice is respected on
+ * every subsequent reload.
+ */
+const OPEN_KEY = "multica:chat:isOpen";

 function readDrafts(storage: StorageAdapter, key: string): Record<string, string> {
  const raw = storage.getItem(key);
@@ -118,8 +126,14 @@ export function createChatStore(options: ChatStoreOptions) {
    return slug ? `${base}:${slug}` : base;
  };

+  // Resolve initial isOpen from storage. The three-state read (null /
+  // "true" / "false") is what enables the "new user → open" default while
+  // still honouring an explicit "I closed it" choice on every reload.
+  const storedOpen = storage.getItem(OPEN_KEY);
+  const initialIsOpen = storedOpen === null ? true : storedOpen === "true";
+
  const store = create<ChatState>((set, get) => ({
-    isOpen: false,
+    isOpen: initialIsOpen,
    activeSessionId: storage.getItem(wsKey(SESSION_STORAGE_KEY)),
    selectedAgentId: storage.getItem(wsKey(AGENT_STORAGE_KEY)),
    showHistory: false,
@@ -130,11 +144,13 @@ export function createChatStore(options: ChatStoreOptions) {
    isExpanded: storage.getItem(wsKey(CHAT_EXPANDED_KEY)) === "true",
    setOpen: (open) => {
      logger.debug("setOpen", { from: get().isOpen, to: open });
+      storage.setItem(OPEN_KEY, String(open));
      set({ isOpen: open });
    },
    toggle: () => {
      const next = !get().isOpen;
      logger.debug("toggle", { to: next });
+      storage.setItem(OPEN_KEY, String(next));
      set({ isOpen: next });
    },
    setActiveSession: (id) => {
--- a/packages/core/realtime/use-realtime-sync.ts
+++ b/packages/core/realtime/use-realtime-sync.ts
@@ -51,9 +51,13 @@ import type {
  SubscriberAddedPayload,
  SubscriberRemovedPayload,
  TaskMessagePayload,
+  TaskQueuedPayload,
+  TaskDispatchPayload,
  TaskCompletedPayload,
  TaskFailedPayload,
+  TaskCancelledPayload,
  ChatDonePayload,
+  ChatPendingTask,
  InvitationCreatedPayload,
 } from "../types";

@@ -525,6 +529,64 @@ export function useRealtimeSync(
      invalidateSessionLists();
    });

+    // Chat task lifecycle writethrough: keep `chatKeys.pendingTask(sessionId)`
+    // synchronized with the server state machine via setQueryData rather than
+    // invalidate-refetch. Same pattern as task:message — the WS payload
+    // carries everything we need, and an HTTP roundtrip just to read what we
+    // already know would add latency to every stage transition.
+    //
+    // task:queued is emitted by EnqueueChatTask. The optimistic seed in
+    // chat-window.tsx may have already populated the cache with a temporary
+    // id; this handler upgrades it to the real task_id, keeps other cached
+    // fields (e.g. created_at), and writes status "queued" — replays included.
+    const unsubTaskQueued = ws.on("task:queued", (p) => {
+      const payload = p as TaskQueuedPayload;
+      if (!payload.chat_session_id) return;
+      qc.setQueryData<ChatPendingTask>(
+        chatKeys.pendingTask(payload.chat_session_id),
+        (old) => ({
+          ...(old ?? {}),
+          task_id: payload.task_id,
+          status: "queued",
+        }),
+      );
+      invalidatePendingAggregate();
+    });
+
+    // task:dispatch fires when the daemon claims the queued task. The daemon
+    // immediately follows with StartTask, so dispatched→running is sub-second.
+    // We collapse that window by writing "running" directly — the pill jumps
+    // from "Queued" straight to "Thinking", skipping a meaningless "Starting"
+    // frame. Stage decision in TaskStatusPill maps "running" + empty
+    // taskMessages → "Thinking · Ns".
+    const unsubTaskDispatch = ws.on("task:dispatch", (p) => {
+      const payload = p as TaskDispatchPayload;
+      if (!payload.chat_session_id) return;
+      qc.setQueryData<ChatPendingTask>(
+        chatKeys.pendingTask(payload.chat_session_id),
+        (old) => {
+          if (!old || old.task_id !== payload.task_id) return old;
+          return { ...old, status: "running" };
+        },
+      );
+    });
+
+    // task:cancelled reaches us when:
+    //   1. handleStop already cleared the cache locally (this is a no-op confirm)
+    //   2. another tab / admin / system cancels — this is the only path that
+    //      drops the pending pill in those cases. Without it the pill spins
+    //      forever in the second-tab scenario.
+    const unsubTaskCancelled = ws.on("task:cancelled", (p) => {
+      const payload = p as TaskCancelledPayload;
+      if (!payload.chat_session_id) return;
+      chatWsLogger.info("task:cancelled (global, chat)", {
+        task_id: payload.task_id,
+        chat_session_id: payload.chat_session_id,
+      });
+      qc.setQueryData(chatKeys.pendingTask(payload.chat_session_id), {});
+      invalidatePendingAggregate();
+    });
+
    const unsubTaskCompleted = ws.on("task:completed", (p) => {
      const payload = p as TaskCompletedPayload;
      if (!payload.chat_session_id) return; // issue tasks handled elsewhere
@@ -545,8 +607,14 @@ export function useRealtimeSync(
        task_id: payload.task_id,
        chat_session_id: payload.chat_session_id,
      });
-      // No new message; just flip the pending signal.
+      // FailTask writes a failure chat_message (mirroring CompleteTask's
+      // success message), so this path mirrors the task:completed handler:
+      // clear the pending signal AND invalidate the messages list so the
+      // failure bubble shows up without requiring a page refresh. Pre-#1823
+      // this branch only flipped pending — the comment "No new message"
+      // was true then, but FailTask now persists a row.
      qc.setQueryData(chatKeys.pendingTask(payload.chat_session_id), {});
+      qc.invalidateQueries({ queryKey: chatKeys.messages(payload.chat_session_id) });
      qc.invalidateQueries({ queryKey: chatKeys.pendingTask(payload.chat_session_id) });
      invalidatePendingAggregate();
    });
@@ -584,6 +652,9 @@ export function useRealtimeSync(
      unsubTaskMessage();
      unsubChatMessage();
      unsubChatDone();
+      unsubTaskQueued();
+      unsubTaskDispatch();
+      unsubTaskCancelled();
      unsubTaskCompleted();
      unsubTaskFailed();
      unsubChatSessionRead();
--- a/packages/core/types/chat.ts
+++ b/packages/core/types/chat.ts
@@ -28,18 +28,48 @@ export interface ChatMessage {
  content: string;
  task_id: string | null;
  created_at: string;
+  /**
+   * When set, this is an assistant message synthesized by the server's
+   * FailTask fallback (mirrors the issue path's failure system comment).
+   * `content` carries the raw daemon-reported errMsg; the front-end maps
+   * `failure_reason` (an enum like "agent_error" / "connection_error" /
+   * "timeout") to a user-facing label and renders an inline failure note.
+   * Null on success messages and on user messages.
+   */
+  failure_reason?: string | null;
+  /**
+   * Wall-clock duration from `task.created_at` (user hit send) to terminal
+   * state (completed/failed). Set by the server on assistant messages
+   * synthesized by CompleteTask/FailTask. UI renders it as "Replied in
+   * 38s" / "Failed after 12s" beneath the bubble. Null on user messages
+   * and on legacy assistant messages predating migration 063.
+   */
+  elapsed_ms?: number | null;
 }

 export interface SendChatMessageResponse {
  message_id: string;
  task_id: string;
+  /**
+   * Server-authoritative task creation time. Once the send response lands,
+   * the StatusPill seed is re-anchored to this value so the timer counts from
+   * the real `0s` — without it the front-end stays on its local clock and
+   * the timer "snaps backwards" later when WS events update the cache.
+   */
+  created_at: string;
 }

 /**
 * Response from GET /api/chat/sessions/{id}/pending-task.
- * Both fields are absent when the session has no in-flight task.
+ * All fields are absent when the session has no in-flight task.
+ *
+ * `created_at` is the server-authoritative anchor for the chat StatusPill's
+ * elapsed-seconds timer — the optimistic seed in chat-window.tsx uses the
+ * local send time, then the send response / this query supply the real value
+ * so the timer survives refresh / reopen without "resetting to 0s".
 */
 export interface ChatPendingTask {
  task_id?: string;
  status?: string;
+  created_at?: string;
 }
--- a/packages/core/types/events.ts
+++ b/packages/core/types/events.ts
@@ -196,6 +196,22 @@ export interface TaskMessagePayload {
  output?: string;
 }

+export interface TaskQueuedPayload {
+  task_id: string;
+  agent_id: string;
+  issue_id: string;
+  chat_session_id?: string;
+  status: string;
+}
+
+export interface TaskDispatchPayload {
+  task_id: string;
+  agent_id: string;
+  issue_id: string;
+  runtime_id: string;
+  chat_session_id?: string;
+}
+
 export interface TaskCompletedPayload {
  task_id: string;
  agent_id: string;
--- a/packages/ui/components/common/unicode-spinner.tsx
+++ b/packages/ui/components/common/unicode-spinner.tsx
@@ -0,0 +1,47 @@
+"use client";
+
+import { useEffect, useState } from "react";
+import spinners, { type BrailleSpinnerName } from "unicode-animations";
+
+interface Props {
+  name?: BrailleSpinnerName;
+  className?: string;
+  /** Stop advancing frames without unmounting (e.g., when an outer state freezes). */
+  paused?: boolean;
+}
+
+// Inline-rendered braille spinner. Each frame is a unicode string from the
+// `unicode-animations` package; we tick frames on the spinner's own `interval`
+// and render the current one inside a fixed-width monospace span so different
+// frames never reflow neighbouring text. Width-jitter is the main reason this
+// component exists rather than dropping the raw strings into Tailwind classes.
+export function UnicodeSpinner({ name = "braille", className, paused }: Props) {
+  const spec = spinners[name];
+  const [frame, setFrame] = useState(0);
+
+  useEffect(() => {
+    if (paused) return;
+    setFrame(0);
+    const timer = setInterval(
+      () => setFrame((f) => (f + 1) % spec.frames.length),
+      spec.interval,
+    );
+    return () => clearInterval(timer);
+  }, [name, paused, spec]);
+
+  return (
+    <span
+      aria-hidden="true"
+      className={className}
+      style={{
+        fontFamily: "ui-monospace, SFMono-Regular, Menlo, Consolas, monospace",
+        display: "inline-block",
+        minWidth: "1ch",
+        textAlign: "center",
+        fontVariantNumeric: "tabular-nums",
+      }}
+    >
+      {spec.frames[frame]}
+    </span>
+  );
+}
--- a/packages/ui/package.json
+++ b/packages/ui/package.json
@@ -48,6 +48,7 @@
    "sonner": "^2.0.7",
    "tailwind-merge": "catalog:",
    "tw-animate-css": "^1.4.0",
+    "unicode-animations": "catalog:",
    "vaul": "^1.1.2"
  },
  "peerDependencies": {
--- a/packages/ui/styles/base.css
+++ b/packages/ui/styles/base.css
@@ -83,6 +83,37 @@
  animation: chat-impulse 1.6s ease-in-out infinite;
 }

+/* ChatGPT-style "thinking" shimmer for inline text — a soft light sweep
+ * runs across the glyphs, signalling "the agent is doing something" without
+ * a separate spinner. Pure CSS: linear-gradient clipped to the text shape,
+ * the gradient slid across via background-position. Uses the same muted →
+ * foreground tokens chat copy normally uses, so the effect adapts to light
+ * and dark mode without per-mode overrides.
+ *
+ * Apply to a <span> wrapping the label only — not the whole pill, since
+ * the timer counter and Cancel button shouldn't shimmer. */
+@keyframes chat-text-shimmer {
+  0% { background-position: 200% 0; }
+  100% { background-position: -200% 0; }
+}
+
+.animate-chat-text-shimmer {
+  background-image: linear-gradient(
+    90deg,
+    var(--muted-foreground) 0%,
+    var(--muted-foreground) 35%,
+    var(--foreground) 50%,
+    var(--muted-foreground) 65%,
+    var(--muted-foreground) 100%
+  );
+  background-size: 200% 100%;
+  background-clip: text;
+  -webkit-background-clip: text;
+  color: transparent;
+  -webkit-text-fill-color: transparent;
+  animation: chat-text-shimmer 2.5s linear infinite;
+}
+
 /* Sidebar: open triggers (dropdown/popover) get active background */
 [data-sidebar="menu-button"][data-popup-open] {
  background-color: var(--sidebar-accent);
--- a/packages/views/chat/components/chat-input.tsx
+++ b/packages/views/chat/components/chat-input.tsx
@@ -69,6 +69,14 @@ export function ChatInput({
    logger.info("input.send", { contentLength: content.length, draftKey: keyAtSend });
    onSend(content);
    editorRef.current?.clearContent();
+    // Drop focus so the caret doesn't keep blinking under the StatusPill /
+    // streaming reply that's about to take over the user's attention. The
+    // input is also `disabled` once isRunning flips, and a focused-but-
+    // disabled editor reads as a stale cursor. We deliberately don't auto-
+    // refocus on completion — that would interrupt the user if they're
+    // selecting text from the assistant reply; one click to refocus is
+    // a fair price for not stealing focus mid-action.
+    editorRef.current?.blur();
    clearInputDraft(keyAtSend);
    setIsEmpty(true);
  };
--- a/packages/views/chat/components/chat-message-list.tsx
+++ b/packages/views/chat/components/chat-message-list.tsx
@@ -9,36 +9,49 @@ import {
  CollapsibleContent,
  CollapsibleTrigger,
 } from "@multica/ui/components/ui/collapsible";
-import { Loader2, ChevronRight, ChevronDown, Brain, AlertCircle } from "lucide-react";
+import { ChevronRight, ChevronDown, Brain, AlertCircle, AlertTriangle } from "lucide-react";
 import { useScrollFade } from "@multica/ui/hooks/use-scroll-fade";
 import { useAutoScroll } from "@multica/ui/hooks/use-auto-scroll";
 import { taskMessagesOptions } from "@multica/core/chat/queries";
 import { Markdown } from "@multica/views/common/markdown";
-import type { ChatMessage, TaskMessagePayload } from "@multica/core/types";
+import type { AgentAvailability } from "@multica/core/agents";
+import type { ChatMessage, ChatPendingTask, TaskMessagePayload, TaskFailureReason } from "@multica/core/types";
 import type { ChatTimelineItem } from "@multica/core/chat";
+import { failureReasonLabel } from "../../agents/components/tabs/task-failure";
+import { TaskStatusPill } from "./task-status-pill";
+import { formatElapsedMs } from "../lib/format";

 // ─── Public component ────────────────────────────────────────────────────

 interface ChatMessageListProps {
  messages: ChatMessage[];
-  /** When set, streams the live timeline for this task from task-messages cache. */
-  pendingTaskId: string | null;
-  isWaiting: boolean;
+  /**
+   * Server-authoritative pending-task snapshot. `null` / undefined means
+   * no in-flight task — list renders without StatusPill.
+   */
+  pendingTask: ChatPendingTask | null | undefined;
+  /** Resolved presence; pass `undefined` while loading to keep the pill copy neutral. */
+  availability: AgentAvailability | undefined;
+  /** Cancel handler exposed by the StatusPill once the task crosses the long-run threshold. */
+  onCancel?: () => void;
 }

 export function ChatMessageList({
  messages,
-  pendingTaskId,
-  isWaiting,
+  pendingTask,
+  availability,
+  onCancel,
 }: ChatMessageListProps) {
  const scrollRef = useRef<HTMLDivElement>(null);
  const fadeStyle = useScrollFade(scrollRef);
  useAutoScroll(scrollRef);

+  const pendingTaskId = pendingTask?.task_id ?? null;
+
  // Once the assistant message for this pending task has landed in the
  // messages list, AssistantMessage owns its rendering — suppress the live
-  // timeline to avoid rendering the same content in two places during the
-  // invalidate → refetch window.
+  // timeline (and pill) to avoid rendering the same content in two places
+  // during the invalidate → refetch window.
  const pendingAlreadyPersisted = !!pendingTaskId && messages.some(
    (m) => m.role === "assistant" && m.task_id === pendingTaskId,
  );
@@ -52,6 +65,7 @@ export function ChatMessageList({
  });
  const liveTimeline: ChatTimelineItem[] = (liveTaskMessages ?? []).map(toTimelineItem);
  const hasLive = showLiveTimeline && liveTimeline.length > 0;
+  const showStatusPill = !!pendingTaskId && !pendingAlreadyPersisted && !!pendingTask;

  return (
    <div ref={scrollRef} style={fadeStyle} className="flex-1 overflow-y-auto">
@@ -68,8 +82,13 @@ export function ChatMessageList({
            <TimelineView items={liveTimeline} />
          </div>
        )}
-        {isWaiting && !hasLive && !pendingAlreadyPersisted && (
-          <Loader2 className="size-4 animate-spin text-muted-foreground" />
+        {showStatusPill && pendingTask && (
+          <TaskStatusPill
+            pendingTask={pendingTask}
+            taskMessages={liveTaskMessages ?? []}
+            availability={availability}
+            onCancel={onCancel}
+          />
        )}
      </div>
    </div>
@@ -153,6 +172,21 @@ function AssistantMessage({

  const timeline: ChatTimelineItem[] = (taskMessages ?? []).map(toTimelineItem);

+  // Failure path: when the server's FailTask wrote a failure chat_message
+  // (failure_reason set), render an inline failure note with the
+  // human-readable reason label + collapsible raw errMsg + the same timeline
+  // so the user can see exactly where the run broke.
+  if (message.failure_reason) {
+    return (
+      <FailureBubble
+        reason={message.failure_reason}
+        rawError={message.content}
+        timeline={timeline}
+        elapsedMs={message.elapsed_ms}
+      />
+    );
+  }
+
  return (
    <div className="w-full space-y-1.5">
      {timeline.length > 0 ? (
@@ -162,6 +196,86 @@ function AssistantMessage({
          <Markdown>{message.content}</Markdown>
        </div>
      )}
+      {message.elapsed_ms != null && (
+        <ElapsedCaption verb="Replied in" elapsedMs={message.elapsed_ms} />
+      )}
+    </div>
+  );
+}
+
+// Persisted "Replied in 38s" / "Failed after 12s" line under the assistant
+// bubble. Reads `elapsed_ms` straight off the chat_message — server computes
+// it once at task completion, so this caption is identical across reloads
+// and devices. Skipped silently when null (legacy messages predating
+// migration 063 + user messages).
+function ElapsedCaption({
+  verb,
+  elapsedMs,
+  className,
+}: {
+  verb: string;
+  elapsedMs: number;
+  className?: string;
+}) {
+  return (
+    <div className={cn("text-[11px] text-muted-foreground/80", className)}>
+      {verb} {formatElapsedMs(elapsedMs)}
+    </div>
+  );
+}
+
+function FailureBubble({
+  reason,
+  rawError,
+  timeline,
+  elapsedMs,
+}: {
+  reason: string;
+  rawError: string;
+  timeline: ChatTimelineItem[];
+  elapsedMs?: number | null;
+}) {
+  const [open, setOpen] = useState(false);
+  // Map the back-end enum to copy via the shared label table; an unknown
+  // reason (e.g. a future enum value the front-end doesn't ship yet)
+  // falls back to a generic "Task failed" so we never render a bare slug.
+  const label =
+    failureReasonLabel[reason as TaskFailureReason] ?? "Task failed";
+
+  return (
+    <div className="w-full space-y-1.5">
+      {/* Failures read as an inline, low-key note — not a destructive
+       *  alert. Intentionally borderless / no background tint: a chat
+       *  failure is informational ("this didn't work"), not a system
+       *  error. The icon + muted destructive text are signal enough,
+       *  the rest stays in the normal reply rhythm. */}
+      <div className="flex items-start gap-1.5 text-sm">
+        <AlertTriangle className="size-3.5 shrink-0 text-destructive/80 mt-0.5" />
+        <div className="flex-1 min-w-0">
+          <div className="text-destructive/90">{label}</div>
+          {rawError.trim() && (
+            <Collapsible open={open} onOpenChange={setOpen}>
+              <CollapsibleTrigger className="mt-0.5 flex items-center gap-1 text-[11px] text-muted-foreground hover:text-foreground transition-colors">
+                {open ? (
+                  <ChevronDown className="size-3" />
+                ) : (
+                  <ChevronRight className="size-3" />
+                )}
+                <span>Show details</span>
+              </CollapsibleTrigger>
+              <CollapsibleContent>
+                <pre className="mt-1 max-h-40 overflow-auto rounded bg-muted/40 p-2 text-[11px] text-muted-foreground whitespace-pre-wrap break-all">
+                  {rawError}
+                </pre>
+              </CollapsibleContent>
+            </Collapsible>
+          )}
+        </div>
+      </div>
+      {timeline.length > 0 && <TimelineView items={timeline} />}
+      {elapsedMs != null && (
+        <ElapsedCaption verb="Failed after" elapsedMs={elapsedMs} />
+      )}
    </div>
  );
 }
--- a/packages/views/chat/components/chat-window.tsx
+++ b/packages/views/chat/components/chat-window.tsx
@@ -2,8 +2,7 @@

 import React, { useCallback, useEffect, useMemo, useRef } from "react";
 import { useQuery, useQueryClient } from "@tanstack/react-query";
-import { Minus, Maximize2, Minimize2, ChevronDown, Bot, Plus, Check } from "lucide-react";
-import { Avatar, AvatarFallback, AvatarImage } from "@multica/ui/components/ui/avatar";
+import { Minus, Maximize2, Minimize2, ChevronDown, Plus, Check } from "lucide-react";
 import { Button } from "@multica/ui/components/ui/button";
 import { Tooltip, TooltipTrigger, TooltipContent } from "@multica/ui/components/ui/tooltip";
 import {
@@ -20,11 +19,15 @@ import { useAuthStore } from "@multica/core/auth";
 import { agentListOptions, memberListOptions } from "@multica/core/workspace/queries";
 import { canAssignAgent } from "@multica/views/issues/components";
 import { api } from "@multica/core/api";
+import { useAgentPresenceDetail } from "@multica/core/agents";
+import { ActorAvatar } from "../../common/actor-avatar";
+import { OfflineBanner } from "./offline-banner";
 import {
  chatSessionsOptions,
  allChatSessionsOptions,
  chatMessagesOptions,
  pendingChatTaskOptions,
+  pendingChatTasksOptions,
  chatKeys,
 } from "@multica/core/chat/queries";
 import { useCreateChatSession, useMarkChatSessionRead } from "@multica/core/chat/mutations";
@@ -40,7 +43,7 @@ import {
 import { ChatResizeHandles } from "./chat-resize-handles";
 import { useChatResize } from "./use-chat-resize";
 import { createLogger } from "@multica/core/logger";
-import type { Agent, ChatMessage, ChatSession } from "@multica/core/types";
+import type { Agent, ChatMessage, ChatPendingTask, ChatSession } from "@multica/core/types";

 const uiLogger = createLogger("chat.ui");
 const apiLogger = createLogger("chat.api");
@@ -100,6 +103,15 @@ export function ChatWindow() {
    availableAgents[0] ??
    null;

+  // Presence drives both the avatar status dot (via ActorAvatar) and the
+  // OfflineBanner / TaskStatusPill availability copy. `useAgentPresenceDetail`
+  // returns "loading" while queries are still resolving — pass `undefined`
+  // downstream so banners and pill copy stay silent during loading rather
+  // than flash speculative offline text.
+  const presenceDetail = useAgentPresenceDetail(wsId, activeAgent?.id);
+  const availability =
+    presenceDetail === "loading" ? undefined : presenceDetail.availability;
+
  // Mount / unmount logging. ChatWindow lives in DashboardLayout, so this
  // fires on layout mount (login / workspace switch / fresh page load).
  useEffect(() => {
@@ -119,28 +131,11 @@ export function ChatWindow() {
    // eslint-disable-next-line react-hooks/exhaustive-deps -- once per mount
  }, []);

-  // Auto-restore most recent active session from server (only once on mount)
-  const didRestoreRef = useRef(false);
-  useEffect(() => {
-    if (didRestoreRef.current) return;
-    didRestoreRef.current = true;
-    if (activeSessionId || sessions.length === 0) {
-      uiLogger.debug("restore session skipped", {
-        reason: activeSessionId ? "already has session" : "no sessions",
-        activeSessionId,
-        sessionCount: sessions.length,
-      });
-      return;
-    }
-    const latest = sessions.find((s) => s.status === "active");
-    if (latest) {
-      uiLogger.info("restore session on mount", { sessionId: latest.id });
-      setActiveSession(latest.id);
-    } else {
-      uiLogger.debug("restore session: no active session found");
-    }
-    // eslint-disable-next-line react-hooks/exhaustive-deps -- run once when sessions load
-  }, [sessions]);
+  // Open intent is fully driven by `activeSessionId` in storage — no mount
+  // restore, no self-heal. Adding either reintroduces a "two signals
+  // describing one fact" race (the previous self-heal mis-cleared the
+  // freshly-created session because allSessions was still stale during the
+  // post-create invalidate-refetch window).

  // WS events are handled globally in useRealtimeSync — the query cache
  // stays current even when this window is closed. See packages/core/realtime/.
@@ -197,19 +192,34 @@ export function ChatWindow() {
        setActiveSession(sessionId);
      }

-      // Optimistic: show user message immediately.
+      // Optimistic burst — everything that gives the user "I sent a message
+      // and the agent is now working" feedback fires BEFORE the HTTP roundtrip.
+      // Before the StatusPill change the pending-task seed lived after `await
+      // sendChatMessage`, so the pill blinked in a few hundred ms after the
+      // user's message — a small but visible "did it actually send?" gap.
+      const sentAt = new Date().toISOString();
      const optimistic: ChatMessage = {
        id: `optimistic-${Date.now()}`,
        chat_session_id: sessionId,
        role: "user",
        content: finalContent,
        task_id: null,
-        created_at: new Date().toISOString(),
+        created_at: sentAt,
      };
      qc.setQueryData<ChatMessage[]>(
        chatKeys.messages(sessionId),
        (old) => (old ? [...old, optimistic] : [optimistic]),
      );
+      // Seed the pending-task with a temporary id so the StatusPill mounts
+      // and starts ticking the instant the user clicks send. Real task_id
+      // and server-authoritative created_at land below; until then the pill
+      // is anchored to the local clock (drift is the request RTT, ~50–200ms,
+      // which doesn't change the rendered "Ns" value).
+      qc.setQueryData<ChatPendingTask>(chatKeys.pendingTask(sessionId), {
+        task_id: `optimistic-${optimistic.id}`,
+        status: "queued",
+        created_at: sentAt,
+      });
      apiLogger.debug("sendChatMessage.optimistic", { sessionId, optimisticId: optimistic.id });

      const result = await api.sendChatMessage(sessionId, finalContent);
@@ -218,11 +228,13 @@ export function ChatWindow() {
        messageId: result.message_id,
        taskId: result.task_id,
      });
-      // Seed pending-task optimistically so the spinner shows instantly —
-      // the WS chat:message handler will invalidate + refetch to confirm.
-      qc.setQueryData(chatKeys.pendingTask(sessionId), {
+      // Replace the temporary task_id with the server's real one (so the WS
+      // task: handlers can match against it) and snap the anchor to the
+      // server's created_at — keeping the elapsed-seconds reading stable.
+      qc.setQueryData<ChatPendingTask>(chatKeys.pendingTask(sessionId), {
        task_id: result.task_id,
        status: "queued",
+        created_at: result.created_at,
      });
      qc.invalidateQueries({ queryKey: chatKeys.messages(sessionId) });
    },
@@ -236,24 +248,30 @@ export function ChatWindow() {
    ],
  );

-  const handleStop = useCallback(async () => {
-    if (!pendingTaskId) {
+  const handleStop = useCallback(() => {
+    if (!pendingTaskId || !activeSessionId) {
      apiLogger.debug("cancelTask skipped: no pending task");
      return;
    }
+    // Optimistic clear — pill disappears + input unlocks the moment the
+    // user clicks Stop, instead of after the HTTP roundtrip. WS
+    // task:cancelled will confirm later (no-op if cache is already empty);
+    // if the cancel POST fails because the task already finished, the
+    // assistant message arrives via task:completed → chat:done and renders
+    // normally. Either way the UI is in sync with reality without latency.
    apiLogger.info("cancelTask.start", { taskId: pendingTaskId, sessionId: activeSessionId });
-    try {
-      await api.cancelTaskById(pendingTaskId);
-      apiLogger.info("cancelTask.success", { taskId: pendingTaskId });
-    } catch (err) {
-      // Task may already be completed
-      apiLogger.warn("cancelTask.error (task may have already finished)", { taskId: pendingTaskId, err });
-    }
-    if (activeSessionId) {
-      // Clear pending immediately; WS task:cancelled will confirm.
-      qc.setQueryData(chatKeys.pendingTask(activeSessionId), {});
-      qc.invalidateQueries({ queryKey: chatKeys.messages(activeSessionId) });
-    }
+    qc.setQueryData(chatKeys.pendingTask(activeSessionId), {});
+    qc.invalidateQueries({ queryKey: chatKeys.messages(activeSessionId) });
+    // Fire-and-forget — UI is already in its post-cancel state. We log the
+    // outcome but never block on it.
+    api.cancelTaskById(pendingTaskId).then(
+      () => apiLogger.info("cancelTask.success", { taskId: pendingTaskId }),
+      (err) =>
+        apiLogger.warn("cancelTask.error (task may have already finished)", {
+          taskId: pendingTaskId,
+          err,
+        }),
+    );
  }, [pendingTaskId, activeSessionId, qc]);

  const handleSelectAgent = useCallback(
@@ -402,8 +420,9 @@ export function ChatWindow() {
      ) : hasMessages ? (
        <ChatMessageList
          messages={messages}
-          pendingTaskId={pendingTaskId}
-          isWaiting={!!pendingTaskId}
+          pendingTask={pendingTask}
+          availability={availability}
+          onCancel={handleStop}
        />
      ) : (
        <EmptyState
@@ -412,6 +431,12 @@ export function ChatWindow() {
        />
      )}

+      {/* Presence banner sits above the input card (not inside topSlot) so
+       *  the "offline / unstable" hint reads as a global session signal,
+       *  not an attachment to the message being composed. ContextAnchorCard
+       *  stays in topSlot because that's per-message context. */}
+      <OfflineBanner agentName={activeAgent?.name} availability={availability} />
+
      {/* Input — disabled for archived sessions */}
      <ChatInput
        onSend={handleSend}
@@ -469,7 +494,13 @@ function AgentDropdown({
  return (
    <DropdownMenu>
      <DropdownMenuTrigger className="flex items-center gap-1.5 rounded-md px-1.5 py-1 -ml-1 cursor-pointer outline-none transition-colors hover:bg-accent aria-expanded:bg-accent">
-        <AgentAvatarSmall agent={activeAgent} />
+        <ActorAvatar
+          actorType="agent"
+          actorId={activeAgent.id}
+          size={24}
+          enableHoverCard
+          showStatusDot
+        />
        <span className="text-xs font-medium max-w-28 truncate">{activeAgent.name}</span>
        <ChevronDown className="size-3 text-muted-foreground shrink-0" />
      </DropdownMenuTrigger>
@@ -520,7 +551,13 @@ function AgentMenuItem({
      onClick={() => onSelect(agent)}
      className="flex min-w-0 items-center gap-2"
    >
-      <AgentAvatarSmall agent={agent} />
+      <ActorAvatar
+        actorType="agent"
+        actorId={agent.id}
+        size={24}
+        enableHoverCard
+        showStatusDot
+      />
      <span className="truncate flex-1">{agent.name}</span>
      {isCurrent && <Check className="size-3.5 text-muted-foreground shrink-0" />}
    </DropdownMenuItem>
@@ -545,16 +582,60 @@ function SessionDropdown({
  activeSessionId: string | null;
  onSelectSession: (session: ChatSession) => void;
 }) {
+  const wsId = useWorkspaceId();
  const agentById = useMemo(() => new Map(agents.map((a) => [a.id, a])), [agents]);
  const activeSession = sessions.find((s) => s.id === activeSessionId);
  const title = activeSession?.title?.trim() || "New chat";
  const triggerAgent = activeSession ? agentById.get(activeSession.agent_id) ?? null : null;

+  // Aggregate "which sessions have an in-flight task right now". Reuses
+  // the same workspace-scoped query the FAB consumes, so toggling the chat
+  // window doesn't fire a second request — TanStack dedupes by key.
+  const { data: pending } = useQuery(pendingChatTasksOptions(wsId));
+  const inFlightSessionIds = useMemo(
+    () => new Set((pending?.tasks ?? []).map((t) => t.chat_session_id)),
+    [pending],
+  );
+
+  // Cross-session aggregate signal for the closed-dropdown trigger.
+  // "Active" here means there's something interesting happening in a
+  // session OTHER than the one the user is currently looking at — the
+  // user already sees their own session's state via the StatusPill /
+  // unread auto-mark, so highlighting it on the trigger would be noise.
+  // Same priority rule as the row pips: running > unread.
+  const otherSessionRunning = sessions.some(
+    (s) => s.id !== activeSessionId && inFlightSessionIds.has(s.id),
+  );
+  const otherSessionUnread = sessions.some(
+    (s) => s.id !== activeSessionId && s.has_unread,
+  );
+
  return (
    <DropdownMenu>
      <DropdownMenuTrigger className="flex items-center gap-1.5 min-w-0 rounded-md px-1.5 py-1 transition-colors hover:bg-accent aria-expanded:bg-accent">
-        {triggerAgent && <AgentAvatarSmall agent={triggerAgent} />}
+        {triggerAgent && (
+          <ActorAvatar
+            actorType="agent"
+            actorId={triggerAgent.id}
+            size={24}
+            enableHoverCard
+            showStatusDot
+          />
+        )}
        <span className="truncate text-sm font-medium">{title}</span>
+        {otherSessionRunning ? (
+          <span
+            aria-label="Another chat is running"
+            title="Another chat is running"
+            className="size-1.5 shrink-0 rounded-full bg-amber-500 animate-pulse"
+          />
+        ) : otherSessionUnread ? (
+          <span
+            aria-label="Another chat has unread replies"
+            title="Another chat has unread replies"
+            className="size-1.5 shrink-0 rounded-full bg-brand"
+          />
+        ) : null}
        <ChevronDown className="size-3 text-muted-foreground shrink-0" />
      </DropdownMenuTrigger>
      <DropdownMenuContent align="start" className="max-h-80 w-auto min-w-56 max-w-80">
@@ -566,6 +647,7 @@ function SessionDropdown({
          sessions.map((session) => {
            const isCurrent = session.id === activeSessionId;
            const agent = agentById.get(session.agent_id) ?? null;
+            const isRunning = inFlightSessionIds.has(session.id);
            return (
              <DropdownMenuItem
                key={session.id}
@@ -573,16 +655,38 @@ function SessionDropdown({
                className="flex min-w-0 items-center gap-2"
              >
                {agent ? (
-                  <AgentAvatarSmall agent={agent} />
+                  <ActorAvatar
+                    actorType="agent"
+                    actorId={agent.id}
+                    size={24}
+                    enableHoverCard
+                    showStatusDot
+                  />
                ) : (
                  <span className="size-6 shrink-0" />
                )}
                <span className="truncate flex-1 text-sm">
                  {session.title?.trim() || "New chat"}
                </span>
-                {session.has_unread && (
-                  <span className="size-1.5 shrink-0 rounded-full bg-brand" />
-                )}
+                {/* Right-edge status pip: in-flight wins over unread because
+                 *  "still working" is more actionable than "has reply" — and
+                 *  the two rarely coexist in practice (the unread flag fires
+                 *  on chat_message write, by which point the task has just
+                 *  finished). Same pip shape as unread for visual rhythm,
+                 *  amber + pulse to read as activity. */}
+                {isRunning ? (
+                  <span
+                    aria-label="Running"
+                    title="Running"
+                    className="size-1.5 shrink-0 rounded-full bg-amber-500 animate-pulse"
+                  />
+                ) : session.has_unread ? (
+                  <span
+                    aria-label="Unread"
+                    title="Unread"
+                    className="size-1.5 shrink-0 rounded-full bg-brand"
+                  />
+                ) : null}
                {isCurrent && <Check className="size-3.5 text-muted-foreground shrink-0" />}
              </DropdownMenuItem>
            );
@@ -593,17 +697,6 @@ function SessionDropdown({
  );
 }

-function AgentAvatarSmall({ agent }: { agent: Agent }) {
-  return (
-    <Avatar className="size-6">
-      {agent.avatar_url && <AvatarImage src={agent.avatar_url} />}
-      <AvatarFallback className="bg-purple-100 text-purple-700">
-        <Bot className="size-3.5" />
-      </AvatarFallback>
-    </Avatar>
-  );
-}
-
 /**
 * Three starter prompts shown on the empty state. Tapping one sends it
 * immediately — ChatGPT-style — because the point is showing users what
--- a/packages/views/chat/components/offline-banner.tsx
+++ b/packages/views/chat/components/offline-banner.tsx
@@ -0,0 +1,54 @@
+"use client";
+
+import { AlertCircle, WifiOff } from "lucide-react";
+import type { AgentAvailability } from "@multica/core/agents";
+
+interface Props {
+  /** Agent name used in the banner copy; blank or missing shows "the agent". */
+  agentName?: string;
+  /**
+   * Resolved presence availability. The banner renders only for "offline"
+   * and "unstable"; every other value, including `undefined`, hides it —
+   * we only surface known offline / unstable states, never speculative copy.
+   */
+  availability: AgentAvailability | undefined;
+}
+
+// Session-level notice rendered above the chat input while the active
+// agent is "offline" or "unstable". Every other availability value,
+// including `undefined`, renders nothing — users get the silent default
+// and only see copy when there's a real-world implication for the message
+// they're about to send.
+//
+// Sits outside the input card (sibling of ChatInput) so the hint reads as
+// a session-level signal rather than per-message context. The outer
+// wrapper (`px-5`) and inner container (`mx-auto max-w-4xl`) mirror
+// ChatInput's own layout so the banner lines up with the input box —
+// without `max-w-4xl` it stretches wider than the input on large screens.
+export function OfflineBanner({ agentName, availability }: Props) {
+  const unstable = availability === "unstable";
+  if (!unstable && availability !== "offline") return null;
+
+  // Blank or missing names fall back to a generic subject.
+  const name = agentName?.trim() || "the agent";
+  const Icon = unstable ? AlertCircle : WifiOff;
+  // Amber for the degraded state, muted neutrals for fully offline.
+  const tone = unstable
+    ? "bg-amber-50 dark:bg-amber-950/40 text-amber-900 dark:text-amber-200 ring-1 ring-amber-200/60 dark:ring-amber-900/40"
+    : "bg-muted text-muted-foreground ring-1 ring-border";
+
+  return (
+    <div className="px-5 mb-1.5">
+      <div className={`mx-auto flex w-full max-w-4xl items-center gap-1.5 rounded-md px-2.5 py-1.5 text-xs ${tone}`}>
+        <Icon className="size-3.5 shrink-0" />
+        <span className="truncate">
+          {unstable ? (
+            <>{name}&apos;s connection is unstable — replies may be delayed.</>
+          ) : (
+            <>{name} is offline — your message will be delivered when they&apos;re back.</>
+          )}
+        </span>
+      </div>
+    </div>
+  );
+}
--- a/packages/views/chat/components/task-status-pill.tsx
+++ b/packages/views/chat/components/task-status-pill.tsx
@@ -0,0 +1,209 @@
+"use client";
+
+import { useEffect, useRef, useState } from "react";
+import { X } from "lucide-react";
+import { cn } from "@multica/ui/lib/utils";
+import { UnicodeSpinner } from "@multica/ui/components/common/unicode-spinner";
+import type { BrailleSpinnerName } from "unicode-animations";
+import type { AgentAvailability } from "@multica/core/agents";
+import type { ChatPendingTask, TaskMessagePayload } from "@multica/core/types";
+import { formatElapsedSecs } from "../lib/format";
+
+interface Props {
+  /** Pending-task snapshot; `created_at` anchors the timer and `status` seeds the stage. */
+  pendingTask: ChatPendingTask;
+  /** Live task-message stream — the latest entry that is neither `error` nor `tool_result` decides the running-stage label. */
+  taskMessages: readonly TaskMessagePayload[];
+  /** Resolved presence; pass `undefined` to suppress availability hints. */
+  availability: AgentAvailability | undefined;
+  /** When set, a Cancel button wired to it appears once elapsed time reaches the long-run threshold. */
+  onCancel?: () => void;
+}
+
+interface Stage {
+  /** Standalone label, capitalised so it reads as a complete short phrase
+   *  ("Searching the web · 14s") without needing a subject. Matches the
+   *  ChatGPT / Cursor / Claude style — the agent identity is already on
+   *  the chat header, so we don't repeat it inline. */
+  label: string;
+  /** Spinner name; null renders none (offline only — spinning there would feel anxious). */
+  spinner: BrailleSpinnerName | null;
+  /** True for a stable holding state (currently only offline). The label is
+   *  then rendered without the shimmer animation — shimmer implies "the
+   *  agent is actively doing something", which a holding state isn't. */
+  static?: boolean;
+}
+
+// Tool → stage. Short, action-flavoured labels that replace the raw
+// daemon-reported tool slug in the UI. Keys must be lowercase: the lookup
+// lowercases the slug first, and slugs without an entry fall back to
+// STAGE_FALLBACK ("Working") rather than leaking the raw name.
+const TOOL_STAGES: Record<string, Stage> = {
+  bash: { label: "Running a command", spinner: "helix" },
+  exec: { label: "Running a command", spinner: "helix" },
+  read: { label: "Reading files", spinner: "scan" },
+  glob: { label: "Reading files", spinner: "scan" },
+  grep: { label: "Searching the code", spinner: "scan" },
+  write: { label: "Making edits", spinner: "cascade" },
+  edit: { label: "Making edits", spinner: "cascade" },
+  multi_edit: { label: "Making edits", spinner: "cascade" },
+  multiedit: { label: "Making edits", spinner: "cascade" },
+  web_search: { label: "Searching the web", spinner: "orbit" },
+  websearch: { label: "Searching the web", spinner: "orbit" },
+};
+
+const STAGE_FALLBACK: Stage = { label: "Working", spinner: "helix" };
+
+// Thinking copy for the first-token gap (status=running but no
+// task_message yet), when the agent could be loading the model, opening
+// an API session, or actually reasoning. The label steps through fixed
+// elapsed-time tiers — each implying "this is taking a bit longer" —
+// rather than a randomised pick, which would jitter on every render.
+function pickThinkingLabel(elapsedSecs: number): string {
+  const tiers: [number, string][] = [
+    [5, "Thinking"],
+    [15, "Reasoning"],
+    [30, "Working through it"],
+  ];
+  return tiers.find(([limit]) => elapsedSecs < limit)?.[1] ?? "Taking a closer look";
+}
+
+// Pure stage decision returning the label + spinner the pill should show.
+// Two tiers: while the task is still waiting (queued / dispatched),
+// presence + status pick the copy; otherwise the newest meaningful task
+// message picks it. Any other status, including `undefined`, takes the
+// running path. `elapsedSecs` only affects the rotating thinking copy.
+function pickStage(
+  status: string | undefined,
+  taskMessages: readonly TaskMessagePayload[],
+  availability: AgentAvailability | undefined,
+  elapsedSecs: number,
+): Stage {
+  if (status === "queued" || status === "dispatched") {
+    if (availability === "offline") {
+      return { label: "Offline", spinner: null, static: true };
+    }
+    if (availability === "unstable") {
+      return { label: "Reconnecting", spinner: "pulse" };
+    }
+    return status === "queued"
+      ? { label: "Queued", spinner: "pulse" }
+      : { label: "Starting up", spinner: "breathe" };
+  }
+
+  // Running: walk backwards to the newest message that represents a stage.
+  // `error` rows are skipped because the timeline already renders them
+  // inline and flipping the pill would mask the next real action.
+  // `tool_result` rows are skipped because they only complete a tool_use —
+  // counting them made the pill flicker bash → Thinking → grep → Thinking
+  // on every tool boundary, where reality is just bash → grep.
+  let latest: TaskMessagePayload | undefined;
+  for (let i = taskMessages.length - 1; i >= 0 && !latest; i--) {
+    const msg = taskMessages[i];
+    if (msg && msg.type !== "error" && msg.type !== "tool_result") {
+      latest = msg;
+    }
+  }
+
+  // Thinking copy rotates with elapsed time so the first-token gap (no
+  // task_message yet) reads as progressive waiting, not a stuck label.
+  // It is also the default for `thinking` rows and unrecognised types.
+  const thinking: Stage = {
+    label: pickThinkingLabel(elapsedSecs),
+    spinner: "breathe",
+  };
+  if (!latest) return thinking;
+
+  switch (latest.type) {
+    case "text":
+      return { label: "Typing", spinner: "braille" };
+    case "tool_use": {
+      const tool = (latest.tool ?? "").toLowerCase();
+      // Own-property lookup: a daemon-reported slug such as "constructor"
+      // or "__proto__" must not resolve to an inherited Object.prototype
+      // member, which would render a pill with no label.
+      const known = Object.prototype.hasOwnProperty.call(TOOL_STAGES, tool);
+      return (known ? TOOL_STAGES[tool] : undefined) ?? STAGE_FALLBACK;
+    }
+    default:
+      return thinking;
+  }
+}
+
+const WARNING_THRESHOLD_S = 60; // elapsed seconds at which the pill switches to the amber warning tone
+const CANCEL_THRESHOLD_S = 300; // elapsed seconds at which the Cancel button appears (when `onCancel` is set)
+
+export function TaskStatusPill({
+  pendingTask,
+  taskMessages,
+  availability,
+  onCancel,
+}: Props) {
+  // Timer anchor, locked on first render and never reassigned — otherwise
+  // the timer would snap backwards when an optimistic `Date.now()` seed is
+  // later replaced by a slightly earlier server created_at. A missing or
+  // unparseable created_at falls back to the local clock.
+  // NOTE(review): anchor is per-mount — assumes a remount per task; confirm.
+  const anchorRef = useRef<number | null>(null);
+  if (anchorRef.current === null) {
+    const parsed = pendingTask.created_at ? Date.parse(pendingTask.created_at) : NaN;
+    anchorRef.current = Number.isFinite(parsed) ? parsed : Date.now();
+  }
+  const anchor = anchorRef.current;
+
+  // Re-render once per second so the elapsed readout keeps ticking.
+  const [now, setNow] = useState(() => Date.now());
+  useEffect(() => {
+    const timer = setInterval(() => setNow(Date.now()), 1000);
+    return () => clearInterval(timer);
+  }, []);
+
+  // Effective status — defense-in-depth derive on top of the cache. If any
+  // task_message has streamed in, the daemon has by definition started
+  // running; we trust that observation over a stale cache (WS gaps,
+  // reconnect windows, out-of-order delivery).
+  const status = taskMessages.length > 0 ? "running" : pendingTask.status;
+  const elapsedSecs = Math.max(0, Math.floor((now - anchor) / 1000));
+  const stage = pickStage(status, taskMessages, availability, elapsedSecs);
+  const isWarning = elapsedSecs >= WARNING_THRESHOLD_S;
+  const showCancel = !!onCancel && elapsedSecs >= CANCEL_THRESHOLD_S;
+
+  // Shimmer the label while the agent is actively working — skipped for
+  // `static` stages (offline holding) and once `isWarning` is set, where
+  // the amber colour is the signal and shimmer would mute it.
+  const animateLabel = !stage.static && !isWarning;
+
+  return (
+    <div
+      className={cn(
+        "flex items-center gap-1.5 px-1 text-xs",
+        isWarning ? "text-amber-700 dark:text-amber-300" : "text-muted-foreground",
+      )}
+    >
+      {stage.spinner && (
+        <UnicodeSpinner name={stage.spinner} className="opacity-70" />
+      )}
+      <span className="truncate">
+        {/* Live region is scoped to the label only: on the whole pill it
+         *  would prompt assistive tech on every one-second timer tick. */}
+        <span
+          aria-live="polite"
+          className={cn(animateLabel && "animate-chat-text-shimmer")}
+        >
+          {stage.label}
+        </span>
+        <span className="opacity-70"> · {formatElapsedSecs(elapsedSecs)}</span>
+      </span>
+      {showCancel && (
+        <button
+          type="button"
+          onClick={onCancel}
+          className="ml-2 inline-flex items-center gap-1 rounded-md px-1.5 py-0.5 text-[11px] font-medium text-foreground hover:bg-accent transition-colors"
+        >
+          <X className="size-3" />
+          Cancel
+        </button>
+      )}
+    </div>
+  );
+}
--- a/packages/views/chat/lib/format.ts
+++ b/packages/views/chat/lib/format.ts
@@ -0,0 +1,19 @@
+/**
+ * Format an elapsed seconds value as `Ns` (under a minute) or `Nm Ms`
+ * (a minute or more); a zero remainder is dropped ("3m", not "3m 0s").
+ * Shared by the live StatusPill timer and the persisted assistant-message
+ * timing line so both render durations the same way. Input is floored to
+ * whole seconds; negative or non-finite values render as "0s".
+ */
+export function formatElapsedSecs(secs: number): string {
+  const whole = Number.isFinite(secs) ? Math.max(0, Math.floor(secs)) : 0;
+  if (whole < 60) return `${whole}s`;
+  const m = Math.floor(whole / 60);
+  const s = whole % 60;
+  return s ? `${m}m ${s}s` : `${m}m`;
+}
+
+/** Same formatting for a millisecond input (server-stored elapsed_ms): rounded to the nearest second, negatives clamp to 0. */
+export function formatElapsedMs(ms: number): string {
+  return formatElapsedSecs(Math.max(0, Math.round(ms / 1000)));
+}
--- a/packages/views/editor/content-editor.tsx
+++ b/packages/views/editor/content-editor.tsx
@@ -94,6 +94,10 @@ interface ContentEditorRef {
  getMarkdown: () => string;
  clearContent: () => void;
  focus: () => void;
+  /** Drop focus from the editor — used by chat after send so the caret
+   *  stops competing with the StatusPill / streaming reply for the user's
+   *  attention. */
+  blur: () => void;
  uploadFile: (file: File) => void;
  /** True when file uploads are still in progress. */
  hasActiveUploads: () => boolean;
@@ -233,6 +237,9 @@ const ContentEditor = forwardRef<ContentEditorRef, ContentEditorProps>(
      focus: () => {
        editor?.commands.focus();
      },
+      blur: () => {
+        editor?.commands.blur();
+      },
      uploadFile: (file: File) => {
        if (!editor || !onUploadFileRef.current) return;
        const endPos = editor.state.doc.content.size;
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -69,6 +69,9 @@ catalogs:
    typescript:
      specifier: ^5.9.3
      version: 5.9.3
+    unicode-animations:
+      specifier: ^1.0.3
+      version: 1.0.3
    vitest:
      specifier: ^4.1.0
      version: 4.1.0
@@ -609,6 +612,9 @@ importers:
      tw-animate-css:
        specifier: ^1.4.0
        version: 1.4.0
+      unicode-animations:
+        specifier: 'catalog:'
+        version: 1.0.3
      vaul:
        specifier: ^1.1.2
        version: 1.1.2(@types/react-dom@19.2.3(@types/react@19.2.14))(@types/react@19.2.14)(react-dom@19.2.3(react@19.2.3))(react@19.2.3)
@@ -7097,6 +7103,10 @@ packages:
    resolution: {integrity: sha512-3IWdCpjgxp15CbJnsi/Y9TCDE7HWVN19j1hmzVhoAkY/+CJx449tVxT5wZc1Gwg8J+P0LWvzlBzxYRnHJ+1i7Q==}
    engines: {node: '>=20.18.1'}

+  unicode-animations@1.0.3:
+    resolution: {integrity: sha512-+klB2oWwcYZjYWhwP4Pr8UZffWDFVx6jKeIahE6z0QYyM2dwDeDPyn5nevCYbyotxvtT9lh21cVURO1RX0+YMg==}
+    hasBin: true
+
  unicorn-magic@0.3.0:
    resolution: {integrity: sha512-+QBBXBCvifc56fsbuxZQ6Sic3wqqc3WWaqxs58gvJrcOuN83HGTCwz3oS5phzU9LthRNE9VrJCFCLUgHeeFnfA==}
    engines: {node: '>=18'}
@@ -14814,6 +14824,8 @@ snapshots:

  undici@7.24.5: {}

+  unicode-animations@1.0.3: {}
+
  unicorn-magic@0.3.0: {}

  unified@11.0.5:
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@@ -32,6 +32,9 @@ catalog:
  # Icons
  lucide-react: "^1.0.1"

+  # Loading animations (chat StatusPill)
+  unicode-animations: "^1.0.3"
+
  # Product analytics
  posthog-js: "^1.176.1"

--- a/server/internal/handler/chat.go
+++ b/server/internal/handler/chat.go
@@ -206,6 +206,12 @@ type SendChatMessageRequest struct {
 type SendChatMessageResponse struct {
 	MessageID string `json:"message_id"`
 	TaskID    string `json:"task_id"`
+	// CreatedAt anchors the chat StatusPill timer the instant the user
+	// hits send. Without it the front-end falls back to its local clock
+	// and the timer "snaps backwards" later when WS events deliver the
+	// real created_at. Returning it here means the pill renders 0s from
+	// the start with a stable anchor.
+	CreatedAt string `json:"created_at"`
 }

 func (h *Handler) SendChatMessage(w http.ResponseWriter, r *http.Request) {
@@ -273,6 +279,7 @@ func (h *Handler) SendChatMessage(w http.ResponseWriter, r *http.Request) {
 	writeJSON(w, http.StatusCreated, SendChatMessageResponse{
 		MessageID: uuidToString(msg.ID),
 		TaskID:    uuidToString(task.ID),
+		CreatedAt: timestampToString(task.CreatedAt),
 	})
 }

@@ -304,9 +311,14 @@ func (h *Handler) ListChatMessages(w http.ResponseWriter, r *http.Request) {

 // PendingChatTaskResponse is returned by GetPendingChatTask — either the
 // current in-flight task's id/status, or an empty object when none is active.
+// CreatedAt is the anchor the frontend uses to time the chat StatusPill
+// (elapsed seconds = now - CreatedAt). It must come from the server because
+// optimistic seeds don't have a real task created_at and the timer needs to
+// survive refresh / reopen.
 type PendingChatTaskResponse struct {
-	TaskID string `json:"task_id,omitempty"`
-	Status string `json:"status,omitempty"`
+	TaskID    string `json:"task_id,omitempty"`
+	Status    string `json:"status,omitempty"`
+	CreatedAt string `json:"created_at,omitempty"`
 }

 // MarkChatSessionRead clears the session's unread_since (→ has_unread=false)
@@ -403,8 +415,9 @@ func (h *Handler) GetPendingChatTask(w http.ResponseWriter, r *http.Request) {
 	}

 	writeJSON(w, http.StatusOK, PendingChatTaskResponse{
-		TaskID: uuidToString(task.ID),
-		Status: task.Status,
+		TaskID:    uuidToString(task.ID),
+		Status:    task.Status,
+		CreatedAt: timestampToString(task.CreatedAt),
 	})
 }

@@ -491,6 +504,12 @@ type ChatMessageResponse struct {
 	Content       string  `json:"content"`
 	TaskID        *string `json:"task_id"`
 	CreatedAt     string  `json:"created_at"`
+	// FailureReason flags an assistant row synthesized by FailTask's chat
+	// fallback. Front-end uses it to switch to the destructive bubble.
+	FailureReason *string `json:"failure_reason"`
+	// ElapsedMs is the wall-clock duration from task creation to terminal
+	// state. Drives "Replied in 38s" / "Failed after 12s" captions.
+	ElapsedMs *int64 `json:"elapsed_ms"`
 }

 func chatSessionToResponse(s db.ChatSession) ChatSessionResponse {
@@ -514,5 +533,7 @@ func chatMessageToResponse(m db.ChatMessage) ChatMessageResponse {
 		Content:       m.Content,
 		TaskID:        uuidToPtr(m.TaskID),
 		CreatedAt:     timestampToString(m.CreatedAt),
+		FailureReason: textToPtr(m.FailureReason),
+		ElapsedMs:     int8ToPtr(m.ElapsedMs),
 	}
 }
--- a/server/internal/handler/handler.go
+++ b/server/internal/handler/handler.go
@@ -138,6 +138,7 @@ func strToText(s string) pgtype.Text                { return util.StrToText(s) }
 func timestampToString(t pgtype.Timestamptz) string { return util.TimestampToString(t) }
 func timestampToPtr(t pgtype.Timestamptz) *string   { return util.TimestampToPtr(t) }
 func uuidToPtr(u pgtype.UUID) *string               { return util.UUIDToPtr(u) }
+func int8ToPtr(v pgtype.Int8) *int64                { return util.Int8ToPtr(v) }

 // parseUUIDOrBadRequest validates a UUID string sourced from user input
 // (URL params, request body, headers). On invalid input it writes a 400
--- a/server/internal/service/task.go
+++ b/server/internal/service/task.go
@@ -656,6 +656,7 @@ func (s *TaskService) CompleteTask(ctx context.Context, taskID pgtype.UUID, resu
 				Role:          "assistant",
 				Content:       redact.Text(body),
 				TaskID:        task.ID,
+				ElapsedMs:     computeChatElapsedMs(task),
 			}); err != nil {
 				slog.Error("failed to save assistant chat message", "task_id", util.UUIDToString(task.ID), "error", err)
 			} else {
@@ -758,6 +759,31 @@ func (s *TaskService) FailTask(ctx context.Context, taskID pgtype.UUID, errMsg,
 		s.createAgentComment(ctx, task.IssueID, task.AgentID, redact.Text(errMsg), "system", task.TriggerCommentID)
 	}

+	// Mirror the issue fallback for chat tasks: write an assistant
+	// chat_message tagged with the daemon-reported failure_reason so the
+	// conversation history shows what happened. Skip when auto-retry is
+	// pending (the new attempt will write its own outcome) — same guard as
+	// the issue path above.
+	if task.ChatSessionID.Valid && retried == nil {
+		if _, err := s.Queries.CreateChatMessage(ctx, db.CreateChatMessageParams{
+			ChatSessionID: task.ChatSessionID,
+			Role:          "assistant",
+			Content:       redact.Text(errMsg),
+			TaskID:        pgtype.UUID{Bytes: task.ID.Bytes, Valid: true},
+			FailureReason: pgtype.Text{String: failureReason, Valid: failureReason != ""},
+			ElapsedMs:     computeChatElapsedMs(task),
+		}); err != nil {
+			slog.Error("failed to save failure chat message",
+				"task_id", util.UUIDToString(task.ID),
+				"chat_session_id", util.UUIDToString(task.ChatSessionID),
+				"error", err)
+		} else if err := s.Queries.SetUnreadSinceIfNull(ctx, task.ChatSessionID); err != nil {
+			slog.Warn("failed to set unread_since on failure",
+				"chat_session_id", util.UUIDToString(task.ChatSessionID),
+				"error", err)
+		}
+	}
+
 	// Quick-create tasks: push a failure inbox notification to the
 	// requester so they can either retry or fall back to the advanced form
 	// without losing their original prompt. Skipped when an auto-retry is
@@ -1085,6 +1111,23 @@ type AgentSkillFileData struct {
 	Content string `json:"content"`
 }

+// computeChatElapsedMs returns the wall-clock duration from task creation
+// (user hit send) to terminal state (completed/failed). Stored on the
+// assistant chat_message so the UI can render "Replied in 38s" /
+// "Failed after 12s". Uses created_at — not started_at — because users
+// experience total wait time, including queue + dispatch, not just the
+// daemon's actual run time.
+func computeChatElapsedMs(task db.AgentTaskQueue) pgtype.Int8 {
+	if !task.CompletedAt.Valid || !task.CreatedAt.Valid {
+		return pgtype.Int8{}
+	}
+	ms := task.CompletedAt.Time.Sub(task.CreatedAt.Time).Milliseconds()
+	if ms < 0 {
+		ms = 0
+	}
+	return pgtype.Int8{Int64: ms, Valid: true}
+}
+
 func priorityToInt(p string) int32 {
 	switch p {
 	case "urgent":
@@ -1119,6 +1162,12 @@ func (s *TaskService) broadcastTaskDispatch(ctx context.Context, task db.AgentTa
 	payload["runtime_id"] = util.UUIDToString(task.RuntimeID)
 	payload["issue_id"] = util.UUIDToString(task.IssueID)
 	payload["agent_id"] = util.UUIDToString(task.AgentID)
+	// chat_session_id is the routing key the chat window uses to writethrough
+	// `chatKeys.pendingTask` to status="running" the moment the daemon claims
+	// the task. Without it the pill stays stuck at "Queued" until completion.
+	if task.ChatSessionID.Valid {
+		payload["chat_session_id"] = util.UUIDToString(task.ChatSessionID)
+	}

 	workspaceID := s.ResolveTaskWorkspaceID(ctx, task)
 	if workspaceID == "" {
--- a/server/internal/util/pgx.go
+++ b/server/internal/util/pgx.go
@@ -99,3 +99,10 @@ func UUIDToPtr(u pgtype.UUID) *string {
 	s := UUIDToString(u)
 	return &s
 }
+
+func Int8ToPtr(v pgtype.Int8) *int64 {
+	if v.Valid {
+		return &v.Int64 // points at this call's own copy of the value
+	}
+	return nil // SQL NULL maps to a nil pointer
+}
--- a/server/migrations/062_chat_message_failure_reason.down.sql
+++ b/server/migrations/062_chat_message_failure_reason.down.sql
@@ -0,0 +1 @@
+ALTER TABLE chat_message DROP COLUMN failure_reason;
--- a/server/migrations/062_chat_message_failure_reason.up.sql
+++ b/server/migrations/062_chat_message_failure_reason.up.sql
@@ -0,0 +1,6 @@
+-- Mirror the issue path's "fallback comment on failure" with a failure_reason
+-- column on chat_message. When FailTask runs on a chat task, server writes
+-- an assistant chat_message tagged with the daemon-reported reason so the
+-- conversation history shows what happened (instead of the previous black
+-- hole where a failed task left no trace in the user-visible thread).
+ALTER TABLE chat_message ADD COLUMN failure_reason TEXT;
--- a/server/migrations/063_chat_message_elapsed.down.sql
+++ b/server/migrations/063_chat_message_elapsed.down.sql
@@ -0,0 +1 @@
+ALTER TABLE chat_message DROP COLUMN elapsed_ms;
--- a/server/migrations/063_chat_message_elapsed.up.sql
+++ b/server/migrations/063_chat_message_elapsed.up.sql
@@ -0,0 +1,6 @@
+-- Capture per-task wall-clock duration (queue → done) on assistant chat
+-- messages so the UI can render "Replied in 38s" / "Failed after 12s"
+-- under each reply. BIGINT to avoid any int32 overflow concerns even
+-- though chat tasks are short — keeps the column reusable for longer
+-- workloads later.
+ALTER TABLE chat_message ADD COLUMN elapsed_ms BIGINT;
--- a/server/pkg/db/generated/chat.sql.go
+++ b/server/pkg/db/generated/chat.sql.go
@@ -22,9 +22,9 @@ func (q *Queries) ArchiveChatSession(ctx context.Context, id pgtype.UUID) error
 }

 const createChatMessage = `-- name: CreateChatMessage :one
-INSERT INTO chat_message (chat_session_id, role, content, task_id)
-VALUES ($1, $2, $3, $4)
-RETURNING id, chat_session_id, role, content, task_id, created_at
+INSERT INTO chat_message (chat_session_id, role, content, task_id, failure_reason, elapsed_ms)
+VALUES ($1, $2, $3, $4, $5, $6)
+RETURNING id, chat_session_id, role, content, task_id, created_at, failure_reason, elapsed_ms
 `

 type CreateChatMessageParams struct {
@@ -32,6 +32,8 @@ type CreateChatMessageParams struct {
 	Role          string      `json:"role"`
 	Content       string      `json:"content"`
 	TaskID        pgtype.UUID `json:"task_id"`
+	FailureReason pgtype.Text `json:"failure_reason"`
+	ElapsedMs     pgtype.Int8 `json:"elapsed_ms"`
 }

 func (q *Queries) CreateChatMessage(ctx context.Context, arg CreateChatMessageParams) (ChatMessage, error) {
@@ -40,6 +42,8 @@ func (q *Queries) CreateChatMessage(ctx context.Context, arg CreateChatMessagePa
 		arg.Role,
 		arg.Content,
 		arg.TaskID,
+		arg.FailureReason,
+		arg.ElapsedMs,
 	)
 	var i ChatMessage
 	err := row.Scan(
@@ -49,6 +53,8 @@ func (q *Queries) CreateChatMessage(ctx context.Context, arg CreateChatMessagePa
 		&i.Content,
 		&i.TaskID,
 		&i.CreatedAt,
+		&i.FailureReason,
+		&i.ElapsedMs,
 	)
 	return i, err
 }
@@ -141,7 +147,7 @@ func (q *Queries) CreateChatTask(ctx context.Context, arg CreateChatTaskParams)
 }

 const getChatMessage = `-- name: GetChatMessage :one
-SELECT id, chat_session_id, role, content, task_id, created_at FROM chat_message
+SELECT id, chat_session_id, role, content, task_id, created_at, failure_reason, elapsed_ms FROM chat_message
 WHERE id = $1
 `

@@ -155,6 +161,8 @@ func (q *Queries) GetChatMessage(ctx context.Context, id pgtype.UUID) (ChatMessa
 		&i.Content,
 		&i.TaskID,
 		&i.CreatedAt,
+		&i.FailureReason,
+		&i.ElapsedMs,
 	)
 	return i, err
 }
@@ -239,23 +247,27 @@ func (q *Queries) GetLastChatTaskSession(ctx context.Context, chatSessionID pgty
 }

 const getPendingChatTask = `-- name: GetPendingChatTask :one
-SELECT id, status FROM agent_task_queue
+SELECT id, status, created_at FROM agent_task_queue
 WHERE chat_session_id = $1 AND status IN ('queued', 'dispatched', 'running')
 ORDER BY created_at DESC
 LIMIT 1
 `

 type GetPendingChatTaskRow struct {
-	ID     pgtype.UUID `json:"id"`
-	Status string      `json:"status"`
+	ID        pgtype.UUID        `json:"id"`
+	Status    string             `json:"status"`
+	CreatedAt pgtype.Timestamptz `json:"created_at"`
 }

 // Returns the most recent in-flight task for a chat session, if any.
 // Used by the frontend to recover pending state after refresh / reopen.
+// created_at is the anchor for the chat StatusPill timer (it computes
+// elapsed = now - task.created_at), so the pill survives refresh / reopen
+// without "resetting to 0s".
 func (q *Queries) GetPendingChatTask(ctx context.Context, chatSessionID pgtype.UUID) (GetPendingChatTaskRow, error) {
 	row := q.db.QueryRow(ctx, getPendingChatTask, chatSessionID)
 	var i GetPendingChatTaskRow
-	err := row.Scan(&i.ID, &i.Status)
+	err := row.Scan(&i.ID, &i.Status, &i.CreatedAt)
 	return i, err
 }

@@ -321,7 +333,7 @@ func (q *Queries) ListAllChatSessionsByCreator(ctx context.Context, arg ListAllC
 }

 const listChatMessages = `-- name: ListChatMessages :many
-SELECT id, chat_session_id, role, content, task_id, created_at FROM chat_message
+SELECT id, chat_session_id, role, content, task_id, created_at, failure_reason, elapsed_ms FROM chat_message
 WHERE chat_session_id = $1
 ORDER BY created_at ASC
 `
@@ -342,6 +354,8 @@ func (q *Queries) ListChatMessages(ctx context.Context, chatSessionID pgtype.UUI
 			&i.Content,
 			&i.TaskID,
 			&i.CreatedAt,
+			&i.FailureReason,
+			&i.ElapsedMs,
 		); err != nil {
 			return nil, err
 		}
--- a/server/pkg/db/generated/models.go
+++ b/server/pkg/db/generated/models.go
@@ -161,6 +161,8 @@ type ChatMessage struct {
 	Content       string             `json:"content"`
 	TaskID        pgtype.UUID        `json:"task_id"`
 	CreatedAt     pgtype.Timestamptz `json:"created_at"`
+	FailureReason pgtype.Text        `json:"failure_reason"`
+	ElapsedMs     pgtype.Int8        `json:"elapsed_ms"`
 }

 type ChatSession struct {
--- a/server/pkg/db/queries/chat.sql
+++ b/server/pkg/db/queries/chat.sql
@@ -54,8 +54,8 @@ UPDATE chat_session SET updated_at = now()
 WHERE id = $1;

 -- name: CreateChatMessage :one
-INSERT INTO chat_message (chat_session_id, role, content, task_id)
-VALUES ($1, $2, $3, sqlc.narg(task_id))
+INSERT INTO chat_message (chat_session_id, role, content, task_id, failure_reason, elapsed_ms)
+VALUES ($1, $2, $3, sqlc.narg(task_id), sqlc.narg(failure_reason), sqlc.narg(elapsed_ms))
 RETURNING *;

 -- name: ListChatMessages :many
@@ -88,7 +88,10 @@ LIMIT 1;
 -- name: GetPendingChatTask :one
 -- Returns the most recent in-flight task for a chat session, if any.
 -- Used by the frontend to recover pending state after refresh / reopen.
-SELECT id, status FROM agent_task_queue
+-- created_at is the anchor for the chat StatusPill timer (it computes
+-- elapsed = now - task.created_at), so the pill survives refresh / reopen
+-- without "resetting to 0s".
+SELECT id, status, created_at FROM agent_task_queue
 WHERE chat_session_id = $1 AND status IN ('queued', 'dispatched', 'running')
 ORDER BY created_at DESC
 LIMIT 1;
				`@@ -0,0 +1 @@`
				`ALTER TABLE chat_message DROP COLUMN failure_reason;`
				`@@ -0,0 +1 @@`
				`ALTER TABLE chat_message DROP COLUMN elapsed_ms;`