fix(daemon): strip alias shadowing, harden timeout, lazy-resolve via login shell

Three follow-ups from the PR #2620 review (Elon): 1. Alias shadowing — `command -v claude` in zsh/bash returns the alias definition, not the binary, and the absolute-path filter then rejects it. The script now `unalias`/`unset -f` the name before lookup so `command -v` falls through to the real PATH binary. This is the exact case behind #2512. 2. Hard timeout — `CommandContext` kills only the shell process. Rc files that background processes inheriting stdout (`direnv hook`, `nvm` shims, plain `&`) keep the pipe open and `cmd.Output()` would block for as long as the survivors live. `Cmd.WaitDelay` forcibly closes the pipes once the cap elapses, so total startup penalty is bounded by `timeout + waitDelay` regardless of rc-file content. 3. Lazy fallback — the resolver no longer runs on every daemon start. `getShellResolved` is `sync.Once`-guarded and only fires when a bare command name actually misses `exec.LookPath`. Users whose PATH already contains every agent never pay the rc-file load cost. Tests: - `TestResolveAgentsViaLoginShell_StripsAliasShadowing` — rc declares `alias fakeclaude=...`, real binary lives on PATH, resolver must return the binary, not the alias text. - `TestResolveAgentsViaLoginShell_HardTimeoutOnBackgroundedStdout` — rc backgrounds a 60s sleeper holding stdout; resolver must return inside `timeout + waitDelay + slack`, not 60s. - `TestLoadConfig_SkipsLoginShellWhenLookPathSucceeds` — when exec.LookPath finds every agent, SHELL (a marker-writing sentinel) must not be invoked. Co-authored-by: multica-agent <github@multica.ai>
fix(daemon): resolve agent CLIs via login shell when daemon PATH misses them
2026-06-28 18:09:14 +02:00 · 2026-05-14 19:21:51 +08:00 · 2026-05-14 18:49:09 +08:00
2 changed files with 600 additions and 67 deletions
--- a/server/internal/daemon/config.go
+++ b/server/internal/daemon/config.go
@@ -1,12 +1,14 @@
 package daemon

 import (
+	"context"
+	"errors"
 	"fmt"
 	"net/url"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"strings"
+	"sync"
 	"time"

 	"github.com/mattn/go-shellwords"
@@ -95,84 +97,88 @@ func LoadConfig(overrides Overrides) (Config, error) {
 		return Config{}, err
 	}

-	// Probe available agent CLIs
+	// Probe available agent CLIs. exec.LookPath is the primary path, but on
+	// macOS/Linux a GUI-launched daemon (Electron, Launchpad) does not
+	// inherit the user's interactive shell PATH — fnm/nvm/volta multishells,
+	// the Anthropic native installer prefix, and per-user npm prefixes all
+	// live in dirs that only get added to PATH by ~/.zshrc or ~/.bashrc.
+	// shellResolvedAgents asks the user's login shell, lazily on first miss,
+	// to resolve every standard agent name to its canonical absolute path,
+	// so we can find binaries the bare daemon process can't see. See
+	// resolveAgentsViaLoginShell for the details and constraints.
+	//
+	// Laziness matters: the happy path (every agent on the daemon's PATH or
+	// pinned to an explicit MULTICA_*_PATH) must not pay the cost of
+	// spawning the user's login shell — that touches their rc files and
+	// adds startup latency that scales with whatever they put in there. We
+	// only fork a shell when a bare command name actually missed LookPath.
+	var (
+		shellResolveOnce sync.Once
+		shellResolved    map[string]string
+	)
+	getShellResolved := func() map[string]string {
+		shellResolveOnce.Do(func() {
+			shellResolved = resolveAgentsViaLoginShell(defaultAgentCommandNames)
+		})
+		return shellResolved
+	}
+	probe := func(envVar, defaultCmd, modelEnv string) (AgentEntry, bool) {
+		cmd := envOrDefault(envVar, defaultCmd)
+		if _, err := exec.LookPath(cmd); err == nil {
+			return AgentEntry{
+				Path:  cmd,
+				Model: strings.TrimSpace(os.Getenv(modelEnv)),
+			}, true
+		}
+		// The shell fallback only rescues bare command names. An operator
+		// who pinned MULTICA_*_PATH to an absolute or relative path that
+		// doesn't exist should hard-miss, not silently get a different
+		// binary.
+		if strings.ContainsAny(cmd, "/\\") {
+			return AgentEntry{}, false
+		}
+		if path, ok := getShellResolved()[cmd]; ok {
+			return AgentEntry{
+				Path:  path,
+				Model: strings.TrimSpace(os.Getenv(modelEnv)),
+			}, true
+		}
+		return AgentEntry{}, false
+	}
+
 	agents := map[string]AgentEntry{}
-	claudePath := envOrDefault("MULTICA_CLAUDE_PATH", "claude")
-	if _, err := exec.LookPath(claudePath); err == nil {
-		agents["claude"] = AgentEntry{
-			Path:  claudePath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_CLAUDE_MODEL")),
-		}
+	if e, ok := probe("MULTICA_CLAUDE_PATH", "claude", "MULTICA_CLAUDE_MODEL"); ok {
+		agents["claude"] = e
 	}
-	codexPath := envOrDefault("MULTICA_CODEX_PATH", "codex")
-	if _, err := exec.LookPath(codexPath); err == nil {
-		agents["codex"] = AgentEntry{
-			Path:  codexPath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_CODEX_MODEL")),
-		}
+	if e, ok := probe("MULTICA_CODEX_PATH", "codex", "MULTICA_CODEX_MODEL"); ok {
+		agents["codex"] = e
 	}
-	opencodePath := envOrDefault("MULTICA_OPENCODE_PATH", "opencode")
-	if _, err := exec.LookPath(opencodePath); err == nil {
-		agents["opencode"] = AgentEntry{
-			Path:  opencodePath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_OPENCODE_MODEL")),
-		}
+	if e, ok := probe("MULTICA_OPENCODE_PATH", "opencode", "MULTICA_OPENCODE_MODEL"); ok {
+		agents["opencode"] = e
 	}
-	openclawPath := envOrDefault("MULTICA_OPENCLAW_PATH", "openclaw")
-	if _, err := exec.LookPath(openclawPath); err == nil {
-		agents["openclaw"] = AgentEntry{
-			Path:  openclawPath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_OPENCLAW_MODEL")),
-		}
+	if e, ok := probe("MULTICA_OPENCLAW_PATH", "openclaw", "MULTICA_OPENCLAW_MODEL"); ok {
+		agents["openclaw"] = e
 	}
-	hermesPath := envOrDefault("MULTICA_HERMES_PATH", "hermes")
-	if _, err := exec.LookPath(hermesPath); err == nil {
-		agents["hermes"] = AgentEntry{
-			Path:  hermesPath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_HERMES_MODEL")),
-		}
+	if e, ok := probe("MULTICA_HERMES_PATH", "hermes", "MULTICA_HERMES_MODEL"); ok {
+		agents["hermes"] = e
 	}
-	geminiPath := envOrDefault("MULTICA_GEMINI_PATH", "gemini")
-	if _, err := exec.LookPath(geminiPath); err == nil {
-		agents["gemini"] = AgentEntry{
-			Path:  geminiPath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_GEMINI_MODEL")),
-		}
+	if e, ok := probe("MULTICA_GEMINI_PATH", "gemini", "MULTICA_GEMINI_MODEL"); ok {
+		agents["gemini"] = e
 	}
-	piPath := envOrDefault("MULTICA_PI_PATH", "pi")
-	if _, err := exec.LookPath(piPath); err == nil {
-		agents["pi"] = AgentEntry{
-			Path:  piPath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_PI_MODEL")),
-		}
+	if e, ok := probe("MULTICA_PI_PATH", "pi", "MULTICA_PI_MODEL"); ok {
+		agents["pi"] = e
 	}
-	cursorPath := envOrDefault("MULTICA_CURSOR_PATH", "cursor-agent")
-	if _, err := exec.LookPath(cursorPath); err == nil {
-		agents["cursor"] = AgentEntry{
-			Path:  cursorPath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_CURSOR_MODEL")),
-		}
+	if e, ok := probe("MULTICA_CURSOR_PATH", "cursor-agent", "MULTICA_CURSOR_MODEL"); ok {
+		agents["cursor"] = e
 	}
-	copilotPath := envOrDefault("MULTICA_COPILOT_PATH", "copilot")
-	if _, err := exec.LookPath(copilotPath); err == nil {
-		agents["copilot"] = AgentEntry{
-			Path:  copilotPath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_COPILOT_MODEL")),
-		}
+	if e, ok := probe("MULTICA_COPILOT_PATH", "copilot", "MULTICA_COPILOT_MODEL"); ok {
+		agents["copilot"] = e
 	}
-	kimiPath := envOrDefault("MULTICA_KIMI_PATH", "kimi")
-	if _, err := exec.LookPath(kimiPath); err == nil {
-		agents["kimi"] = AgentEntry{
-			Path:  kimiPath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_KIMI_MODEL")),
-		}
+	if e, ok := probe("MULTICA_KIMI_PATH", "kimi", "MULTICA_KIMI_MODEL"); ok {
+		agents["kimi"] = e
 	}
-	kiroPath := envOrDefault("MULTICA_KIRO_PATH", "kiro-cli")
-	if _, err := exec.LookPath(kiroPath); err == nil {
-		agents["kiro"] = AgentEntry{
-			Path:  kiroPath,
-			Model: strings.TrimSpace(os.Getenv("MULTICA_KIRO_MODEL")),
-		}
+	if e, ok := probe("MULTICA_KIRO_PATH", "kiro-cli", "MULTICA_KIRO_MODEL"); ok {
+		agents["kiro"] = e
 	}
 	if len(agents) == 0 {
 		return Config{}, fmt.Errorf("no agent CLI found: install claude, codex, copilot, opencode, openclaw, hermes, gemini, pi, cursor-agent, kimi, or kiro-cli and ensure it is on PATH")
@@ -442,3 +448,201 @@ func shellArgsFromEnv(name string) ([]string, error) {
 	}
 	return args, nil
 }
+
+// defaultAgentCommandNames is the set of bare command names LoadConfig probes
+// when no MULTICA_*_PATH override is in effect. It has to stay in step with
+// the `probe(...)` calls in LoadConfig: the login-shell fallback resolves the
+// whole list in one shell invocation rather than spawning a shell per miss.
+var defaultAgentCommandNames = []string{
+	"claude",
+	"codex",
+	"opencode",
+	"openclaw",
+	"hermes",
+	"gemini",
+	"pi",
+	"cursor-agent",
+	"copilot",
+	"kimi",
+	"kiro-cli",
+}
+
+const (
+	// loginShellResolveTimeout is how long the login shell gets to print the
+	// canonical agent paths before its context is cancelled. A slow or broken
+	// rc file must not hold up startup: past this point the daemon carries on
+	// without shell-resolved fallbacks, exactly as it behaved before the
+	// fallback existed.
+	loginShellResolveTimeout = 3 * time.Second
+
+	// loginShellResolveWaitDelay is assigned to Cmd.WaitDelay (Go 1.20+). The
+	// context only kills the shell process itself; anything an rc file left
+	// running in the background with stdout inherited keeps the pipe open,
+	// and `cmd.Output()` would otherwise wait for EOF for as long as that
+	// survivor lives. The WaitDelay timer starts when the context is done or
+	// when Wait sees the shell exit, whichever happens first; once it fires
+	// the pipes are closed forcibly. The worst-case startup penalty is
+	// therefore bounded by `timeout + waitDelay`, not by the lifetime of the
+	// user's background processes.
+	loginShellResolveWaitDelay = 2 * time.Second
+)
+
+// supportedLoginShells is the allowlist of interpreter base names that may be
+// run as `<shell> -ilc <script>`. Only shells that accept the POSIX-style
+// resolver script unchanged are listed. fish is deliberately left out: it
+// uses `command -s` and a different command-substitution syntax.
+var supportedLoginShells = map[string]struct{}{
+	"sh":   {},
+	"dash": {},
+	"bash": {},
+	"ksh":  {},
+	"zsh":  {},
+}
+
+// resolveAgentsViaLoginShell asks the user's login shell to print an absolute
+// path, with its directory symlink-resolved, for each name in `names`. It
+// returns a map of name → path for whatever the shell could find, and an
+// empty (never nil) map if the shell is unavailable / unsupported / times
+// out / produces no usable output.
+//
+// Why we need this:
+//
+// Daemon-style processes on macOS/Linux do not inherit the user's interactive
+// PATH. `claude --version` working in Terminal.app is no guarantee that
+// exec.LookPath("claude") will work from a binary spawned by Launchpad, the
+// Electron app, or `launchctl`. The most common offenders are fnm/nvm/volta
+// "multishell" prefix dirs (per-shell, ephemeral) and the Anthropic native
+// installer (`~/.claude/local/`) — both leave their binaries on a path that
+// only `.zshrc` knows about.
+//
+// Implementation notes:
+//
+//   - We invoke `$SHELL -ilc <script>` with both -i (interactive) and -l
+//     (login) so we pick up PATH set in either ~/.zshrc / ~/.bashrc OR
+//     ~/.zprofile / ~/.bash_profile. Real users put it in both places.
+//   - $SHELL must have a base name listed in supportedLoginShells; anything
+//     else (or an empty $SHELL) yields an empty result without forking.
+//   - The script resolves the directory via `cd "$dirname" && pwd -P` while
+//     the spawned shell is still alive. fnm/nvm "multishell" directories
+//     vanish on shell exit, so the canonical path must be captured before
+//     stdout is returned to Go.
+//   - The wait is bounded by loginShellResolveTimeout (context) plus
+//     loginShellResolveWaitDelay (Cmd.WaitDelay). exec.ErrWaitDelay is NOT
+//     treated as a failure: it is reported only when the shell itself exited
+//     successfully but something it left behind kept the output pipe open,
+//     so the stdout captured up to that point is the script's full output.
+//   - We only trust outputs that look like an absolute path AND still pass a
+//     fresh exec.LookPath check from the daemon's vantage point. That filters
+//     out alias/function definitions and per-shell paths that no longer
+//     exist once the helper shell is gone.
+//   - Names that fail isSafeAgentName are dropped before the script is
+//     built, because the script inlines names unquoted.
+func resolveAgentsViaLoginShell(names []string) map[string]string {
+	out := map[string]string{}
+	if len(names) == 0 {
+		return out
+	}
+	shell := strings.TrimSpace(os.Getenv("SHELL"))
+	if shell == "" {
+		return out
+	}
+	if _, ok := supportedLoginShells[filepath.Base(shell)]; !ok {
+		return out
+	}
+
+	safe := make([]string, 0, len(names))
+	for _, n := range names {
+		if isSafeAgentName(n) {
+			safe = append(safe, n)
+		}
+	}
+	if len(safe) == 0 {
+		return out
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), loginShellResolveTimeout)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, shell, "-ilc", buildLoginShellResolveScript(safe))
+	cmd.WaitDelay = loginShellResolveWaitDelay
+	raw, err := cmd.Output()
+	// ErrWaitDelay means the shell exited with status 0 and only a
+	// backgrounded rc-file child (direnv, nvm shim, plain `&`) was still
+	// holding stdout. The script finished, so its output is complete and
+	// usable; discarding it would break resolution for exactly the users
+	// WaitDelay exists to protect. Every other error (timeout, non-zero
+	// exit, exec failure) still yields an empty result.
+	if err != nil && !errors.Is(err, exec.ErrWaitDelay) {
+		return out
+	}
+
+	for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
+		// Each useful line is `<name>\t<path>`; anything else on stdout
+		// (rc-file banners and the like) has no tab and is skipped.
+		name, rest, ok := strings.Cut(line, "\t")
+		if !ok {
+			continue
+		}
+		path := strings.TrimSpace(rest)
+		if !filepath.IsAbs(path) {
+			continue
+		}
+		// Final reality check: the path the shell gave us must still be
+		// executable from the daemon's perspective right now. fnm
+		// multishells are the motivating example — pwd -P inside the
+		// helper shell can fail to break out of the per-session bin dir,
+		// and we'd rather report "not found" than hand back a path that
+		// vanishes between detection and execution.
+		if _, err := exec.LookPath(path); err != nil {
+			continue
+		}
+		out[name] = path
+	}
+	return out
+}
+
+// buildLoginShellResolveScript assembles the script that
+// resolveAgentsViaLoginShell hands to `$SHELL -ilc`. For every name, the
+// script:
+//
+//  1. removes any alias and any shell function of that name, so that
+//     `command -v` falls through to a real binary on PATH,
+//  2. looks the name up with POSIX `command -v`,
+//  3. skips results that are empty or not absolute paths (defence in depth:
+//     should the unalias/unset -f pair not take effect, `command -v` prints
+//     the alias/function definition and we would rather drop it),
+//  4. canonicalises the containing directory with `cd ... && pwd -P`, so
+//     symlinked prefix dirs (fnm/nvm/volta) collapse to stable paths,
+//  5. prints `<name>\t<canonical_path>`, one entry per line.
+//
+// Why step 1 is important — and why this PR's first revision missed #2512:
+// the motivating case has `alias claude=...` in ~/.zshrc *and* fnm's real
+// claude binary further down on PATH. With `-i` set the alias loads, and
+// `command -v claude` answers `claude: aliased to ...` (zsh) or `alias
+// claude='...'` (bash). Neither starts with `/`, so step 3 drops it and PATH
+// is never consulted. Unaliasing inside the same shell lets `command -v` do
+// the PATH search the daemon actually wants. Shell functions shadow in the
+// same way, hence `unset -f`. Both commands redirect stderr to /dev/null so
+// the harmless "no such alias" complaint stays quiet.
+//
+// Callers must vet every name with isSafeAgentName first: names are inlined
+// unquoted into the for-loop word list.
+func buildLoginShellResolveScript(names []string) string {
+	header := "for n in"
+	if len(names) > 0 {
+		header += " " + strings.Join(names, " ")
+	}
+	lines := []string{
+		header + "; do",
+		`  unalias "$n" 2>/dev/null`,
+		`  unset -f "$n" 2>/dev/null`,
+		`  p=$(command -v "$n" 2>/dev/null) || continue`,
+		`  [ -n "$p" ] || continue`,
+		`  case "$p" in /*) ;; *) continue ;; esac`,
+		`  d=$(dirname "$p") && f=$(basename "$p") && c=$(cd "$d" 2>/dev/null && pwd -P) || continue`,
+		`  printf '%s\t%s\n' "$n" "$c/$f"`,
+		"done",
+		"", // gives the script its trailing newline
+	}
+	return strings.Join(lines, "\n")
+}
+
+// isSafeAgentName reports whether `s` is a non-empty bare command name made
+// up solely of ASCII letters, ASCII digits, '.', '-' and '_' — the characters
+// that can be inlined into a shell script without quoting. Every agent name
+// the daemon ships with passes; the check guards against future drift in
+// that list and is not a constraint on operator-supplied paths, which never
+// reach the shell resolver.
+func isSafeAgentName(s string) bool {
+	if len(s) == 0 {
+		return false
+	}
+	// Byte-wise scan: any byte of a multi-byte rune is >= 0x80 and so fails
+	// every test below, which rejects all non-ASCII input.
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		isLetter := ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
+		isDigit := '0' <= c && c <= '9'
+		isPunct := c == '-' || c == '_' || c == '.'
+		if !isLetter && !isDigit && !isPunct {
+			return false
+		}
+	}
+	return true
+}
--- a/server/internal/daemon/config_test.go
+++ b/server/internal/daemon/config_test.go
@@ -1,8 +1,14 @@
 package daemon

 import (
+	"os"
+	"os/exec"
+	"path/filepath"
 	"reflect"
+	"runtime"
+	"strings"
 	"testing"
+	"time"
 )

 func TestPatternsFromEnv_DefaultsWhenUnset(t *testing.T) {
@@ -27,3 +33,326 @@ func TestPatternsFromEnv_DropsSeparatorBearingEntries(t *testing.T) {
 		t.Fatalf("expected %v, got %v", want, got)
 	}
 }
+
+// TestIsSafeAgentName pins the character allowlist: plain names pass, while
+// the empty string and anything carrying whitespace, a path separator or a
+// shell metacharacter is rejected.
+func TestIsSafeAgentName(t *testing.T) {
+	type testCase struct {
+		name string
+		safe bool
+	}
+	cases := []testCase{
+		{name: "claude", safe: true},
+		{name: "cursor-agent", safe: true},
+		{name: "kiro_cli", safe: true},
+		{name: "v1.2", safe: true},
+		{name: "Claude2", safe: true},
+		{name: "", safe: false},
+		{name: "a b", safe: false},
+		{name: "a/b", safe: false},
+		{name: "a;b", safe: false},
+		{name: "a$b", safe: false},
+		{name: "a`b", safe: false},
+		{name: "a'b", safe: false},
+		{name: `a"b`, safe: false},
+	}
+	for _, c := range cases {
+		got := isSafeAgentName(c.name)
+		if got == c.safe {
+			continue
+		}
+		t.Errorf("isSafeAgentName(%q) = %v, want %v", c.name, got, c.safe)
+	}
+}
+
+// TestBuildLoginShellResolveScript_ShapeAndContent checks the generated
+// script structurally: the loop header, alias/function stripping ahead of the
+// lookup, directory canonicalisation, and the tab-separated output format.
+func TestBuildLoginShellResolveScript_ShapeAndContent(t *testing.T) {
+	script := buildLoginShellResolveScript([]string{"claude", "cursor-agent"})
+
+	// The word list must be exactly the requested names, in order.
+	if !strings.Contains(script, "for n in claude cursor-agent;") {
+		t.Errorf("script missing expected for-loop header:\n%s", script)
+	}
+
+	// Aliases AND functions have to be stripped before `command -v` runs;
+	// otherwise `alias claude=...` in .zshrc shadows the real binary (the
+	// case behind #2512). Ordering is asserted through byte offsets.
+	unaliasAt := strings.Index(script, `unalias "$n" 2>/dev/null`)
+	unsetFnAt := strings.Index(script, `unset -f "$n" 2>/dev/null`)
+	lookupAt := strings.Index(script, `command -v "$n"`)
+	if unaliasAt < 0 || unsetFnAt < 0 || lookupAt < 0 {
+		t.Fatalf("script missing unalias/unset -f/command -v steps:\n%s", script)
+	}
+	if unaliasAt >= lookupAt || unsetFnAt >= lookupAt {
+		t.Errorf("unalias/unset -f must precede command -v:\n%s", script)
+	}
+
+	// `cd ... && pwd -P` is what breaks out of symlinked per-shell prefix
+	// dirs (fnm/nvm/volta) before the spawned shell exits.
+	if !strings.Contains(script, "pwd -P") {
+		t.Errorf("script missing pwd -P canonicalisation:\n%s", script)
+	}
+
+	// The parser splits on a tab, so the output must be `<name>\t<path>`.
+	if !strings.Contains(script, `printf '%s\t%s\n'`) {
+		t.Errorf("script missing tab-separated printf:\n%s", script)
+	}
+}
+
+// TestResolveAgentsViaLoginShell_ResolvesViaInteractiveShell verifies the
+// motivating bug scenario: a binary that lives in a directory which is NOT on
+// the daemon's PATH but IS added to PATH by the user's interactive shell rc
+// file gets resolved to a canonical absolute path.
+//
+// We simulate this by:
+//   - creating a temp dir containing an executable named "fakeclaude"
+//   - narrowing PATH to /usr/bin:/bin (so exec.LookPath misses fakeclaude)
+//   - pointing SHELL at /bin/sh and using ENV (sourced on -i) to add the dir
+//
+// Skipped on Windows (no POSIX shell), when /bin/sh is missing, and when
+// fakeclaude is somehow already visible on the narrowed PATH.
+//
+// NOTE(review): unlike the alias-shadowing test, nothing here probes whether
+// the host's /bin/sh honours $ENV; on a host where it does not, this test
+// fails instead of skipping.
+func TestResolveAgentsViaLoginShell_ResolvesViaInteractiveShell(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("POSIX shell not available on Windows")
+	}
+	sh := "/bin/sh"
+	if _, err := os.Stat(sh); err != nil {
+		t.Skipf("no /bin/sh available: %v", err)
+	}
+
+	binDir := t.TempDir()
+	binPath := filepath.Join(binDir, "fakeclaude")
+	// A trivially executable script. We only need it to exist and be
+	// marked +x; the resolver never runs it.
+	if err := os.WriteFile(binPath, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+		t.Fatalf("write fake binary: %v", err)
+	}
+
+	// Prove the precondition: with binDir absent from PATH, the daemon
+	// would normally miss this binary.
+	t.Setenv("PATH", "/usr/bin:/bin")
+	if _, err := lookPathInPath("fakeclaude"); err == nil {
+		t.Skip("PATH leak — test environment already exposes fakeclaude without shell help")
+	}
+
+	// Wire the interactive shell to add binDir to PATH on startup. POSIX
+	// sh reads $ENV when invoked with -i, so we write a tiny rc file that
+	// prepends binDir.
+	rc := filepath.Join(t.TempDir(), "sh.rc")
+	if err := os.WriteFile(rc, []byte("export PATH=\""+binDir+":$PATH\"\n"), 0o644); err != nil {
+		t.Fatalf("write rc: %v", err)
+	}
+	t.Setenv("SHELL", sh)
+	t.Setenv("ENV", rc)
+
+	got := resolveAgentsViaLoginShell([]string{"fakeclaude", "kiro-cli"})
+	resolved, ok := got["fakeclaude"]
+	if !ok {
+		t.Fatalf("expected fakeclaude in resolved map, got %v", got)
+	}
+	// Must be an absolute path, must exist, must point at our fake binary
+	// (resolving any symlinks t.TempDir may have introduced — macOS's
+	// /var → /private/var symlink is the usual culprit).
+	if !filepath.IsAbs(resolved) {
+		t.Errorf("expected absolute path, got %q", resolved)
+	}
+	wantCanonical, err := filepath.EvalSymlinks(binPath)
+	if err != nil {
+		t.Fatalf("eval symlinks for expected path: %v", err)
+	}
+	if resolved != wantCanonical {
+		t.Errorf("resolved = %q, want canonical %q", resolved, wantCanonical)
+	}
+}
+
+// TestResolveAgentsViaLoginShell_SkipsUnsupportedShell checks that a $SHELL
+// whose base name is outside the allowlist (fish) yields an empty result.
+func TestResolveAgentsViaLoginShell_SkipsUnsupportedShell(t *testing.T) {
+	t.Setenv("SHELL", "/usr/bin/fish")
+	if got := resolveAgentsViaLoginShell([]string{"claude"}); len(got) != 0 {
+		t.Errorf("expected empty map for unsupported shell, got %v", got)
+	}
+}
+
+// TestResolveAgentsViaLoginShell_EmptyShellNoCrash checks that an empty
+// $SHELL short-circuits to an empty result.
+func TestResolveAgentsViaLoginShell_EmptyShellNoCrash(t *testing.T) {
+	t.Setenv("SHELL", "")
+	if got := resolveAgentsViaLoginShell([]string{"claude"}); len(got) != 0 {
+		t.Errorf("expected empty map when SHELL unset, got %v", got)
+	}
+}
+
+// TestResolveAgentsViaLoginShell_EmptyInput checks that a nil name list
+// returns an empty result even when $SHELL is a supported shell.
+func TestResolveAgentsViaLoginShell_EmptyInput(t *testing.T) {
+	t.Setenv("SHELL", "/bin/sh")
+	if got := resolveAgentsViaLoginShell(nil); len(got) != 0 {
+		t.Errorf("expected empty map for nil input, got %v", got)
+	}
+}
+
+// lookPathInPath forwards to exec.LookPath unchanged. The shell-resolver
+// tests in this file call it for their "the daemon's own PATH misses this
+// binary" precondition, so the call being asserted against is explicit.
+func lookPathInPath(name string) (string, error) {
+	found, err := exec.LookPath(name)
+	return found, err
+}
+
+// TestResolveAgentsViaLoginShell_StripsAliasShadowing locks down the fix for
+// #2512: when the user's rc file declares an alias with the same name as the
+// agent CLI, the resolver must still return the real binary on PATH, not the
+// alias text. The previous revision of this code passed the rest of the test
+// suite but silently dropped this case (alias text is not absolute, so the
+// `case "$p" in /*)` filter rejected it).
+func TestResolveAgentsViaLoginShell_StripsAliasShadowing(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("POSIX shell not available on Windows")
+	}
+	sh := "/bin/sh"
+	if _, err := os.Stat(sh); err != nil {
+		t.Skipf("no /bin/sh available: %v", err)
+	}
+
+	binDir := t.TempDir()
+	binPath := filepath.Join(binDir, "fakeclaude")
+	if err := os.WriteFile(binPath, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+		t.Fatalf("write fake binary: %v", err)
+	}
+
+	// rc adds binDir to PATH AND defines an alias that shadows the bare
+	// name. The pre-fix script would get the alias definition back from
+	// `command -v`, see that this output isn't an absolute path, and
+	// silently drop the agent. With unalias/unset -f in place, command -v
+	// falls through to the PATH search and finds binPath.
+	rc := filepath.Join(t.TempDir(), "sh.rc")
+	rcBody := "export PATH=\"" + binDir + ":$PATH\"\n" +
+		"alias fakeclaude=\"/nonexistent/wrapper-from-rc\"\n"
+	if err := os.WriteFile(rc, []byte(rcBody), 0o644); err != nil {
+		t.Fatalf("write rc: %v", err)
+	}
+
+	// Strip PATH so exec.LookPath misses fakeclaude — same precondition as
+	// the happy-path test, so we know the shell did the resolution.
+	t.Setenv("PATH", "/usr/bin:/bin")
+	if _, err := lookPathInPath("fakeclaude"); err == nil {
+		t.Skip("PATH leak — fakeclaude already visible to the daemon without shell help")
+	}
+	// Sanity-check that the simulated environment can actually load aliases.
+	// If the host /bin/sh doesn't honour $ENV in -i mode (rare but possible
+	// on minimal Linux images), skipping is more honest than asserting on a
+	// scenario the test couldn't actually set up.
+	t.Setenv("SHELL", sh)
+	t.Setenv("ENV", rc)
+	probe, err := exec.Command(sh, "-ilc", "alias fakeclaude 2>/dev/null").Output()
+	if err != nil || !strings.Contains(string(probe), "fakeclaude") {
+		t.Skipf("test host's /bin/sh did not load alias from $ENV; cannot simulate shadowing (probe=%q err=%v)", string(probe), err)
+	}
+
+	got := resolveAgentsViaLoginShell([]string{"fakeclaude"})
+	resolved, ok := got["fakeclaude"]
+	if !ok {
+		t.Fatalf("expected fakeclaude in resolved map despite alias shadowing, got %v", got)
+	}
+	wantCanonical, err := filepath.EvalSymlinks(binPath)
+	if err != nil {
+		t.Fatalf("eval symlinks for expected path: %v", err)
+	}
+	if resolved != wantCanonical {
+		t.Errorf("resolved = %q, want canonical %q (got the alias instead of the PATH binary?)", resolved, wantCanonical)
+	}
+}
+
+// TestResolveAgentsViaLoginShell_HardTimeoutOnBackgroundedStdout covers the
+// failure mode Cmd.WaitDelay exists for: an rc file that backgrounds a
+// long-lived process which inherits stdout. The shell ending (by itself or
+// through context cancellation) does not close that inherited pipe, so
+// cmd.Output() would sit waiting for EOF until the survivor exits. The
+// resolver must come back within roughly loginShellResolveTimeout +
+// loginShellResolveWaitDelay, not after the survivor's lifetime.
+func TestResolveAgentsViaLoginShell_HardTimeoutOnBackgroundedStdout(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("POSIX shell not available on Windows")
+	}
+	const sh = "/bin/sh"
+	if _, err := os.Stat(sh); err != nil {
+		t.Skipf("no /bin/sh available: %v", err)
+	}
+
+	// The rc file backgrounds a sleeper that outlives any sensible
+	// WaitDelay. The shell itself is expected to carry on and exit
+	// promptly; what would stall the caller is the sleeper still holding
+	// the inherited output pipe. Whether any agent gets resolved is
+	// irrelevant here — only the elapsed time is asserted.
+	rcPath := filepath.Join(t.TempDir(), "sh.rc")
+	if err := os.WriteFile(rcPath, []byte("( sleep 60 ) &\n"), 0o644); err != nil {
+		t.Fatalf("write rc: %v", err)
+	}
+	t.Setenv("SHELL", sh)
+	t.Setenv("ENV", rcPath)
+
+	// Budget = context timeout + wait delay + generous scheduling slack. A
+	// regression that disables WaitDelay would run for about 60s instead.
+	budget := loginShellResolveTimeout + loginShellResolveWaitDelay + 3*time.Second
+	began := time.Now()
+	finished := make(chan struct{})
+	go func() {
+		_ = resolveAgentsViaLoginShell([]string{"claude"})
+		close(finished)
+	}()
+
+	select {
+	case <-time.After(budget):
+		t.Fatalf("resolver did not return within %v — WaitDelay is not enforcing a hard ceiling", budget)
+	case <-finished:
+		if took := time.Since(began); took > budget {
+			t.Errorf("resolver took %v, expected <= %v (WaitDelay leak?)", took, budget)
+		}
+	}
+}
+
+// TestLoadConfig_SkipsLoginShellWhenLookPathSucceeds proves the laziness
+// requirement: when every default agent command name already resolves via
+// the daemon's PATH, LoadConfig must not spawn the login shell. SHELL points
+// at a sentinel script that creates a marker file whenever it is executed.
+//
+// Two details keep the assertion from passing vacuously:
+//
+//   - A fake binary is staged for EVERY name in defaultAgentCommandNames.
+//     LoadConfig probes all of them, so a single bare name missing from PATH
+//     is enough to trigger the shell fallback legitimately.
+//   - The sentinel writes the marker with a shell redirection (`: > file`),
+//     not `touch`: PATH is narrowed to the fake-binary dir, where an external
+//     `touch` cannot be found and the marker would never be written.
+//
+// A control run with one agent withheld confirms that the sentinel really
+// fires on a miss before the laziness assertion is trusted.
+//
+// NOTE(review): assumes no MULTICA_*_PATH overrides are set in the test
+// environment; those would change which command names get probed.
+func TestLoadConfig_SkipsLoginShellWhenLookPathSucceeds(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("POSIX shell not available on Windows")
+	}
+	if _, err := os.Stat("/bin/sh"); err != nil {
+		t.Skipf("no /bin/sh available: %v", err)
+	}
+	if len(defaultAgentCommandNames) == 0 {
+		t.Skip("no default agent command names to probe")
+	}
+
+	// Stage 1: fake binaries that the daemon's bare exec.LookPath sees.
+	pathDir := t.TempDir()
+	writeFakeAgent := func(name string) {
+		t.Helper()
+		if err := os.WriteFile(filepath.Join(pathDir, name), []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+			t.Fatalf("write fake %s: %v", name, err)
+		}
+	}
+	withheld := defaultAgentCommandNames[len(defaultAgentCommandNames)-1]
+	for _, name := range defaultAgentCommandNames {
+		if name != withheld {
+			writeFakeAgent(name)
+		}
+	}
+
+	// Stage 2: a SHELL that leaves a marker behind when invoked. Only
+	// shell builtins are used, so it works with the narrowed PATH.
+	shellDir := t.TempDir()
+	shellPath := filepath.Join(shellDir, "bash") // pick a name the resolver's allowlist accepts
+	marker := filepath.Join(shellDir, "invoked.marker")
+	shellBody := "#!/bin/sh\n: > \"" + marker + "\"\n"
+	if err := os.WriteFile(shellPath, []byte(shellBody), 0o755); err != nil {
+		t.Fatalf("write sentinel shell: %v", err)
+	}
+
+	t.Setenv("PATH", pathDir)
+	t.Setenv("SHELL", shellPath)
+	t.Setenv("MULTICA_DAEMON_ID", "11111111-1111-1111-1111-111111111111")
+
+	loadConfig := func() {
+		t.Helper()
+		if _, err := LoadConfig(Overrides{
+			ServerURL:      "http://localhost:0",
+			WorkspacesRoot: t.TempDir(),
+		}); err != nil {
+			// Bookkeeping unrelated to agent probing may fail in CI; the
+			// marker assertions are what this test is about.
+			t.Logf("LoadConfig returned %v (non-fatal for this test)", err)
+		}
+	}
+	markerWritten := func() bool {
+		t.Helper()
+		_, err := os.Stat(marker)
+		if err == nil {
+			return true
+		}
+		if !os.IsNotExist(err) {
+			t.Fatalf("unexpected error stat-ing marker file: %v", err)
+		}
+		return false
+	}
+
+	// Control: with one agent missing from PATH the fallback must fire. If
+	// it does not, this environment cannot demonstrate laziness (LoadConfig
+	// bailed out before probing, or the sentinel could not run), and a
+	// "pass" below would mean nothing.
+	loadConfig()
+	if !markerWritten() {
+		t.Skipf("sentinel shell was not invoked when %q was missing from PATH; cannot validate laziness here", withheld)
+	}
+	if err := os.Remove(marker); err != nil {
+		t.Fatalf("remove marker after control run: %v", err)
+	}
+
+	// Real assertion: every agent now resolves via exec.LookPath, so the
+	// login shell must stay untouched. The resolver runs synchronously
+	// inside LoadConfig, so no settling delay is needed before checking.
+	writeFakeAgent(withheld)
+	loadConfig()
+	if markerWritten() {
+		t.Fatalf("login shell was invoked even though exec.LookPath found every agent — laziness broken")
+	}
+}