Compare commits

...

2 Commits

Author SHA1 Message Date
Jiang Bohan
4d53657e65 fix(daemon): strip alias shadowing, harden timeout, lazy-resolve via login shell
Three follow-ups from the PR #2620 review (Elon):

1. Alias shadowing — `command -v claude` in zsh/bash returns the alias
   definition, not the binary, and the absolute-path filter then rejects it.
   The script now `unalias`/`unset -f` the name before lookup so `command -v`
   falls through to the real PATH binary. This is the exact case behind
   #2512.

2. Hard timeout — `CommandContext` kills only the shell process. Rc files
   that background processes inheriting stdout (`direnv hook`, `nvm` shims,
   plain `&`) keep the pipe open and `cmd.Output()` would block for as long
   as the survivors live. `Cmd.WaitDelay` forcibly closes the pipes once
   the cap elapses, so total startup penalty is bounded by
   `timeout + waitDelay` regardless of rc-file content.

3. Lazy fallback — the resolver no longer runs on every daemon start.
   `getShellResolved` is `sync.Once`-guarded and only fires when a bare
   command name actually misses `exec.LookPath`. Users whose PATH already
   contains every agent never pay the rc-file load cost.

Tests:
  - `TestResolveAgentsViaLoginShell_StripsAliasShadowing` — rc declares
    `alias fakeclaude=...`, real binary lives on PATH, resolver must
    return the binary, not the alias text.
  - `TestResolveAgentsViaLoginShell_HardTimeoutOnBackgroundedStdout` —
    rc backgrounds a 60s sleeper holding stdout; resolver must return
    inside `timeout + waitDelay + slack`, not 60s.
  - `TestLoadConfig_SkipsLoginShellWhenLookPathSucceeds` — when
    exec.LookPath finds every agent, SHELL (a marker-writing sentinel)
    must not be invoked.
Co-authored-by: multica-agent <github@multica.ai>
2026-05-14 19:21:51 +08:00
Jiang Bohan
08fa1ff04e fix(daemon): resolve agent CLIs via login shell when daemon PATH misses them
GUI-launched daemons on macOS/Linux do not inherit the user's interactive
shell PATH, so fnm/nvm/volta multishells and the Anthropic native installer
silently disappear during onboarding even though `claude --version` works
in Terminal. Fall back to `$SHELL -ilc` to ask the login shell for the
canonical absolute path, then verify it with exec.LookPath before trusting
it. Symlinks (fnm/nvm prefix dirs) are resolved while the helper shell is
still alive so per-session paths get canonicalised before they vanish.

Refs MUL-2167, multica-ai/multica#2512.

Co-authored-by: multica-agent <github@multica.ai>
2026-05-14 18:49:09 +08:00
2 changed files with 600 additions and 67 deletions

View File

@@ -1,12 +1,14 @@
package daemon
import (
"context"
"errors"
"fmt"
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"github.com/mattn/go-shellwords"
@@ -95,84 +97,88 @@ func LoadConfig(overrides Overrides) (Config, error) {
return Config{}, err
}
// Probe available agent CLIs
// Probe available agent CLIs. exec.LookPath is the primary path, but on
// macOS/Linux a GUI-launched daemon (Electron, Launchpad) does not
// inherit the user's interactive shell PATH — fnm/nvm/volta multishells,
// the Anthropic native installer prefix, and per-user npm prefixes all
// live in dirs that only get added to PATH by ~/.zshrc or ~/.bashrc.
// shellResolvedAgents asks the user's login shell, lazily on first miss,
// to resolve every standard agent name to its canonical absolute path,
// so we can find binaries the bare daemon process can't see. See
// resolveAgentsViaLoginShell for the details and constraints.
//
// Laziness matters: the happy path (every agent on the daemon's PATH or
// pinned to an explicit MULTICA_*_PATH) must not pay the cost of
// spawning the user's login shell — that touches their rc files and
// adds startup latency that scales with whatever they put in there. We
// only fork a shell when a bare command name actually missed LookPath.
var (
shellResolveOnce sync.Once
shellResolved map[string]string
)
getShellResolved := func() map[string]string {
shellResolveOnce.Do(func() {
shellResolved = resolveAgentsViaLoginShell(defaultAgentCommandNames)
})
return shellResolved
}
probe := func(envVar, defaultCmd, modelEnv string) (AgentEntry, bool) {
cmd := envOrDefault(envVar, defaultCmd)
if _, err := exec.LookPath(cmd); err == nil {
return AgentEntry{
Path: cmd,
Model: strings.TrimSpace(os.Getenv(modelEnv)),
}, true
}
// The shell fallback only rescues bare command names. An operator
// who pinned MULTICA_*_PATH to an absolute or relative path that
// doesn't exist should hard-miss, not silently get a different
// binary.
if strings.ContainsAny(cmd, "/\\") {
return AgentEntry{}, false
}
if path, ok := getShellResolved()[cmd]; ok {
return AgentEntry{
Path: path,
Model: strings.TrimSpace(os.Getenv(modelEnv)),
}, true
}
return AgentEntry{}, false
}
agents := map[string]AgentEntry{}
claudePath := envOrDefault("MULTICA_CLAUDE_PATH", "claude")
if _, err := exec.LookPath(claudePath); err == nil {
agents["claude"] = AgentEntry{
Path: claudePath,
Model: strings.TrimSpace(os.Getenv("MULTICA_CLAUDE_MODEL")),
}
if e, ok := probe("MULTICA_CLAUDE_PATH", "claude", "MULTICA_CLAUDE_MODEL"); ok {
agents["claude"] = e
}
codexPath := envOrDefault("MULTICA_CODEX_PATH", "codex")
if _, err := exec.LookPath(codexPath); err == nil {
agents["codex"] = AgentEntry{
Path: codexPath,
Model: strings.TrimSpace(os.Getenv("MULTICA_CODEX_MODEL")),
}
if e, ok := probe("MULTICA_CODEX_PATH", "codex", "MULTICA_CODEX_MODEL"); ok {
agents["codex"] = e
}
opencodePath := envOrDefault("MULTICA_OPENCODE_PATH", "opencode")
if _, err := exec.LookPath(opencodePath); err == nil {
agents["opencode"] = AgentEntry{
Path: opencodePath,
Model: strings.TrimSpace(os.Getenv("MULTICA_OPENCODE_MODEL")),
}
if e, ok := probe("MULTICA_OPENCODE_PATH", "opencode", "MULTICA_OPENCODE_MODEL"); ok {
agents["opencode"] = e
}
openclawPath := envOrDefault("MULTICA_OPENCLAW_PATH", "openclaw")
if _, err := exec.LookPath(openclawPath); err == nil {
agents["openclaw"] = AgentEntry{
Path: openclawPath,
Model: strings.TrimSpace(os.Getenv("MULTICA_OPENCLAW_MODEL")),
}
if e, ok := probe("MULTICA_OPENCLAW_PATH", "openclaw", "MULTICA_OPENCLAW_MODEL"); ok {
agents["openclaw"] = e
}
hermesPath := envOrDefault("MULTICA_HERMES_PATH", "hermes")
if _, err := exec.LookPath(hermesPath); err == nil {
agents["hermes"] = AgentEntry{
Path: hermesPath,
Model: strings.TrimSpace(os.Getenv("MULTICA_HERMES_MODEL")),
}
if e, ok := probe("MULTICA_HERMES_PATH", "hermes", "MULTICA_HERMES_MODEL"); ok {
agents["hermes"] = e
}
geminiPath := envOrDefault("MULTICA_GEMINI_PATH", "gemini")
if _, err := exec.LookPath(geminiPath); err == nil {
agents["gemini"] = AgentEntry{
Path: geminiPath,
Model: strings.TrimSpace(os.Getenv("MULTICA_GEMINI_MODEL")),
}
if e, ok := probe("MULTICA_GEMINI_PATH", "gemini", "MULTICA_GEMINI_MODEL"); ok {
agents["gemini"] = e
}
piPath := envOrDefault("MULTICA_PI_PATH", "pi")
if _, err := exec.LookPath(piPath); err == nil {
agents["pi"] = AgentEntry{
Path: piPath,
Model: strings.TrimSpace(os.Getenv("MULTICA_PI_MODEL")),
}
if e, ok := probe("MULTICA_PI_PATH", "pi", "MULTICA_PI_MODEL"); ok {
agents["pi"] = e
}
cursorPath := envOrDefault("MULTICA_CURSOR_PATH", "cursor-agent")
if _, err := exec.LookPath(cursorPath); err == nil {
agents["cursor"] = AgentEntry{
Path: cursorPath,
Model: strings.TrimSpace(os.Getenv("MULTICA_CURSOR_MODEL")),
}
if e, ok := probe("MULTICA_CURSOR_PATH", "cursor-agent", "MULTICA_CURSOR_MODEL"); ok {
agents["cursor"] = e
}
copilotPath := envOrDefault("MULTICA_COPILOT_PATH", "copilot")
if _, err := exec.LookPath(copilotPath); err == nil {
agents["copilot"] = AgentEntry{
Path: copilotPath,
Model: strings.TrimSpace(os.Getenv("MULTICA_COPILOT_MODEL")),
}
if e, ok := probe("MULTICA_COPILOT_PATH", "copilot", "MULTICA_COPILOT_MODEL"); ok {
agents["copilot"] = e
}
kimiPath := envOrDefault("MULTICA_KIMI_PATH", "kimi")
if _, err := exec.LookPath(kimiPath); err == nil {
agents["kimi"] = AgentEntry{
Path: kimiPath,
Model: strings.TrimSpace(os.Getenv("MULTICA_KIMI_MODEL")),
}
if e, ok := probe("MULTICA_KIMI_PATH", "kimi", "MULTICA_KIMI_MODEL"); ok {
agents["kimi"] = e
}
kiroPath := envOrDefault("MULTICA_KIRO_PATH", "kiro-cli")
if _, err := exec.LookPath(kiroPath); err == nil {
agents["kiro"] = AgentEntry{
Path: kiroPath,
Model: strings.TrimSpace(os.Getenv("MULTICA_KIRO_MODEL")),
}
if e, ok := probe("MULTICA_KIRO_PATH", "kiro-cli", "MULTICA_KIRO_MODEL"); ok {
agents["kiro"] = e
}
if len(agents) == 0 {
return Config{}, fmt.Errorf("no agent CLI found: install claude, codex, copilot, opencode, openclaw, hermes, gemini, pi, cursor-agent, kimi, or kiro-cli and ensure it is on PATH")
@@ -442,3 +448,201 @@ func shellArgsFromEnv(name string) ([]string, error) {
}
return args, nil
}
// defaultAgentCommandNames holds the bare command name that each probe(...)
// call in LoadConfig falls back to when no MULTICA_*_PATH override is set.
// Keep it in sync with those calls: the login-shell resolver is handed this
// whole list so it can look up every known agent in a single shell
// invocation rather than spawning one shell per missing agent.
var defaultAgentCommandNames = []string{
	"claude",
	"codex",
	"opencode",
	"openclaw",
	"hermes",
	"gemini",
	"pi",
	"cursor-agent",
	"copilot",
	"kimi",
	"kiro-cli",
}
// Time budget for the login-shell resolver. Together the two values bound the
// worst-case startup penalty a pathological rc file can cause.
const (
	// loginShellResolveTimeout is how long the daemon waits for the user's
	// login shell to print canonical agent paths. A broken rc file must not
	// block startup: once this elapses the shell is signalled to exit and
	// the daemon carries on without shell-resolved fallbacks, i.e. with the
	// behaviour it had before the resolver existed.
	loginShellResolveTimeout = 3 * time.Second

	// loginShellResolveWaitDelay is assigned to Cmd.WaitDelay (Go 1.20+).
	// Cancelling the context only kills the shell process itself; rc files
	// commonly background things that inherit stdout (`nvm` shims,
	// `direnv hook`, `eval $(starship init)`, plain `&`), and those
	// survivors keep the pipe open so cmd.Output() would block on EOF for
	// as long as they live. WaitDelay forcibly closes the pipes after this
	// extra delay, so the total wait is bounded by timeout + waitDelay
	// instead of by the lifetime of the user's background processes.
	loginShellResolveWaitDelay = 2 * time.Second
)
// supportedLoginShells is the allowlist (keyed by the base name of $SHELL) of
// interpreters the resolver is willing to run as `<shell> -ilc <script>`.
// Only POSIX-compatible shells are listed so the same resolver script works
// unchanged in all of them. fish is intentionally left out: it uses
// `command -s` and a different command-substitution syntax.
var supportedLoginShells = map[string]struct{}{
	"sh": {}, "dash": {}, "ksh": {}, "bash": {}, "zsh": {},
}
// resolveAgentsViaLoginShell asks the user's login shell to print the
// canonical (symlink-resolved) absolute path of each command in names. It
// returns a name → path map containing whatever the shell could find, and an
// empty (never nil) map when $SHELL is unset or unsupported, no name passes
// isSafeAgentName, the shell fails or times out, or nothing usable is printed.
//
// Why this exists:
//
// Daemon-style processes on macOS/Linux do not inherit the user's interactive
// PATH. `claude --version` working in Terminal.app is no guarantee that
// exec.LookPath("claude") works from a binary spawned by Launchpad, the
// Electron app, or `launchctl`. The usual offenders are fnm/nvm/volta
// "multishell" prefix dirs (per-shell, ephemeral) and the Anthropic native
// installer (`~/.claude/local/`), whose binaries sit on a path only the rc
// file knows about.
//
// Implementation notes:
//
//   - The shell runs as `$SHELL -ilc <script>`: -i (interactive) plus -l
//     (login) so PATH additions from ~/.zshrc / ~/.bashrc as well as
//     ~/.zprofile / ~/.bash_profile are picked up.
//   - The script canonicalises each hit with `cd "$dirname" && pwd -P` while
//     the shell is still alive, because fnm/nvm multishell directories vanish
//     when the shell exits.
//   - A line is trusted only if its name is one we asked for, its path is
//     absolute, and the path passes a fresh exec.LookPath from the daemon's
//     own vantage point. Interactive rc files may print arbitrary text to
//     stdout; anything that does not meet all three checks is ignored.
//   - Names are filtered through isSafeAgentName before being inlined into
//     the script. Custom MULTICA_*_PATH values never reach this resolver.
//   - exec.ErrWaitDelay is not treated as a failure. Per the os/exec docs it
//     is returned instead of nil when the shell exited successfully but its
//     stdout/stderr pipes were still held open (by an rc-backgrounded process)
//     when WaitDelay expired. In that case the shell's own output is complete
//     and is exactly what we need; discarding it would disable the fallback
//     for every user whose rc file backgrounds something.
func resolveAgentsViaLoginShell(names []string) map[string]string {
	out := map[string]string{}
	if len(names) == 0 {
		return out
	}

	shell := strings.TrimSpace(os.Getenv("SHELL"))
	if shell == "" {
		return out
	}
	if _, ok := supportedLoginShells[filepath.Base(shell)]; !ok {
		return out
	}

	// safe preserves the caller's order for the script; requested is the
	// same set in lookup form, used to vet names echoed back by the shell.
	safe := make([]string, 0, len(names))
	requested := make(map[string]struct{}, len(names))
	for _, n := range names {
		if isSafeAgentName(n) {
			safe = append(safe, n)
			requested[n] = struct{}{}
		}
	}
	if len(safe) == 0 {
		return out
	}

	ctx, cancel := context.WithTimeout(context.Background(), loginShellResolveTimeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, shell, "-ilc", buildLoginShellResolveScript(safe))
	cmd.WaitDelay = loginShellResolveWaitDelay
	raw, err := cmd.Output()
	// Any error other than ErrWaitDelay (non-zero exit, context timeout,
	// failure to start) means the output cannot be trusted as complete.
	if err != nil && !errors.Is(err, exec.ErrWaitDelay) {
		return out
	}

	for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
		name, path, found := strings.Cut(line, "\t")
		if !found {
			continue
		}
		if _, ok := requested[name]; !ok {
			continue
		}
		path = strings.TrimSpace(path)
		if !filepath.IsAbs(path) {
			continue
		}
		// Final reality check: the path must be executable from the
		// daemon's perspective right now. fnm multishells are the
		// motivating example: `pwd -P` can fail to break out of the
		// per-session bin dir, and reporting "not found" beats handing
		// back a path that vanishes between detection and execution.
		if _, err := exec.LookPath(path); err != nil {
			continue
		}
		out[name] = path
	}
	return out
}
// buildLoginShellResolveScript renders the POSIX shell script that
// resolveAgentsViaLoginShell passes to `$SHELL -ilc`. The script is a single
// `for n in <names>; do ... done` loop which, for every name:
//
//  1. removes any alias and any shell function of that name (`unalias`,
//     `unset -f`, both with stderr silenced so "no such alias" stays quiet),
//  2. looks the name up with `command -v`,
//  3. skips results that are empty or do not start with `/`,
//  4. canonicalises the containing directory with `cd ... && pwd -P`, so
//     symlinked prefix dirs (fnm/nvm/volta) collapse to stable paths,
//  5. prints `<name>\t<canonical_dir>/<basename>` on its own line.
//
// Why step 1 is important (this is the #2512 scenario): with `-i` the user's
// rc file is loaded, so an `alias claude=...` shadows the real binary and
// `command -v claude` prints the alias definition (`claude: aliased to ...`
// in zsh, `alias claude='...'` in bash). Neither starts with `/`, so step 3
// would drop the agent without ever consulting PATH. Removing the alias or
// function first lets `command -v` fall through to the PATH search. Step 3
// remains as defence in depth in case the removal did not take effect.
//
// names are written into the loop's word list unquoted; callers must have
// vetted every entry with isSafeAgentName.
func buildLoginShellResolveScript(names []string) string {
	// Loop body and terminator, one shell line per entry. Each entry is
	// emitted followed by a newline.
	loopLines := [...]string{
		" unalias \"$n\" 2>/dev/null",
		" unset -f \"$n\" 2>/dev/null",
		" p=$(command -v \"$n\" 2>/dev/null) || continue",
		" [ -n \"$p\" ] || continue",
		" case \"$p\" in /*) ;; *) continue ;; esac",
		" d=$(dirname \"$p\") && f=$(basename \"$p\") && c=$(cd \"$d\" 2>/dev/null && pwd -P) || continue",
		" printf '%s\\t%s\\n' \"$n\" \"$c/$f\"",
		"done",
	}

	var script strings.Builder
	script.WriteString("for n in")
	for i := range names {
		script.WriteString(" " + names[i])
	}
	script.WriteString("; do\n")
	for _, line := range loopLines {
		script.WriteString(line + "\n")
	}
	return script.String()
}
// isSafeAgentName reports whether s is a non-empty bare command name made up
// solely of ASCII letters, ASCII digits, '.', '-' and '_', i.e. characters
// that can be inlined into a shell script without quoting. Every agent name
// the daemon ships with satisfies it; the check guards against future drift
// in that list. Operator-supplied paths are not subject to it because they
// never reach the shell resolver.
func isSafeAgentName(s string) bool {
	if len(s) == 0 {
		return false
	}
	forbidden := func(r rune) bool {
		if ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || ('0' <= r && r <= '9') {
			return false
		}
		return r != '-' && r != '_' && r != '.'
	}
	// Safe exactly when no rune in s is forbidden.
	return strings.IndexFunc(s, forbidden) < 0
}

View File

@@ -1,8 +1,14 @@
package daemon
import (
"os"
"os/exec"
"path/filepath"
"reflect"
"runtime"
"strings"
"testing"
"time"
)
func TestPatternsFromEnv_DefaultsWhenUnset(t *testing.T) {
@@ -27,3 +33,326 @@ func TestPatternsFromEnv_DropsSeparatorBearingEntries(t *testing.T) {
t.Fatalf("expected %v, got %v", want, got)
}
}
// TestIsSafeAgentName checks isSafeAgentName against names it must accept
// (letters, digits, '.', '-', '_') and names it must reject (empty string,
// whitespace, path separators, and shell metacharacters).
func TestIsSafeAgentName(t *testing.T) {
	cases := []struct {
		name string
		safe bool
	}{
		{name: "claude", safe: true},
		{name: "cursor-agent", safe: true},
		{name: "kiro_cli", safe: true},
		{name: "v1.2", safe: true},
		{name: "Claude2", safe: true},
		{name: "", safe: false},
		{name: "a b", safe: false},
		{name: "a/b", safe: false},
		{name: "a;b", safe: false},
		{name: "a$b", safe: false},
		{name: "a`b", safe: false},
		{name: "a'b", safe: false},
		{name: `a"b`, safe: false},
	}
	for i := range cases {
		c := cases[i]
		actual := isSafeAgentName(c.name)
		if actual == c.safe {
			continue
		}
		t.Errorf("isSafeAgentName(%q) = %v, want %v", c.name, actual, c.safe)
	}
}
// TestBuildLoginShellResolveScript_ShapeAndContent pins the structural
// properties of the generated script that the resolver depends on: the loop
// header, alias/function stripping ahead of the lookup, `pwd -P`
// canonicalisation, and the tab-separated output format.
func TestBuildLoginShellResolveScript_ShapeAndContent(t *testing.T) {
	script := buildLoginShellResolveScript([]string{"claude", "cursor-agent"})

	// The loop must list exactly the requested names, in order.
	if !strings.Contains(script, "for n in claude cursor-agent;") {
		t.Errorf("script missing expected for-loop header:\n%s", script)
	}

	// Aliases AND functions must be stripped before `command -v`, otherwise
	// an `alias claude=...` in .zshrc shadows the real binary (the case
	// behind #2512). Ordering is asserted through relative offsets.
	offsetOf := func(fragment string) int { return strings.Index(script, fragment) }
	unaliasAt := offsetOf(`unalias "$n" 2>/dev/null`)
	unsetAt := offsetOf(`unset -f "$n" 2>/dev/null`)
	lookupAt := offsetOf(`command -v "$n"`)
	switch {
	case unaliasAt < 0, unsetAt < 0, lookupAt < 0:
		t.Fatalf("script missing unalias/unset -f/command -v steps:\n%s", script)
	case unaliasAt >= lookupAt || unsetAt >= lookupAt:
		t.Errorf("unalias/unset -f must precede command -v:\n%s", script)
	}

	// `cd ... && pwd -P` is what breaks out of symlinked per-shell prefix
	// dirs (fnm/nvm/volta) before the spawned shell exits.
	if !strings.Contains(script, "pwd -P") {
		t.Errorf("script missing pwd -P canonicalisation:\n%s", script)
	}

	// The parser splits each output line on a tab: `<name>\t<path>`.
	if !strings.Contains(script, `printf '%s\t%s\n'`) {
		t.Errorf("script missing tab-separated printf:\n%s", script)
	}
}
// TestResolveAgentsViaLoginShell_ResolvesViaInteractiveShell verifies the
// motivating bug scenario: a binary that lives in a directory which is NOT on
// the daemon's PATH but IS added to PATH by the user's interactive shell rc
// file gets resolved to a canonical absolute path.
//
// We simulate this by:
//   - creating a temp dir containing an executable named "fakeclaude"
//   - restricting PATH to /usr/bin:/bin (so exec.LookPath misses)
//   - pointing SHELL at /bin/sh and using ENV (sourced on -i) to add the dir
//
// Skipped on Windows (no POSIX shell), if /bin/sh is missing, if fakeclaude
// is somehow already on PATH, or if /bin/sh does not source $ENV under -ilc
// (which would defeat the simulation and is not the resolver's fault). The
// last condition is detected with a marker variable set by the rc file, not
// by looking up fakeclaude, so the skip cannot mask a resolver bug.
func TestResolveAgentsViaLoginShell_ResolvesViaInteractiveShell(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("POSIX shell not available on Windows")
	}
	sh := "/bin/sh"
	if _, err := os.Stat(sh); err != nil {
		t.Skipf("no /bin/sh available: %v", err)
	}

	binDir := t.TempDir()
	binPath := filepath.Join(binDir, "fakeclaude")
	// A trivially executable script. It only needs to exist and be marked
	// +x; the resolver never runs it.
	if err := os.WriteFile(binPath, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
		t.Fatalf("write fake binary: %v", err)
	}

	// Prove the precondition: with binDir absent from PATH, the daemon
	// would normally miss this binary.
	t.Setenv("PATH", "/usr/bin:/bin")
	if _, err := lookPathInPath("fakeclaude"); err == nil {
		t.Skip("PATH leak — test environment already exposes fakeclaude without shell help")
	}

	// Wire the interactive shell to add binDir to PATH on startup. POSIX
	// sh reads $ENV when invoked with -i, so we write a tiny rc file that
	// prepends binDir. The extra marker variable lets us tell "rc was not
	// sourced" apart from "resolver is broken".
	const rcMarker = "multica-rc-loaded"
	rc := filepath.Join(t.TempDir(), "sh.rc")
	rcBody := "export PATH=\"" + binDir + ":$PATH\"\n" +
		"MULTICA_TEST_RC_MARKER=" + rcMarker + "\n"
	if err := os.WriteFile(rc, []byte(rcBody), 0o644); err != nil {
		t.Fatalf("write rc: %v", err)
	}
	t.Setenv("SHELL", sh)
	t.Setenv("ENV", rc)

	// Skip, rather than fail, on hosts whose /bin/sh ignores $ENV.
	probe, err := exec.Command(sh, "-ilc", `printf '%s' "$MULTICA_TEST_RC_MARKER"`).Output()
	if err != nil || !strings.Contains(string(probe), rcMarker) {
		t.Skipf("test host's /bin/sh did not source $ENV; cannot simulate rc-provided PATH (probe=%q err=%v)", string(probe), err)
	}

	got := resolveAgentsViaLoginShell([]string{"fakeclaude", "kiro-cli"})
	resolved, ok := got["fakeclaude"]
	if !ok {
		t.Fatalf("expected fakeclaude in resolved map, got %v", got)
	}

	// Must be an absolute path and must point at our fake binary after
	// resolving any symlinks t.TempDir may have introduced (macOS's
	// /var → /private/var symlink is the usual culprit).
	if !filepath.IsAbs(resolved) {
		t.Errorf("expected absolute path, got %q", resolved)
	}
	wantCanonical, err := filepath.EvalSymlinks(binPath)
	if err != nil {
		t.Fatalf("eval symlinks for expected path: %v", err)
	}
	if resolved != wantCanonical {
		t.Errorf("resolved = %q, want canonical %q", resolved, wantCanonical)
	}
}
// TestResolveAgentsViaLoginShell_SkipsUnsupportedShell checks that a $SHELL
// whose base name is outside supportedLoginShells (fish here) yields an empty
// result.
func TestResolveAgentsViaLoginShell_SkipsUnsupportedShell(t *testing.T) {
	t.Setenv("SHELL", "/usr/bin/fish")
	if resolved := resolveAgentsViaLoginShell([]string{"claude"}); len(resolved) > 0 {
		t.Errorf("expected empty map for unsupported shell, got %v", resolved)
	}
}
// TestResolveAgentsViaLoginShell_EmptyShellNoCrash checks that an empty $SHELL
// makes the resolver return an empty result.
func TestResolveAgentsViaLoginShell_EmptyShellNoCrash(t *testing.T) {
	t.Setenv("SHELL", "")
	if resolved := resolveAgentsViaLoginShell([]string{"claude"}); len(resolved) > 0 {
		t.Errorf("expected empty map when SHELL unset, got %v", resolved)
	}
}
// TestResolveAgentsViaLoginShell_EmptyInput checks that a nil name list
// produces an empty result even when $SHELL names a supported shell.
func TestResolveAgentsViaLoginShell_EmptyInput(t *testing.T) {
	t.Setenv("SHELL", "/bin/sh")
	if resolved := resolveAgentsViaLoginShell(nil); len(resolved) > 0 {
		t.Errorf("expected empty map for nil input, got %v", resolved)
	}
}
// lookPathInPath forwards to exec.LookPath unchanged. The resolver tests in
// this file call it for their "binary is not visible on the daemon's PATH"
// precondition so that the intent of the lookup is explicit at the call site.
func lookPathInPath(name string) (string, error) {
	resolved, err := exec.LookPath(name)
	return resolved, err
}
// TestResolveAgentsViaLoginShell_StripsAliasShadowing is the regression test
// for #2512. The rc file both puts the real binary's directory on PATH and
// declares an alias of the same name; the resolver has to return the binary,
// not the alias text. Without the unalias/unset -f step the script sees the
// alias definition from `command -v`, rejects it for not being an absolute
// path, and silently drops the agent.
func TestResolveAgentsViaLoginShell_StripsAliasShadowing(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("POSIX shell not available on Windows")
	}
	const shellPath = "/bin/sh"
	if _, statErr := os.Stat(shellPath); statErr != nil {
		t.Skipf("no /bin/sh available: %v", statErr)
	}

	agentDir := t.TempDir()
	agentBin := filepath.Join(agentDir, "fakeclaude")
	if writeErr := os.WriteFile(agentBin, []byte("#!/bin/sh\nexit 0\n"), 0o755); writeErr != nil {
		t.Fatalf("write fake binary: %v", writeErr)
	}

	// The rc prepends agentDir to PATH and shadows the bare name with an
	// alias that points at a path which does not exist.
	rcFile := filepath.Join(t.TempDir(), "sh.rc")
	rcLines := []string{
		"export PATH=\"" + agentDir + ":$PATH\"",
		"alias fakeclaude=\"/nonexistent/wrapper-from-rc\"",
	}
	if writeErr := os.WriteFile(rcFile, []byte(strings.Join(rcLines, "\n")+"\n"), 0o644); writeErr != nil {
		t.Fatalf("write rc: %v", writeErr)
	}

	// Same precondition as the happy-path test: the daemon's own PATH must
	// not expose fakeclaude, so a hit can only come from the shell.
	t.Setenv("PATH", "/usr/bin:/bin")
	if _, lookErr := lookPathInPath("fakeclaude"); lookErr == nil {
		t.Skip("PATH leak — fakeclaude already visible to the daemon without shell help")
	}

	t.Setenv("SHELL", shellPath)
	t.Setenv("ENV", rcFile)

	// Confirm the host shell really loads the alias from $ENV under -ilc.
	// If it does not, skipping is more honest than asserting on a scenario
	// that was never set up.
	aliasOut, probeErr := exec.Command(shellPath, "-ilc", "alias fakeclaude 2>/dev/null").Output()
	aliasLoaded := probeErr == nil && strings.Contains(string(aliasOut), "fakeclaude")
	if !aliasLoaded {
		t.Skipf("test host's /bin/sh did not load alias from $ENV; cannot simulate shadowing (probe=%q err=%v)", string(aliasOut), probeErr)
	}

	found := resolveAgentsViaLoginShell([]string{"fakeclaude"})
	actual, present := found["fakeclaude"]
	if !present {
		t.Fatalf("expected fakeclaude in resolved map despite alias shadowing, got %v", found)
	}
	expected, evalErr := filepath.EvalSymlinks(agentBin)
	if evalErr != nil {
		t.Fatalf("eval symlinks for expected path: %v", evalErr)
	}
	if actual != expected {
		t.Errorf("resolved = %q, want canonical %q (got the alias instead of the PATH binary?)", actual, expected)
	}
}
// TestResolveAgentsViaLoginShell_HardTimeoutOnBackgroundedStdout exercises the
// failure mode Cmd.WaitDelay guards against: an rc file that backgrounds a
// long-running process inheriting stdout. The shell exiting (or being killed
// on context cancel) does not close the inherited pipe, so without WaitDelay
// cmd.Output() would block on EOF until the survivor exits. The resolver must
// return within roughly loginShellResolveTimeout + loginShellResolveWaitDelay,
// not the survivor's lifetime.
func TestResolveAgentsViaLoginShell_HardTimeoutOnBackgroundedStdout(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("POSIX shell not available on Windows")
	}
	sh := "/bin/sh"
	if _, err := os.Stat(sh); err != nil {
		t.Skipf("no /bin/sh available: %v", err)
	}

	// rc backgrounds a sleeper that holds stdout for far longer than any
	// reasonable WaitDelay. Backgrounding returns immediately, so the shell
	// goes on to run the resolver script and exits; it is the surviving
	// sleeper, not the shell, that keeps the stdout pipe open. If $ENV is
	// not honoured by the host shell the test still passes, just without
	// exercising the slow path.
	rc := filepath.Join(t.TempDir(), "sh.rc")
	rcBody := "( sleep 60 ) &\n"
	if err := os.WriteFile(rc, []byte(rcBody), 0o644); err != nil {
		t.Fatalf("write rc: %v", err)
	}
	t.Setenv("SHELL", sh)
	t.Setenv("ENV", rc)

	// Limit = context timeout + wait delay + generous slack for goroutine
	// scheduling. A bug that disables WaitDelay would block for the full
	// 60s sleep. (Named limit, not cap, to avoid shadowing the builtin.)
	limit := loginShellResolveTimeout + loginShellResolveWaitDelay + 3*time.Second

	start := time.Now()
	done := make(chan struct{})
	go func() {
		defer close(done)
		_ = resolveAgentsViaLoginShell([]string{"claude"})
	}()

	select {
	case <-done:
		if elapsed := time.Since(start); elapsed > limit {
			t.Errorf("resolver took %v, expected <= %v (WaitDelay leak?)", elapsed, limit)
		}
	case <-time.After(limit):
		t.Fatalf("resolver did not return within %v — WaitDelay is not enforcing a hard ceiling", limit)
	}
}
// TestLoadConfig_SkipsLoginShellWhenLookPathSucceeds proves the laziness
// requirement: if every agent CLI the operator cares about is already
// resolvable via the daemon's PATH (or pinned to an explicit MULTICA_*_PATH),
// the shell-fallback path must not run. We assert this by pointing SHELL at
// a sentinel script that touches a marker file when invoked.
func TestLoadConfig_SkipsLoginShellWhenLookPathSucceeds(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("POSIX shell not available on Windows")
}
// Stage 1: a fake `claude` binary the daemon's bare exec.LookPath
// definitely sees, so the probe loop never has reason to consult
// shellResolved.
pathDir := t.TempDir()
fakeClaude := filepath.Join(pathDir, "claude")
if err := os.WriteFile(fakeClaude, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
t.Fatalf("write fake claude: %v", err)
}
// Stage 2: a SHELL that writes a marker file when invoked. If
// LoadConfig's getShellResolved closure fires, the marker appears.
shellDir := t.TempDir()
shellPath := filepath.Join(shellDir, "bash") // pick a name the resolver's allowlist accepts
marker := filepath.Join(shellDir, "invoked.marker")
shellBody := "#!/bin/sh\ntouch \"" + marker + "\"\n"
if err := os.WriteFile(shellPath, []byte(shellBody), 0o755); err != nil {
t.Fatalf("write sentinel shell: %v", err)
}
t.Setenv("PATH", pathDir)
t.Setenv("SHELL", shellPath)
// Pin a non-existent agent to a bare name so it would normally trip
// the fallback — except `claude` already resolves, and the user hasn't
// configured anything else, so the probe loop should be satisfied
// after the first probe alone.
t.Setenv("MULTICA_DAEMON_ID", "11111111-1111-1111-1111-111111111111")
if _, err := LoadConfig(Overrides{
ServerURL: "http://localhost:0",
WorkspacesRoot: t.TempDir(),
}); err != nil {
// Some daemon-id / workspace bookkeeping outside our concern may
// fail in CI; the marker assertion below is what matters either
// way, so we don't fail on LoadConfig errors directly.
t.Logf("LoadConfig returned %v (non-fatal for this test)", err)
}
// Brief wait for any goroutine the resolver might have leaked. The
// sync.Once-guarded resolver runs synchronously today, so this should
// be immediate; the sleep is just to avoid a flake if that ever
// changes.
time.Sleep(50 * time.Millisecond)
if _, err := os.Stat(marker); err == nil {
t.Fatalf("login shell was invoked even though exec.LookPath found every agent — laziness broken")
} else if !os.IsNotExist(err) {
t.Fatalf("unexpected error stat-ing marker file: %v", err)
}
}