fix(daemon): drop stale resume session when workdir is not reused (#4027)

CLI backends key their session stores to the cwd (Claude Code looks
sessions up under ~/.claude/projects/<encoded-cwd>/), so a prior session
id can only resolve when the task runs in the exact workdir the session
was recorded against. When the prior workdir no longer exists (GC'd
after the issue went done, daemon reinstall, manual cleanup),
execenv.Reuse falls back to a fresh Prepare but the stale session id was
still passed to the backend: claude exited within a second and the run
failed before doing any work — permanently, because the failed run
records no session_id and the next claim serves the same stale pointer
again.

Gate ResumeSessionID on the workdir actually being reused, and correct
PriorSessionResumed so the runtime brief uses the cold-path wording when
the session is dropped.

Fixes multica-ai/multica#3854 (MUL-3221)

Co-authored-by: J <j@multica.ai>
Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
Bohan Jiang
2026-06-11 13:07:44 +08:00
committed by GitHub
parent e4ec9dc425
commit 8151f60c6c
2 changed files with 92 additions and 1 deletions

View File

@@ -2579,6 +2579,30 @@ func providerNeedsInlineSystemPrompt(provider string) bool {
}
}
// gateResumeToReusedWorkdir decides whether the task's prior session may be
// resumed. It returns true only when envWorkDir is the non-empty workdir
// recorded on the task (task.PriorWorkDir).
//
// CLI backends key their session stores to the cwd (Claude Code looks sessions
// up under ~/.claude/projects/<encoded-cwd>/), so a session id recorded against
// a different workdir can never resolve: the CLI exits within a second and the
// run fails before doing any work. The failure is permanent, because the
// failed run records no session and the next claim serves the same stale
// pointer again. This happens whenever the prior workdir no longer exists
// (GC'd after the issue went done, daemon reinstall, manual cleanup) and
// execenv.Reuse fell back to a fresh Prepare (GitHub #3854).
//
// When the workdir is not reused and the task still carries a prior session
// id, the drop is logged, task.PriorSessionID is cleared and
// taskCtx.PriorSessionResumed is set to false. In every other case neither
// task nor taskCtx is modified.
func gateResumeToReusedWorkdir(task *Task, taskCtx *execenv.TaskContextForEnv, envWorkDir string, taskLog *slog.Logger) bool {
	// Same, non-empty workdir: the recorded session can still resolve.
	if task.PriorWorkDir != "" && task.PriorWorkDir == envWorkDir {
		return true
	}

	// Fresh workdir but nothing to resume: nothing to drop.
	if task.PriorSessionID == "" {
		return false
	}

	// Log before clearing so the dropped session id is still visible.
	taskLog.Info("dropping prior session: workdir not reused, per-cwd session cannot resolve",
		"session_id", task.PriorSessionID,
		"prior_workdir", task.PriorWorkDir,
		"workdir", envWorkDir,
	)
	task.PriorSessionID = ""
	// The session is gone, so the task context must not claim it was resumed.
	taskCtx.PriorSessionResumed = false
	return false
}
func (d *Daemon) runTask(ctx context.Context, task Task, provider string, slot int, taskLog *slog.Logger) (TaskResult, error) {
// Refuse to spawn an agent without a workspace. An empty workspace_id
// here would make MULTICA_WORKSPACE_ID empty in the agent env, and the
@@ -2721,6 +2745,8 @@ func (d *Daemon) runTask(ctx context.Context, task Task, provider string, slot i
defer d.unmarkActiveEnvRoot(env.RootDir)
}
reused := gateResumeToReusedWorkdir(&task, &taskCtx, env.WorkDir, taskLog)
// Inject runtime-specific config (meta skill) so the agent discovers .agent_context/.
runtimeBrief, err := execenv.InjectRuntimeConfig(env.WorkDir, provider, taskCtx)
if err != nil {
@@ -2853,7 +2879,6 @@ func (d *Daemon) runTask(ctx context.Context, task Task, provider string, slot i
return TaskResult{}, fmt.Errorf("create agent backend: %w", err)
}
reused := task.PriorWorkDir != "" && env.WorkDir == task.PriorWorkDir
taskLog.Info("starting agent",
"provider", provider,
"workdir", env.WorkDir,

View File

@@ -18,6 +18,7 @@ import (
"testing"
"time"
"github.com/multica-ai/multica/server/internal/daemon/execenv"
"github.com/multica-ai/multica/server/internal/daemon/repocache"
"github.com/multica-ai/multica/server/pkg/agent"
)
@@ -896,6 +897,71 @@ func newRepoReadyTestDaemon(t *testing.T, handler http.HandlerFunc) *Daemon {
return d
}
// TestGateResumeToReusedWorkdir checks, case by case, that
// gateResumeToReusedWorkdir keeps the prior session only when the environment
// workdir equals the recorded prior workdir, and that the returned reuse flag
// and taskCtx.PriorSessionResumed agree with the expected outcome.
func TestGateResumeToReusedWorkdir(t *testing.T) {
	t.Parallel()

	// gateCase is one scenario: the inputs handed to the gate and the state
	// expected afterwards.
	type gateCase struct {
		name        string
		sessionID   string
		priorDir    string
		envDir      string
		wantSession string
		wantReused  bool
	}

	const (
		dirA = "/ws/task-a/workdir"
		dirB = "/ws/task-b/workdir"
	)

	cases := []gateCase{
		{name: "same workdir keeps session", sessionID: "sess-1", priorDir: dirA, envDir: dirA, wantSession: "sess-1", wantReused: true},
		{name: "fresh workdir drops session", sessionID: "sess-1", priorDir: dirA, envDir: dirB, wantSession: "", wantReused: false},
		{name: "session without recorded workdir drops session", sessionID: "sess-1", priorDir: "", envDir: dirB, wantSession: "", wantReused: false},
		{name: "no prior session is a no-op", sessionID: "", priorDir: dirA, envDir: dirB, wantSession: "", wantReused: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// PriorSessionResumed starts true whenever a session id is supplied,
			// so a dropped session is observable as the flag flipping to false.
			task := Task{PriorSessionID: tc.sessionID, PriorWorkDir: tc.priorDir}
			taskCtx := execenv.TaskContextForEnv{PriorSessionResumed: tc.sessionID != ""}
			wantResumed := tc.wantSession != ""

			gotReused := gateResumeToReusedWorkdir(&task, &taskCtx, tc.envDir, slog.Default())

			if gotReused != tc.wantReused {
				t.Fatalf("reused = %v, want %v", gotReused, tc.wantReused)
			}
			if task.PriorSessionID != tc.wantSession {
				t.Fatalf("PriorSessionID = %q, want %q", task.PriorSessionID, tc.wantSession)
			}
			if taskCtx.PriorSessionResumed != wantResumed {
				t.Fatalf("PriorSessionResumed = %v, want %v", taskCtx.PriorSessionResumed, wantResumed)
			}
		})
	}
}
func TestExecuteAndDrain_ResumeFailureFallback(t *testing.T) {
t.Parallel()