fix(agent): narrow Claude async launch detection

Co-authored-by: multica-agent <github@multica.ai>
fix(agent): force Claude background tools foreground
2026-06-29 18:39:17 +02:00 · 2026-06-17 18:34:41 +08:00 · 2026-06-17 18:25:47 +08:00 · 2026-06-17 18:01:43 +08:00
5 changed files with 379 additions and 4 deletions
--- a/server/internal/daemon/execenv/execenv_test.go
+++ b/server/internal/daemon/execenv/execenv_test.go
@@ -801,6 +801,47 @@ func TestInjectRuntimeConfigClaude(t *testing.T) {
 	}
 }

+func TestInjectRuntimeConfigBackgroundTaskSafetyProviderAgnostic(t *testing.T) { // Checks that each listed provider's injected config file contains the Background Task Safety text.
+	t.Parallel()
+
+	providers := []struct {
+		name string // provider identifier passed to InjectRuntimeConfig
+		file string // file name read back from the temp dir for that provider
+	}{
+		{"claude", "CLAUDE.md"},
+		{"codex", "AGENTS.md"},
+		{"opencode", "AGENTS.md"},
+		{"gemini", "GEMINI.md"},
+		{"hermes", "AGENTS.md"},
+	}
+
+	for _, tc := range providers {
+		tc := tc // rebind so each parallel subtest closure captures its own copy (needed before Go 1.22)
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			dir := t.TempDir() // fresh directory per subtest, so providers sharing AGENTS.md do not collide
+			if _, err := InjectRuntimeConfig(dir, tc.name, TaskContextForEnv{IssueID: "issue-1"}); err != nil {
+				t.Fatalf("InjectRuntimeConfig failed: %v", err)
+			}
+			data, err := os.ReadFile(filepath.Join(dir, tc.file))
+			if err != nil {
+				t.Fatalf("read %s: %v", tc.file, err)
+			}
+			s := string(data)
+			for _, want := range []string{ // substrings that must all appear in the injected file
+				"## Background Task Safety",
+				"Do NOT end your turn while background tasks",
+				"wait for a future notification/reminder",
+				"run the work synchronously instead",
+			} {
+				if !strings.Contains(s, want) {
+					t.Errorf("%s missing background task safety text %q\n---\n%s", tc.file, want, s) // Errorf rather than Fatalf: report every missing phrase in one run
+				}
+			}
+		})
+	}
+}
+
 func TestInjectRuntimeConfigAvailableCommandsCoreOnly(t *testing.T) {
 	t.Parallel()
 	dir := t.TempDir()
--- a/server/internal/daemon/execenv/runtime_config.go
+++ b/server/internal/daemon/execenv/runtime_config.go
@@ -366,6 +366,7 @@ func buildMetaSkillContent(provider string, ctx TaskContextForEnv) string {

 	b.WriteString("# Multica Agent Runtime\n\n")
 	b.WriteString("You are a coding agent in the Multica platform. Use the `multica` CLI to interact with the platform.\n\n")
+	writeBackgroundTaskSafetyInstructions(&b)

 	// Always emit agent identity so the agent knows who it is, even when
 	// dispatched via @mention on an issue assigned to a different agent.
@@ -794,3 +795,13 @@ func buildMetaSkillContent(provider string, ctx TaskContextForEnv) string {

 	return b.String()
 }
+
+func writeBackgroundTaskSafetyInstructions(b *strings.Builder) { // Appends the "Background Task Safety" markdown section (heading, rationale paragraph, five bullets) to b.
+	b.WriteString("## Background Task Safety\n\n") // section heading; execenv_test.go asserts on this exact text
+	b.WriteString("Multica marks this task terminal when your top-level agent process/turn exits. Any background work you started but did not collect before exiting can be orphaned: its result may be lost, and the user may see a completed/failed task even though the delegated work was never synthesized.\n\n") // rationale paragraph
+	b.WriteString("- Do NOT end your turn while background tasks, async subagents, background shell commands, or detached tool calls are still running.\n")
+	b.WriteString("- If a tool or runtime offers a background mode, use it only when you can explicitly wait for completion and collect the result before your final response.\n")
+	b.WriteString("- If a tool response says to wait for a future notification/reminder instead of collecting now, do not rely on that in Multica-managed runs. Block on the appropriate wait/output/collect operation before exiting.\n")
+	b.WriteString("- If you cannot observe or collect a background task's result, do not spawn it in the background; run the work synchronously instead.\n")
+	b.WriteString("- Before posting your final result or exiting silently, account for every background task you started and incorporate its output or failure into your response.\n\n") // last bullet ends with a blank line to close the section
+}
--- a/server/pkg/agent/claude.go
+++ b/server/pkg/agent/claude.go
@@ -135,6 +135,7 @@ func (b *claudeBackend) Execute(ctx context.Context, prompt string, opts ExecOpt
 		var sessionID string
 		finalStatus := "completed"
 		var finalError string
+		sawAsyncLaunch := false
 		usage := make(map[string]TokenUsage)

 		// Close stdout when the context is cancelled so scanner.Scan() unblocks.
@@ -162,7 +163,9 @@ func (b *claudeBackend) Execute(ctx context.Context, prompt string, opts ExecOpt
 			case "assistant":
 				b.handleAssistant(msg, msgCh, &output, usage)
 			case "user":
-				b.handleUser(msg, msgCh)
+				if b.handleUser(msg, msgCh) {
+					sawAsyncLaunch = true
+				}
 			case "system":
 				if msg.SessionID != "" {
 					sessionID = msg.SessionID
@@ -222,6 +225,10 @@ func (b *claudeBackend) Execute(ctx context.Context, prompt string, opts ExecOpt
 			finalStatus = "failed"
 			finalError = fmt.Sprintf("claude exited with error: %v", exitErr)
 		}
+		if finalStatus == "completed" && sawAsyncLaunch {
+			finalStatus = "failed"
+			finalError = "claude launched an async background task; Multica-managed runs require foreground execution"
+		}

 		// cmd.Wait() has returned — os/exec's stderr copy goroutine has
 		// observed every byte claude wrote to stderr before exiting, so
@@ -297,17 +304,21 @@ func (b *claudeBackend) handleAssistant(msg claudeSDKMessage, ch chan<- Message,
 	}
 }

-func (b *claudeBackend) handleUser(msg claudeSDKMessage, ch chan<- Message) {
+func (b *claudeBackend) handleUser(msg claudeSDKMessage, ch chan<- Message) bool {
 	var content claudeMessageContent
 	if err := json.Unmarshal(msg.Message, &content); err != nil {
-		return
+		return false
 	}

+	sawAsyncLaunch := false
 	for _, block := range content.Content {
 		if block.Type == "tool_result" {
 			resultStr := ""
 			if block.Content != nil {
 				resultStr = string(block.Content)
+				if claudeToolResultHasAsyncLaunch(block.Content) {
+					sawAsyncLaunch = true
+				}
 			}
 			trySend(ch, Message{
 				Type:   MessageToolResult,
@@ -316,6 +327,7 @@ func (b *claudeBackend) handleUser(msg claudeSDKMessage, ch chan<- Message) {
 			})
 		}
 	}
+	return sawAsyncLaunch
 }

 func (b *claudeBackend) handleControlRequest(msg claudeSDKMessage, stdin interface{ Write([]byte) (int, error) }) {
@@ -332,6 +344,12 @@ func (b *claudeBackend) handleControlRequest(msg claudeSDKMessage, stdin interfa
 	if inputMap == nil {
 		inputMap = map[string]any{}
 	}
+	if forceClaudeToolInputForeground(inputMap) {
+		b.cfg.Logger.Info("claude: forced foreground tool execution",
+			"request_id", msg.RequestID,
+			"tool", req.ToolName,
+		)
+	}

 	response := map[string]any{
 		"type": "control_response",
@@ -356,6 +374,50 @@ func (b *claudeBackend) handleControlRequest(msg claudeSDKMessage, stdin interfa
 	}
 }

+func forceClaudeToolInputForeground(input map[string]any) bool { // Rewrites run_in_background=true to false in input; reports whether it changed anything.
+	if runInBackground, ok := input["run_in_background"].(bool); ok && runInBackground { // only a boolean true matches; absent, non-bool, or false values are left alone
+		input["run_in_background"] = false // mutates the caller's map in place
+		return true
+	}
+	return false // nothing rewritten; the key is never injected into input that lacked it
+}
+
+func claudeToolResultHasAsyncLaunch(raw json.RawMessage) bool { // Reports whether a tool_result content payload carries a structured status of "async_launched".
+	if len(raw) == 0 { // nothing to inspect
+		return false
+	}
+	var value any
+	if err := json.Unmarshal(raw, &value); err != nil { // undecodable content is treated as "not async", not as an error
+		return false
+	}
+	switch v := value.(type) {
+	case map[string]any: // object payload: check its own status, then one level into a "content" array
+		if claudeMapHasAsyncLaunchStatus(v) {
+			return true
+		}
+		if content, ok := v["content"].([]any); ok { // a "content" value that is not an array falls through to the final return
+			return claudeArrayHasAsyncLaunchStatus(content)
+		}
+	case []any: // array payload: check each element that is an object
+		return claudeArrayHasAsyncLaunchStatus(v)
+	}
+	return false // strings and other scalars never match, so "async_launched" inside plain text is ignored
+}
+
+func claudeArrayHasAsyncLaunchStatus(values []any) bool { // Reports whether any element of values is an object whose status is "async_launched".
+	for _, value := range values {
+		if item, ok := value.(map[string]any); ok && claudeMapHasAsyncLaunchStatus(item) { // non-object elements are skipped; no recursion into nested arrays
+			return true
+		}
+	}
+	return false // also the result for a nil or empty slice
+}
+
+func claudeMapHasAsyncLaunchStatus(value map[string]any) bool { // Reports whether value has a string "status" field equal to "async_launched".
+	status, ok := value["status"].(string) // a missing key or a non-string status yields ok == false
+	return ok && status == "async_launched" // exact, case-sensitive match
+}
+
 // ── Claude SDK JSON types ──

 type claudeSDKMessage struct {
--- a/server/pkg/agent/claude_deadlock_test.go
+++ b/server/pkg/agent/claude_deadlock_test.go
@@ -25,6 +25,12 @@ func TestMain(m *testing.M) {
 	case "control_request":
 		runFakeClaudeControlRequest()
 		os.Exit(0)
+	case "background_control_request":
+		runFakeClaudeBackgroundControlRequest()
+		os.Exit(0)
+	case "async_launched_tool_result":
+		runFakeClaudeAsyncLaunchedToolResult()
+		os.Exit(0)
 	default:
 		fmt.Fprintf(os.Stderr, "unknown CLAUDE_FAKE_MODE: %q\n", mode)
 		os.Exit(2)
@@ -86,6 +92,57 @@ func runFakeClaudeControlRequest() {
 	fmt.Println(`{"type":"result","subtype":"success","is_error":false,"session_id":"sess-control","result":"done after control"}`)
 }

+func runFakeClaudeBackgroundControlRequest() { // Fake claude: requests a background Bash tool_use and verifies the reply rewrote it to foreground.
+	reader := bufio.NewReader(os.Stdin)
+	if _, err := reader.ReadString('\n'); err != nil { // consume the first stdin line (the prompt) before emitting anything
+		fmt.Fprintf(os.Stderr, "read prompt: %v\n", err)
+		os.Exit(31) // each failure point below uses its own exit code (31-35)
+	}
+	fmt.Println(`{"type":"system","session_id":"sess-background-control"}`)
+	fmt.Println(`{"type":"control_request","request_id":"req-bg","request":{"subtype":"tool_use","tool_name":"Bash","input":{"command":"sleep 60","run_in_background":true}}}`) // tool input asks for background execution
+
+	line, err := reader.ReadString('\n') // the next stdin line is expected to be the control_response
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "read control response: %v\n", err)
+		os.Exit(32)
+	}
+	var resp struct { // decodes only the response fields this fake verifies
+		Type     string `json:"type"`
+		Response struct {
+			RequestID string `json:"request_id"`
+			Response  struct {
+				UpdatedInput map[string]any `json:"updatedInput"`
+			} `json:"response"`
+		} `json:"response"`
+	}
+	if err := json.Unmarshal([]byte(strings.TrimSpace(line)), &resp); err != nil {
+		fmt.Fprintf(os.Stderr, "decode control response: %v\n", err)
+		os.Exit(33)
+	}
+	if resp.Type != "control_response" || resp.Response.RequestID != "req-bg" { // the reply must answer the request emitted above
+		fmt.Fprintf(os.Stderr, "unexpected control response: %s\n", line)
+		os.Exit(34)
+	}
+	if runInBackground, ok := resp.Response.Response.UpdatedInput["run_in_background"].(bool); !ok || runInBackground { // key must be present as a bool and be false
+		fmt.Fprintf(os.Stderr, "expected foreground updatedInput, got: %s\n", line)
+		os.Exit(35)
+	}
+
+	fmt.Println(`{"type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"call-bg","content":"foreground completed"}]}}`) // plain-string tool result: no async status
+	fmt.Println(`{"type":"result","subtype":"success","is_error":false,"session_id":"sess-background-control","result":"done after foreground rewrite"}`)
+}
+
+func runFakeClaudeAsyncLaunchedToolResult() { // Fake claude: emits a tool_result with status "async_launched", then a successful result.
+	reader := bufio.NewReader(os.Stdin)
+	if _, err := reader.ReadString('\n'); err != nil { // consume the first stdin line (the prompt) before emitting anything
+		fmt.Fprintf(os.Stderr, "read prompt: %v\n", err)
+		os.Exit(41)
+	}
+	fmt.Println(`{"type":"system","session_id":"sess-async-launched"}`)
+	fmt.Println(`{"type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"call-async","content":{"status":"async_launched","message":"background task launched"}}]}}`) // structured content object carrying the async status
+	fmt.Println(`{"type":"result","subtype":"success","is_error":false,"session_id":"sess-async-launched","result":"parent turn completed early"}`) // reports success even though the async work was never collected
+}
+
 // TestClaudeExecuteDoesNotDeadlockOnStartupStdoutBurst verifies that the
 // claude backend drains stdout concurrently with writing the prompt to
 // stdin. The buggy path serialises the two: writeClaudeInput runs before
@@ -194,3 +251,99 @@ func TestClaudeExecuteRespondsToControlRequest(t *testing.T) {
 		t.Fatal("timeout waiting for result — claude backend did not answer control_request")
 	}
 }
+
+func TestClaudeExecuteForcesBackgroundControlRequestForeground(t *testing.T) { // End-to-end: a background tool_use control_request is answered with foreground input and the run completes.
+	t.Parallel()
+
+	self, err := os.Executable() // the test binary is used as the claude executable; TestMain dispatches on CLAUDE_FAKE_MODE
+	if err != nil {
+		t.Fatalf("os.Executable: %v", err)
+	}
+
+	backend, err := New("claude", Config{
+		ExecutablePath: self,
+		Env:            map[string]string{"CLAUDE_FAKE_MODE": "background_control_request"}, // selects runFakeClaudeBackgroundControlRequest
+		Logger:         slog.Default(),
+	})
+	if err != nil {
+		t.Fatalf("new claude backend: %v", err)
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	session, err := backend.Execute(ctx, "run a background command", ExecOptions{Timeout: 8 * time.Second})
+	if err != nil {
+		t.Fatalf("execute returned error: %v", err)
+	}
+	go func() { // discard streamed messages; only the final Result is asserted
+		for range session.Messages {
+		}
+	}()
+
+	select {
+	case result, ok := <-session.Result:
+		if !ok {
+			t.Fatal("result channel closed without a value")
+		}
+		if result.Status != "completed" { // the fake exits non-zero if run_in_background was not rewritten
+			t.Fatalf("expected status=completed, got %q (error=%q)", result.Status, result.Error)
+		}
+		if result.Output != "done after foreground rewrite" {
+			t.Fatalf("expected foreground rewrite result, got %q", result.Output)
+		}
+		if result.SessionID != "sess-background-control" {
+			t.Fatalf("expected session id sess-background-control, got %q", result.SessionID)
+		}
+	case <-time.After(5 * time.Second): // fail before the 8s/10s execution timeouts if no result arrives
+		t.Fatal("timeout waiting for result — claude backend did not foreground background control_request")
+	}
+}
+
+func TestClaudeExecuteFailsLoudlyOnAsyncLaunchedToolResult(t *testing.T) { // End-to-end: a run that reports success after an async_launched tool result must end as failed.
+	t.Parallel()
+
+	self, err := os.Executable() // the test binary is used as the claude executable; TestMain dispatches on CLAUDE_FAKE_MODE
+	if err != nil {
+		t.Fatalf("os.Executable: %v", err)
+	}
+
+	backend, err := New("claude", Config{
+		ExecutablePath: self,
+		Env:            map[string]string{"CLAUDE_FAKE_MODE": "async_launched_tool_result"}, // selects runFakeClaudeAsyncLaunchedToolResult
+		Logger:         slog.Default(),
+	})
+	if err != nil {
+		t.Fatalf("new claude backend: %v", err)
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	session, err := backend.Execute(ctx, "launch async work", ExecOptions{Timeout: 8 * time.Second})
+	if err != nil {
+		t.Fatalf("execute returned error: %v", err)
+	}
+	go func() { // discard streamed messages; only the final Result is asserted
+		for range session.Messages {
+		}
+	}()
+
+	select {
+	case result, ok := <-session.Result:
+		if !ok {
+			t.Fatal("result channel closed without a value")
+		}
+		if result.Status != "failed" { // the fake's own result line says success; the backend must override it
+			t.Fatalf("expected status=failed, got %q (error=%q)", result.Status, result.Error)
+		}
+		if !strings.Contains(result.Error, "async background task") { // substring of the finalError set in Execute
+			t.Fatalf("expected async background task error, got %q", result.Error)
+		}
+		if result.SessionID != "sess-async-launched" { // session id must still be reported on the failure path
+			t.Fatalf("expected session id sess-async-launched, got %q", result.SessionID)
+		}
+	case <-time.After(5 * time.Second): // fail before the 8s/10s execution timeouts if no result arrives
+		t.Fatal("timeout waiting for result — claude backend did not fail async_launched tool result")
+	}
+}
--- a/server/pkg/agent/claude_test.go
+++ b/server/pkg/agent/claude_test.go
@@ -105,7 +105,9 @@ func TestClaudeHandleUserToolResult(t *testing.T) {
 		}),
 	}

-	b.handleUser(msg, ch)
+	if b.handleUser(msg, ch) {
+		t.Fatal("did not expect async launch in ordinary tool result")
+	}

 	select {
 	case m := <-ch:
@@ -152,6 +154,112 @@ func TestClaudeHandleControlRequestAutoApproves(t *testing.T) {
 	if innerResp["behavior"] != "allow" {
 		t.Fatalf("expected behavior allow, got %v", innerResp["behavior"])
 	}
+	updatedInput := innerResp["updatedInput"].(map[string]any)
+	if _, ok := updatedInput["run_in_background"]; ok {
+		t.Fatal("did not expect run_in_background to be injected into ordinary tool input")
+	}
+}
+
+func TestClaudeHandleControlRequestForcesBackgroundToolsForeground(t *testing.T) { // Unit test: handleControlRequest rewrites run_in_background=true to false and keeps the other input fields.
+	t.Parallel()
+
+	for _, toolName := range []string{"Bash", "Agent"} { // NOTE(review): toolName is captured by the parallel subtest closure without a per-iteration rebinding (toolName := toolName); before Go 1.22 every subtest would see the last value — confirm the module's Go version
+		t.Run(toolName, func(t *testing.T) {
+			t.Parallel()
+
+			b := &claudeBackend{cfg: Config{Logger: slog.Default()}} // Logger is set because the rewrite path logs through b.cfg.Logger
+
+			var written bytes.Buffer // stands in for claude's stdin and captures the control_response
+
+			msg := claudeSDKMessage{
+				Type:      "control_request",
+				RequestID: "req-42",
+				Request: mustMarshal(t, claudeControlRequestPayload{
+					Subtype:  "tool_use",
+					ToolName: toolName,
+					Input: mustMarshal(t, map[string]any{ // the same input shape is used for both tool names
+						"command":           "sleep 60",
+						"run_in_background": true,
+					}),
+				}),
+			}
+
+			b.handleControlRequest(msg, &written)
+
+			var resp map[string]any
+			if err := json.Unmarshal(bytes.TrimSpace(written.Bytes()), &resp); err != nil {
+				t.Fatalf("unmarshal response: %v", err)
+			}
+
+			respInner := resp["response"].(map[string]any) // unchecked assertions: a malformed response panics the test
+			innerResp := respInner["response"].(map[string]any)
+			if innerResp["behavior"] != "allow" { // the request is still approved, only with modified input
+				t.Fatalf("expected behavior allow, got %v", innerResp["behavior"])
+			}
+			updatedInput := innerResp["updatedInput"].(map[string]any)
+			if updatedInput["run_in_background"] != false { // key must be present and boolean false; a missing key (nil) also fails
+				t.Fatalf("expected run_in_background=false, got %v", updatedInput["run_in_background"])
+			}
+			if updatedInput["command"] != "sleep 60" { // the rest of the input must pass through untouched
+				t.Fatalf("expected original command to be preserved, got %v", updatedInput["command"])
+			}
+		})
+	}
+}
+
+func TestClaudeHandleUserDetectsAsyncLaunchedToolResult(t *testing.T) { // Unit test: handleUser returns true when a tool_result's content object has status "async_launched".
+	t.Parallel()
+
+	b := &claudeBackend{cfg: Config{Logger: slog.Default()}}
+	ch := make(chan Message, 10) // buffered; the forwarded tool-result message is not inspected here
+
+	msg := claudeSDKMessage{
+		Type: "user",
+		Message: mustMarshal(t, claudeMessageContent{
+			Role: "user",
+			Content: []claudeContentBlock{
+				{
+					Type:      "tool_result",
+					ToolUseID: "call-1",
+					Content: mustMarshal(t, map[string]any{ // structured object with a top-level status field
+						"status":  "async_launched",
+						"message": "background task launched",
+					}),
+				},
+			},
+		}),
+	}
+
+	if !b.handleUser(msg, ch) {
+		t.Fatal("expected async launch to be detected")
+	}
+}
+
+func TestClaudeHandleUserIgnoresAsyncLaunchedTextOutput(t *testing.T) { // Unit test: "async_launched" appearing only inside a text field must not be reported as an async launch.
+	t.Parallel()
+
+	b := &claudeBackend{cfg: Config{Logger: slog.Default()}}
+	ch := make(chan Message, 10) // buffered; the forwarded tool-result message is not inspected here
+
+	msg := claudeSDKMessage{
+		Type: "user",
+		Message: mustMarshal(t, claudeMessageContent{
+			Role: "user",
+			Content: []claudeContentBlock{
+				{
+					Type:      "tool_result",
+					ToolUseID: "call-1",
+					Content: mustMarshal(t, map[string]any{ // object with no top-level "status" key
+						"stdout": `fixture contained {"status":"async_launched"} as plain text`, // the marker appears only inside a string value
+					}),
+				},
+			},
+		}),
+	}
+
+	if b.handleUser(msg, ch) {
+		t.Fatal("did not expect async launch to be detected in ordinary text output")
+	}
 }

 func TestClaudeHandleAssistantInvalidJSON(t *testing.T) {
Author	SHA1	Message	Date
J	6a974af4c1	fix(agent): narrow Claude async launch detection Co-authored-by: multica-agent <github@multica.ai>	2026-06-17 18:34:41 +08:00
J	984d198f4a	fix(agent): force Claude background tools foreground Co-authored-by: multica-agent <github@multica.ai>	2026-06-17 18:25:47 +08:00
J	704d496cd6	fix(daemon): add background task safety brief Co-authored-by: multica-agent <github@multica.ai>	2026-06-17 18:01:43 +08:00