Compare commits

...

3 Commits

Author SHA1 Message Date
J
6a974af4c1 fix(agent): narrow Claude async launch detection
Co-authored-by: multica-agent <github@multica.ai>
2026-06-17 18:34:41 +08:00
J
984d198f4a fix(agent): force Claude background tools foreground
Co-authored-by: multica-agent <github@multica.ai>
2026-06-17 18:25:47 +08:00
J
704d496cd6 fix(daemon): add background task safety brief
Co-authored-by: multica-agent <github@multica.ai>
2026-06-17 18:01:43 +08:00
5 changed files with 379 additions and 4 deletions

View File

@@ -801,6 +801,47 @@ func TestInjectRuntimeConfigClaude(t *testing.T) {
}
}
// TestInjectRuntimeConfigBackgroundTaskSafetyProviderAgnostic calls
// InjectRuntimeConfig once per provider and checks that the file expected in
// the target directory contains the background task safety section.
func TestInjectRuntimeConfigBackgroundTaskSafetyProviderAgnostic(t *testing.T) {
	t.Parallel()

	type providerCase struct {
		name string
		file string
	}
	cases := []providerCase{
		{name: "claude", file: "CLAUDE.md"},
		{name: "codex", file: "AGENTS.md"},
		{name: "opencode", file: "AGENTS.md"},
		{name: "gemini", file: "GEMINI.md"},
		{name: "hermes", file: "AGENTS.md"},
	}

	// Phrases every provider's file must contain; read-only, so it is safe to
	// share across the parallel subtests.
	required := []string{
		"## Background Task Safety",
		"Do NOT end your turn while background tasks",
		"wait for a future notification/reminder",
		"run the work synchronously instead",
	}

	for _, pc := range cases {
		pc := pc // per-iteration copy for the parallel subtest closure
		t.Run(pc.name, func(t *testing.T) {
			t.Parallel()

			workDir := t.TempDir()
			_, err := InjectRuntimeConfig(workDir, pc.name, TaskContextForEnv{IssueID: "issue-1"})
			if err != nil {
				t.Fatalf("InjectRuntimeConfig failed: %v", err)
			}

			raw, err := os.ReadFile(filepath.Join(workDir, pc.file))
			if err != nil {
				t.Fatalf("read %s: %v", pc.file, err)
			}

			body := string(raw)
			for _, phrase := range required {
				if strings.Contains(body, phrase) {
					continue
				}
				t.Errorf("%s missing background task safety text %q\n---\n%s", pc.file, phrase, body)
			}
		})
	}
}
func TestInjectRuntimeConfigAvailableCommandsCoreOnly(t *testing.T) {
t.Parallel()
dir := t.TempDir()

View File

@@ -366,6 +366,7 @@ func buildMetaSkillContent(provider string, ctx TaskContextForEnv) string {
b.WriteString("# Multica Agent Runtime\n\n")
b.WriteString("You are a coding agent in the Multica platform. Use the `multica` CLI to interact with the platform.\n\n")
writeBackgroundTaskSafetyInstructions(&b)
// Always emit agent identity so the agent knows who it is, even when
// dispatched via @mention on an issue assigned to a different agent.
@@ -794,3 +795,13 @@ func buildMetaSkillContent(provider string, ctx TaskContextForEnv) string {
return b.String()
}
// writeBackgroundTaskSafetyInstructions appends the "Background Task Safety"
// markdown section to b: a heading, an explanatory paragraph, and five bullet
// rules, ending with a blank line.
func writeBackgroundTaskSafetyInstructions(b *strings.Builder) {
	// Each entry carries its own trailing newline(s), so the pieces are simply
	// concatenated in order.
	sections := []string{
		"## Background Task Safety\n\n",
		"Multica marks this task terminal when your top-level agent process/turn exits. Any background work you started but did not collect before exiting can be orphaned: its result may be lost, and the user may see a completed/failed task even though the delegated work was never synthesized.\n\n",
		"- Do NOT end your turn while background tasks, async subagents, background shell commands, or detached tool calls are still running.\n",
		"- If a tool or runtime offers a background mode, use it only when you can explicitly wait for completion and collect the result before your final response.\n",
		"- If a tool response says to wait for a future notification/reminder instead of collecting now, do not rely on that in Multica-managed runs. Block on the appropriate wait/output/collect operation before exiting.\n",
		"- If you cannot observe or collect a background task's result, do not spawn it in the background; run the work synchronously instead.\n",
		"- Before posting your final result or exiting silently, account for every background task you started and incorporate its output or failure into your response.\n\n",
	}
	for _, section := range sections {
		b.WriteString(section)
	}
}

View File

@@ -135,6 +135,7 @@ func (b *claudeBackend) Execute(ctx context.Context, prompt string, opts ExecOpt
var sessionID string
finalStatus := "completed"
var finalError string
sawAsyncLaunch := false
usage := make(map[string]TokenUsage)
// Close stdout when the context is cancelled so scanner.Scan() unblocks.
@@ -162,7 +163,9 @@ func (b *claudeBackend) Execute(ctx context.Context, prompt string, opts ExecOpt
case "assistant":
b.handleAssistant(msg, msgCh, &output, usage)
case "user":
b.handleUser(msg, msgCh)
if b.handleUser(msg, msgCh) {
sawAsyncLaunch = true
}
case "system":
if msg.SessionID != "" {
sessionID = msg.SessionID
@@ -222,6 +225,10 @@ func (b *claudeBackend) Execute(ctx context.Context, prompt string, opts ExecOpt
finalStatus = "failed"
finalError = fmt.Sprintf("claude exited with error: %v", exitErr)
}
if finalStatus == "completed" && sawAsyncLaunch {
finalStatus = "failed"
finalError = "claude launched an async background task; Multica-managed runs require foreground execution"
}
// cmd.Wait() has returned — os/exec's stderr copy goroutine has
// observed every byte claude wrote to stderr before exiting, so
@@ -297,17 +304,21 @@ func (b *claudeBackend) handleAssistant(msg claudeSDKMessage, ch chan<- Message,
}
}
func (b *claudeBackend) handleUser(msg claudeSDKMessage, ch chan<- Message) {
func (b *claudeBackend) handleUser(msg claudeSDKMessage, ch chan<- Message) bool {
var content claudeMessageContent
if err := json.Unmarshal(msg.Message, &content); err != nil {
return
return false
}
sawAsyncLaunch := false
for _, block := range content.Content {
if block.Type == "tool_result" {
resultStr := ""
if block.Content != nil {
resultStr = string(block.Content)
if claudeToolResultHasAsyncLaunch(block.Content) {
sawAsyncLaunch = true
}
}
trySend(ch, Message{
Type: MessageToolResult,
@@ -316,6 +327,7 @@ func (b *claudeBackend) handleUser(msg claudeSDKMessage, ch chan<- Message) {
})
}
}
return sawAsyncLaunch
}
func (b *claudeBackend) handleControlRequest(msg claudeSDKMessage, stdin interface{ Write([]byte) (int, error) }) {
@@ -332,6 +344,12 @@ func (b *claudeBackend) handleControlRequest(msg claudeSDKMessage, stdin interfa
if inputMap == nil {
inputMap = map[string]any{}
}
if forceClaudeToolInputForeground(inputMap) {
b.cfg.Logger.Info("claude: forced foreground tool execution",
"request_id", msg.RequestID,
"tool", req.ToolName,
)
}
response := map[string]any{
"type": "control_response",
@@ -356,6 +374,50 @@ func (b *claudeBackend) handleControlRequest(msg claudeSDKMessage, stdin interfa
}
}
// forceClaudeToolInputForeground rewrites a tool input that asks for
// background execution so that it runs in the foreground. It mutates input in
// place and reports whether a rewrite happened. Only a boolean true stored
// under "run_in_background" triggers the rewrite; a missing key, a false
// value, or a non-boolean value leaves input untouched.
func forceClaudeToolInputForeground(input map[string]any) bool {
	requested, isBool := input["run_in_background"].(bool)
	if !isBool || !requested {
		return false
	}
	input["run_in_background"] = false
	return true
}
func claudeToolResultHasAsyncLaunch(raw json.RawMessage) bool {
if len(raw) == 0 {
return false
}
var value any
if err := json.Unmarshal(raw, &value); err != nil {
return false
}
switch v := value.(type) {
case map[string]any:
if claudeMapHasAsyncLaunchStatus(v) {
return true
}
if content, ok := v["content"].([]any); ok {
return claudeArrayHasAsyncLaunchStatus(content)
}
case []any:
return claudeArrayHasAsyncLaunchStatus(v)
}
return false
}
// claudeArrayHasAsyncLaunchStatus reports whether any element of values is a
// JSON object for which claudeMapHasAsyncLaunchStatus returns true. Elements
// that are not objects are skipped.
func claudeArrayHasAsyncLaunchStatus(values []any) bool {
	for i := range values {
		entry, isObj := values[i].(map[string]any)
		if !isObj {
			continue
		}
		if claudeMapHasAsyncLaunchStatus(entry) {
			return true
		}
	}
	return false
}
// claudeMapHasAsyncLaunchStatus reports whether value has a string "status"
// field equal to "async_launched". A missing or non-string status is false.
func claudeMapHasAsyncLaunchStatus(value map[string]any) bool {
	if s, isString := value["status"].(string); isString {
		return s == "async_launched"
	}
	return false
}
// ── Claude SDK JSON types ──
type claudeSDKMessage struct {

View File

@@ -25,6 +25,12 @@ func TestMain(m *testing.M) {
case "control_request":
runFakeClaudeControlRequest()
os.Exit(0)
case "background_control_request":
runFakeClaudeBackgroundControlRequest()
os.Exit(0)
case "async_launched_tool_result":
runFakeClaudeAsyncLaunchedToolResult()
os.Exit(0)
default:
fmt.Fprintf(os.Stderr, "unknown CLAUDE_FAKE_MODE: %q\n", mode)
os.Exit(2)
@@ -86,6 +92,57 @@ func runFakeClaudeControlRequest() {
fmt.Println(`{"type":"result","subtype":"success","is_error":false,"session_id":"sess-control","result":"done after control"}`)
}
// runFakeClaudeBackgroundControlRequest plays the role of a claude process
// that asks permission to run a Bash tool with run_in_background=true. It
// consumes one prompt line from stdin, emits a system message and the
// control_request, then reads one response line and checks that it is a
// control_response for "req-bg" whose updatedInput has run_in_background set
// to boolean false. On success it emits a tool_result and a final result; on
// any failure it writes a diagnostic to stderr and exits with a code in 31-35.
func runFakeClaudeBackgroundControlRequest() {
	// abort prints a diagnostic to stderr and terminates the process.
	abort := func(code int, format string, args ...any) {
		fmt.Fprintf(os.Stderr, format, args...)
		os.Exit(code)
	}

	stdin := bufio.NewReader(os.Stdin)
	if _, err := stdin.ReadString('\n'); err != nil {
		abort(31, "read prompt: %v\n", err)
	}

	fmt.Println(`{"type":"system","session_id":"sess-background-control"}`)
	fmt.Println(`{"type":"control_request","request_id":"req-bg","request":{"subtype":"tool_use","tool_name":"Bash","input":{"command":"sleep 60","run_in_background":true}}}`)

	line, err := stdin.ReadString('\n')
	if err != nil {
		abort(32, "read control response: %v\n", err)
	}

	var reply struct {
		Type     string `json:"type"`
		Response struct {
			RequestID string `json:"request_id"`
			Response  struct {
				UpdatedInput map[string]any `json:"updatedInput"`
			} `json:"response"`
		} `json:"response"`
	}
	if err := json.Unmarshal([]byte(strings.TrimSpace(line)), &reply); err != nil {
		abort(33, "decode control response: %v\n", err)
	}

	addressedToUs := reply.Type == "control_response" && reply.Response.RequestID == "req-bg"
	if !addressedToUs {
		abort(34, "unexpected control response: %s\n", line)
	}

	// The flag must still be present as a boolean, and it must be false.
	background, isBool := reply.Response.Response.UpdatedInput["run_in_background"].(bool)
	if !isBool || background {
		abort(35, "expected foreground updatedInput, got: %s\n", line)
	}

	fmt.Println(`{"type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"call-bg","content":"foreground completed"}]}}`)
	fmt.Println(`{"type":"result","subtype":"success","is_error":false,"session_id":"sess-background-control","result":"done after foreground rewrite"}`)
}
// runFakeClaudeAsyncLaunchedToolResult plays the role of a claude process
// whose tool result reports status "async_launched" and which then finishes
// its turn with a success result. It consumes one prompt line from stdin
// first; if that read fails it reports the error on stderr and exits with 41.
func runFakeClaudeAsyncLaunchedToolResult() {
	stdin := bufio.NewReader(os.Stdin)
	_, err := stdin.ReadString('\n')
	if err != nil {
		fmt.Fprintf(os.Stderr, "read prompt: %v\n", err)
		os.Exit(41)
	}

	// Scripted stdout stream, one JSON message per line.
	script := []string{
		`{"type":"system","session_id":"sess-async-launched"}`,
		`{"type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"call-async","content":{"status":"async_launched","message":"background task launched"}}]}}`,
		`{"type":"result","subtype":"success","is_error":false,"session_id":"sess-async-launched","result":"parent turn completed early"}`,
	}
	for _, message := range script {
		fmt.Println(message)
	}
}
// TestClaudeExecuteDoesNotDeadlockOnStartupStdoutBurst verifies that the
// claude backend drains stdout concurrently with writing the prompt to
// stdin. The buggy path serialises the two: writeClaudeInput runs before
@@ -194,3 +251,99 @@ func TestClaudeExecuteRespondsToControlRequest(t *testing.T) {
t.Fatal("timeout waiting for result — claude backend did not answer control_request")
}
}
func TestClaudeExecuteForcesBackgroundControlRequestForeground(t *testing.T) {
t.Parallel()
self, err := os.Executable()
if err != nil {
t.Fatalf("os.Executable: %v", err)
}
backend, err := New("claude", Config{
ExecutablePath: self,
Env: map[string]string{"CLAUDE_FAKE_MODE": "background_control_request"},
Logger: slog.Default(),
})
if err != nil {
t.Fatalf("new claude backend: %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
session, err := backend.Execute(ctx, "run a background command", ExecOptions{Timeout: 8 * time.Second})
if err != nil {
t.Fatalf("execute returned error: %v", err)
}
go func() {
for range session.Messages {
}
}()
select {
case result, ok := <-session.Result:
if !ok {
t.Fatal("result channel closed without a value")
}
if result.Status != "completed" {
t.Fatalf("expected status=completed, got %q (error=%q)", result.Status, result.Error)
}
if result.Output != "done after foreground rewrite" {
t.Fatalf("expected foreground rewrite result, got %q", result.Output)
}
if result.SessionID != "sess-background-control" {
t.Fatalf("expected session id sess-background-control, got %q", result.SessionID)
}
case <-time.After(5 * time.Second):
t.Fatal("timeout waiting for result — claude backend did not foreground background control_request")
}
}
func TestClaudeExecuteFailsLoudlyOnAsyncLaunchedToolResult(t *testing.T) {
t.Parallel()
self, err := os.Executable()
if err != nil {
t.Fatalf("os.Executable: %v", err)
}
backend, err := New("claude", Config{
ExecutablePath: self,
Env: map[string]string{"CLAUDE_FAKE_MODE": "async_launched_tool_result"},
Logger: slog.Default(),
})
if err != nil {
t.Fatalf("new claude backend: %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
session, err := backend.Execute(ctx, "launch async work", ExecOptions{Timeout: 8 * time.Second})
if err != nil {
t.Fatalf("execute returned error: %v", err)
}
go func() {
for range session.Messages {
}
}()
select {
case result, ok := <-session.Result:
if !ok {
t.Fatal("result channel closed without a value")
}
if result.Status != "failed" {
t.Fatalf("expected status=failed, got %q (error=%q)", result.Status, result.Error)
}
if !strings.Contains(result.Error, "async background task") {
t.Fatalf("expected async background task error, got %q", result.Error)
}
if result.SessionID != "sess-async-launched" {
t.Fatalf("expected session id sess-async-launched, got %q", result.SessionID)
}
case <-time.After(5 * time.Second):
t.Fatal("timeout waiting for result — claude backend did not fail async_launched tool result")
}
}

View File

@@ -105,7 +105,9 @@ func TestClaudeHandleUserToolResult(t *testing.T) {
}),
}
b.handleUser(msg, ch)
if b.handleUser(msg, ch) {
t.Fatal("did not expect async launch in ordinary tool result")
}
select {
case m := <-ch:
@@ -152,6 +154,112 @@ func TestClaudeHandleControlRequestAutoApproves(t *testing.T) {
if innerResp["behavior"] != "allow" {
t.Fatalf("expected behavior allow, got %v", innerResp["behavior"])
}
updatedInput := innerResp["updatedInput"].(map[string]any)
if _, ok := updatedInput["run_in_background"]; ok {
t.Fatal("did not expect run_in_background to be injected into ordinary tool input")
}
}
// TestClaudeHandleControlRequestForcesBackgroundToolsForeground verifies, for
// each listed tool name, that a tool_use control request carrying
// run_in_background=true is answered with behavior "allow" and an updatedInput
// in which run_in_background is false while the original command is kept.
func TestClaudeHandleControlRequestForcesBackgroundToolsForeground(t *testing.T) {
	t.Parallel()
	for _, toolName := range []string{"Bash", "Agent"} {
		// Per-iteration copy: the subtest is parallel, so its closure runs
		// after the loop has advanced. Without the copy, toolchains that share
		// one loop variable across iterations would test only the last name.
		toolName := toolName
		t.Run(toolName, func(t *testing.T) {
			t.Parallel()
			b := &claudeBackend{cfg: Config{Logger: slog.Default()}}
			var written bytes.Buffer
			msg := claudeSDKMessage{
				Type:      "control_request",
				RequestID: "req-42",
				Request: mustMarshal(t, claudeControlRequestPayload{
					Subtype:  "tool_use",
					ToolName: toolName,
					Input: mustMarshal(t, map[string]any{
						"command":           "sleep 60",
						"run_in_background": true,
					}),
				}),
			}
			b.handleControlRequest(msg, &written)

			var resp map[string]any
			if err := json.Unmarshal(bytes.TrimSpace(written.Bytes()), &resp); err != nil {
				t.Fatalf("unmarshal response: %v", err)
			}

			// Checked assertions so a malformed response fails with a message
			// instead of panicking the test binary.
			respInner, ok := resp["response"].(map[string]any)
			if !ok {
				t.Fatalf("expected response object, got %T", resp["response"])
			}
			innerResp, ok := respInner["response"].(map[string]any)
			if !ok {
				t.Fatalf("expected nested response object, got %T", respInner["response"])
			}
			if innerResp["behavior"] != "allow" {
				t.Fatalf("expected behavior allow, got %v", innerResp["behavior"])
			}
			updatedInput, ok := innerResp["updatedInput"].(map[string]any)
			if !ok {
				t.Fatalf("expected updatedInput object, got %T", innerResp["updatedInput"])
			}
			if updatedInput["run_in_background"] != false {
				t.Fatalf("expected run_in_background=false, got %v", updatedInput["run_in_background"])
			}
			if updatedInput["command"] != "sleep 60" {
				t.Fatalf("expected original command to be preserved, got %v", updatedInput["command"])
			}
		})
	}
}
func TestClaudeHandleUserDetectsAsyncLaunchedToolResult(t *testing.T) {
t.Parallel()
b := &claudeBackend{cfg: Config{Logger: slog.Default()}}
ch := make(chan Message, 10)
msg := claudeSDKMessage{
Type: "user",
Message: mustMarshal(t, claudeMessageContent{
Role: "user",
Content: []claudeContentBlock{
{
Type: "tool_result",
ToolUseID: "call-1",
Content: mustMarshal(t, map[string]any{
"status": "async_launched",
"message": "background task launched",
}),
},
},
}),
}
if !b.handleUser(msg, ch) {
t.Fatal("expected async launch to be detected")
}
}
func TestClaudeHandleUserIgnoresAsyncLaunchedTextOutput(t *testing.T) {
t.Parallel()
b := &claudeBackend{cfg: Config{Logger: slog.Default()}}
ch := make(chan Message, 10)
msg := claudeSDKMessage{
Type: "user",
Message: mustMarshal(t, claudeMessageContent{
Role: "user",
Content: []claudeContentBlock{
{
Type: "tool_result",
ToolUseID: "call-1",
Content: mustMarshal(t, map[string]any{
"stdout": `fixture contained {"status":"async_launched"} as plain text`,
}),
},
},
}),
}
if b.handleUser(msg, ch) {
t.Fatal("did not expect async launch to be detected in ordinary text output")
}
}
func TestClaudeHandleAssistantInvalidJSON(t *testing.T) {