fix(agent/acp): surface kimi tool input/output via content blocks

Kimi-cli emits tool_call and tool_call_update ACP frames with the
input/output inside a `content` array of ContentToolCallContent blocks
(shape: {type:"content", content:{type:"text", text:"..."}}), not in
the hermes-style `rawInput` map / `rawOutput` string. Our parser only
looked at rawInput/rawOutput, so the daemon recorded empty Input and
Output for every kimi tool — the execution-history UI showed blank
terminal panels even for commands that ran fine.

Add extractACPToolCallText() and a fallback in handleToolCallStart /
handleToolCallUpdate: when rawInput is nil / rawOutput is empty, pull
the text out of the content blocks. rawInput / rawOutput still take
precedence so hermes' behaviour is untouched. Terminal /
FileEditToolCallContent blocks are skipped (we have nothing to render
them as — kimi only emits TerminalToolCallContent when the client
advertises terminal capability, which we don't).

Tests:
- TestHermesClientHandleToolCallStartKimiContent — content array →
  Input.text populated.
- TestHermesClientHandleToolCallCompleteKimiContent — multi-block
  content → Output concatenated with newline separator.
- TestHermesClientHandleToolCallRawOutputTakesPrecedence — hermes
  rawOutput still wins when both are present.
- TestExtractACPToolCallText — unit coverage for the helper
  (single/multiple text blocks, terminal-block skip, empty input).
2026-06-17 03:38:32 +02:00 · 2026-04-21 01:56:36 +08:00
parent 7e29543fe3
commit 4335c19813
2 changed files with 196 additions and 10 deletions
--- a/server/pkg/agent/hermes.go
+++ b/server/pkg/agent/hermes.go
@@ -636,32 +636,43 @@ func (c *hermesClient) handleAgentThought(data json.RawMessage) {

 func (c *hermesClient) handleToolCallStart(data json.RawMessage) {
 	var msg struct {
-		ToolCallID string         `json:"toolCallId"`
-		Title      string         `json:"title"`
-		Kind       string         `json:"kind"`
-		RawInput   map[string]any `json:"rawInput"`
+		ToolCallID string            `json:"toolCallId"`
+		Title      string            `json:"title"`
+		Kind       string            `json:"kind"`
+		RawInput   map[string]any    `json:"rawInput"`
+		Content    []json.RawMessage `json:"content"`
 	}
 	if err := json.Unmarshal(data, &msg); err != nil {
 		return
 	}

 	toolName := hermesToolNameFromTitle(msg.Title, msg.Kind)
+	input := msg.RawInput
+	// kimi-cli emits the tool input as ACP `content` blocks (text type),
+	// not `rawInput`. Fall back so the UI shows "echo hello" instead of
+	// a blank input bubble. See kimi_cli/acp/session.py::_send_tool_call.
+	if input == nil {
+		if text := extractACPToolCallText(msg.Content); text != "" {
+			input = map[string]any{"text": text}
+		}
+	}
 	if c.onMessage != nil {
 		c.onMessage(Message{
 			Type:   MessageToolUse,
 			Tool:   toolName,
 			CallID: msg.ToolCallID,
-			Input:  msg.RawInput,
+			Input:  input,
 		})
 	}
 }

 func (c *hermesClient) handleToolCallUpdate(data json.RawMessage) {
 	var msg struct {
-		ToolCallID string `json:"toolCallId"`
-		Status     string `json:"status"`
-		Kind       string `json:"kind"`
-		RawOutput  string `json:"rawOutput"`
+		ToolCallID string            `json:"toolCallId"`
+		Status     string            `json:"status"`
+		Kind       string            `json:"kind"`
+		RawOutput  string            `json:"rawOutput"`
+		Content    []json.RawMessage `json:"content"`
 	}
 	if err := json.Unmarshal(data, &msg); err != nil {
 		return
@@ -672,15 +683,61 @@ func (c *hermesClient) handleToolCallUpdate(data json.RawMessage) {
 		return
 	}

+	output := msg.RawOutput
+	// kimi-cli returns Shell / file-tool output via `content` blocks on
+	// the completion update, not `rawOutput`. Fall back so the UI shows
+	// the command's stdout instead of an empty terminal panel. See
+	// kimi_cli/acp/session.py::_send_tool_result.
+	if output == "" {
+		output = extractACPToolCallText(msg.Content)
+	}
 	if c.onMessage != nil {
 		c.onMessage(Message{
 			Type:   MessageToolResult,
 			CallID: msg.ToolCallID,
-			Output: msg.RawOutput,
+			Output: output,
 		})
 	}
 }

+// extractACPToolCallText joins, with a newline separator, the text of
+// every non-empty ACP `ContentToolCallContent` block (shape:
+// {"type":"content","content":{"type":"text","text":"..."}}) from a
+// tool_call / tool_call_update's `content` array. Other block kinds
+// (terminal, diff, image) and malformed entries are skipped — the
+// client UI doesn't have a way to render raw terminal_id references,
+// so a text concatenation is the most useful thing we can surface.
+func extractACPToolCallText(blocks []json.RawMessage) string {
+	var b strings.Builder
+	for _, raw := range blocks {
+		var outer struct {
+			Type    string          `json:"type"`
+			Content json.RawMessage `json:"content"`
+		}
+		if err := json.Unmarshal(raw, &outer); err != nil {
+			continue
+		}
+		if outer.Type != "content" || len(outer.Content) == 0 {
+			continue
+		}
+		var inner struct {
+			Type string `json:"type"`
+			Text string `json:"text"`
+		}
+		if err := json.Unmarshal(outer.Content, &inner); err != nil {
+			continue
+		}
+		if inner.Type != "text" || inner.Text == "" {
+			continue
+		}
+		if b.Len() > 0 {
+			b.WriteByte('\n')
+		}
+		b.WriteString(inner.Text)
+	}
+	return b.String()
+}
+
 func (c *hermesClient) handleUsageUpdate(data json.RawMessage) {
 	var msg struct {
 		Usage struct {
--- a/server/pkg/agent/hermes_test.go
+++ b/server/pkg/agent/hermes_test.go
@@ -343,6 +343,135 @@ func TestHermesClientHandleToolCallComplete(t *testing.T) {
 	}
 }

+// TestHermesClientHandleToolCallStartKimiContent covers the kimi-cli
+// emission shape: tool_call carries an ACP `content` array with a
+// nested text block instead of a `rawInput` map. Without the fallback
+// extractor the daemon records empty Input and the UI renders a blank
+// tool bubble ("terminal's input is empty").
+func TestHermesClientHandleToolCallStartKimiContent(t *testing.T) {
+	t.Parallel()
+
+	var got Message
+	c := &hermesClient{
+		pending: make(map[int]*pendingRPC),
+		onMessage: func(msg Message) {
+			got = msg
+		},
+	}
+
+	line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call","toolCallId":"tc-kimi-1","title":"Shell","status":"in_progress","content":[{"type":"content","content":{"type":"text","text":"{\"command\":\"echo hello\"}"}}]}}}`
+	c.handleLine(line)
+
+	if got.Type != MessageToolUse {
+		t.Fatalf("type: got %v, want MessageToolUse", got.Type)
+	}
+	if got.CallID != "tc-kimi-1" {
+		t.Errorf("callID: got %q", got.CallID)
+	}
+	text, ok := got.Input["text"].(string)
+	if !ok {
+		t.Fatalf("Input.text missing or not a string: %v", got.Input)
+	}
+	if !strings.Contains(text, "echo hello") {
+		t.Errorf("Input.text should contain the command args, got %q", text)
+	}
+}
+
+// TestHermesClientHandleToolCallCompleteKimiContent covers kimi's
+// completion shape: the Shell tool's stdout comes back via `content`
+// blocks rather than `rawOutput`.
+func TestHermesClientHandleToolCallCompleteKimiContent(t *testing.T) {
+	t.Parallel()
+
+	var got Message
+	c := &hermesClient{
+		pending: make(map[int]*pendingRPC),
+		onMessage: func(msg Message) {
+			got = msg
+		},
+	}
+
+	line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc-kimi-1","status":"completed","content":[{"type":"content","content":{"type":"text","text":"hello\n"}},{"type":"content","content":{"type":"text","text":"exit 0"}}]}}}`
+	c.handleLine(line)
+
+	if got.Type != MessageToolResult {
+		t.Fatalf("type: got %v, want MessageToolResult", got.Type)
+	}
+	// Multiple content blocks must be concatenated; newline separator
+	// keeps streamed chunks visually distinct in the UI.
+	if got.Output != "hello\n\nexit 0" {
+		t.Errorf("output: got %q, want %q", got.Output, "hello\n\nexit 0")
+	}
+}
+
+// TestHermesClientHandleToolCallRawOutputTakesPrecedence keeps hermes
+// behaviour unchanged: when the update has both `rawOutput` (hermes
+// convention) and `content` (would be ambiguous), honour rawOutput.
+func TestHermesClientHandleToolCallRawOutputTakesPrecedence(t *testing.T) {
+	t.Parallel()
+
+	var got Message
+	c := &hermesClient{
+		pending: make(map[int]*pendingRPC),
+		onMessage: func(msg Message) {
+			got = msg
+		},
+	}
+
+	line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc","status":"completed","rawOutput":"raw wins","content":[{"type":"content","content":{"type":"text","text":"ignored"}}]}}}`
+	c.handleLine(line)
+
+	if got.Output != "raw wins" {
+		t.Errorf("output: got %q, want %q", got.Output, "raw wins")
+	}
+}
+
+func TestExtractACPToolCallText(t *testing.T) {
+	t.Parallel()
+	tests := []struct {
+		name string
+		json string
+		want string
+	}{
+		{
+			name: "single text block",
+			json: `[{"type":"content","content":{"type":"text","text":"hello"}}]`,
+			want: "hello",
+		},
+		{
+			name: "multiple text blocks join with newline",
+			json: `[{"type":"content","content":{"type":"text","text":"a"}},{"type":"content","content":{"type":"text","text":"b"}}]`,
+			want: "a\nb",
+		},
+		{
+			name: "terminal blocks skipped",
+			json: `[{"type":"terminal","terminalId":"t1"},{"type":"content","content":{"type":"text","text":"shell out"}}]`,
+			want: "shell out",
+		},
+		{
+			name: "empty array returns empty",
+			json: `[]`,
+			want: "",
+		},
+		{
+			name: "no text content",
+			json: `[{"type":"terminal","terminalId":"t1"}]`,
+			want: "",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var blocks []json.RawMessage
+			if err := json.Unmarshal([]byte(tt.json), &blocks); err != nil {
+				t.Fatalf("unmarshal: %v", err)
+			}
+			if got := extractACPToolCallText(blocks); got != tt.want {
+				t.Errorf("got %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
+
 func TestHermesClientHandleToolCallInProgressIgnored(t *testing.T) {
 	t.Parallel()