fix(agent/acp): surface kimi tool input/output via content blocks

Kimi-cli emits tool_call and tool_call_update ACP frames with the
input/output inside a `content` array of ContentToolCallContent
blocks (shape: {type:"content", content:{type:"text", text:"..."}}),
not in the hermes-style `rawInput` map / `rawOutput` string. Our
parser only looked at rawInput/rawOutput, so the daemon recorded
empty Input and Output for every kimi tool — the execution-history
UI showed blank terminal panels even for commands that ran fine.

Add extractACPToolCallText() and a fallback in handleToolCallStart /
handleToolCallUpdate: when rawInput is nil / rawOutput is empty, pull
the text out of the content blocks. rawInput / rawOutput still take
precedence so hermes' behaviour is untouched. Terminal /
FileEditToolCallContent blocks are skipped (we have nothing to render
them as — kimi only emits TerminalToolCallContent when the client
advertises terminal capability, which we don't).

Tests:
- TestHermesClientHandleToolCallStartKimiContent — content array →
  Input.text populated.
- TestHermesClientHandleToolCallCompleteKimiContent — multi-block
  content → Output concatenated with newline separator.
- TestHermesClientHandleToolCallRawOutputTakesPrecedence — hermes
  rawOutput still wins when both are present.
- TestExtractACPToolCallText — unit coverage for the helper
  (single/multiple text blocks, terminal-block skip, empty input).
This commit is contained in:
Lambda
2026-04-21 01:56:36 +08:00
parent 7e29543fe3
commit 4335c19813
2 changed files with 196 additions and 10 deletions

View File

@@ -636,32 +636,43 @@ func (c *hermesClient) handleAgentThought(data json.RawMessage) {
// handleToolCallStart decodes a tool_call payload and reports it through
// c.onMessage as a MessageToolUse carrying the tool name, the call ID and
// the tool input. The tool name is derived from the frame's title and kind
// via hermesToolNameFromTitle. A payload that fails to unmarshal is dropped
// without reporting an error, and nothing is emitted when onMessage is nil.
// NOTE(review): this span is shown as a rendered diff, so the struct fields
// and the Input assignment appear in both their pre- and post-change form —
// confirm against the real file before compiling.
func (c *hermesClient) handleToolCallStart(data json.RawMessage) {
var msg struct {
ToolCallID string `json:"toolCallId"`
Title string `json:"title"`
Kind string `json:"kind"`
RawInput map[string]any `json:"rawInput"`
ToolCallID string `json:"toolCallId"`
Title string `json:"title"`
Kind string `json:"kind"`
RawInput map[string]any `json:"rawInput"`
Content []json.RawMessage `json:"content"`
}
if err := json.Unmarshal(data, &msg); err != nil {
return
}
toolName := hermesToolNameFromTitle(msg.Title, msg.Kind)
input := msg.RawInput
// kimi-cli emits the tool input as ACP `content` blocks (text type),
// not `rawInput`. Fall back so the UI shows "echo hello" instead of
// a blank input bubble. See kimi_cli/acp/session.py::_send_tool_call.
if input == nil {
if text := extractACPToolCallText(msg.Content); text != "" {
input = map[string]any{"text": text}
}
}
if c.onMessage != nil {
c.onMessage(Message{
Type: MessageToolUse,
Tool: toolName,
CallID: msg.ToolCallID,
Input: msg.RawInput,
Input: input,
})
}
}
func (c *hermesClient) handleToolCallUpdate(data json.RawMessage) {
var msg struct {
ToolCallID string `json:"toolCallId"`
Status string `json:"status"`
Kind string `json:"kind"`
RawOutput string `json:"rawOutput"`
ToolCallID string `json:"toolCallId"`
Status string `json:"status"`
Kind string `json:"kind"`
RawOutput string `json:"rawOutput"`
Content []json.RawMessage `json:"content"`
}
if err := json.Unmarshal(data, &msg); err != nil {
return
@@ -672,15 +683,61 @@ func (c *hermesClient) handleToolCallUpdate(data json.RawMessage) {
return
}
output := msg.RawOutput
// kimi-cli returns Shell / file-tool output via `content` blocks on
// the completion update, not `rawOutput`. Fall back so the UI shows
// the command's stdout instead of an empty terminal panel. See
// kimi_cli/acp/session.py::_send_tool_result.
if output == "" {
output = extractACPToolCallText(msg.Content)
}
if c.onMessage != nil {
c.onMessage(Message{
Type: MessageToolResult,
CallID: msg.ToolCallID,
Output: msg.RawOutput,
Output: output,
})
}
}
// extractACPToolCallText gathers the text carried by the entries of a
// tool_call / tool_call_update `content` array and returns it as one
// string, with a single "\n" between consecutive texts.
//
// Only entries shaped like
//
//	{"type":"content","content":{"type":"text","text":"..."}}
//
// contribute. Every other entry is skipped rather than treated as an
// error: entries that do not decode, entries whose outer type is not
// "content" (for example terminal entries), entries whose inner type is
// not "text", and entries whose text is empty. The result is "" when no
// entry contributes, including for a nil or empty slice.
func extractACPToolCallText(blocks []json.RawMessage) string {
	// envelope is the outer wrapper; its payload is decoded lazily so a
	// non-"content" entry never has its body inspected.
	type envelope struct {
		Type    string          `json:"type"`
		Content json.RawMessage `json:"content"`
	}
	type textPayload struct {
		Type string `json:"type"`
		Text string `json:"text"`
	}

	var texts []string
	for _, block := range blocks {
		var env envelope
		if json.Unmarshal(block, &env) != nil || env.Type != "content" || len(env.Content) == 0 {
			continue
		}
		var payload textPayload
		if json.Unmarshal(env.Content, &payload) != nil {
			continue
		}
		if payload.Type == "text" && payload.Text != "" {
			texts = append(texts, payload.Text)
		}
	}
	// Only non-empty texts were collected, so joining them is equivalent
	// to writing a separator before every text except the first.
	return strings.Join(texts, "\n")
}
func (c *hermesClient) handleUsageUpdate(data json.RawMessage) {
var msg struct {
Usage struct {

View File

@@ -343,6 +343,135 @@ func TestHermesClientHandleToolCallComplete(t *testing.T) {
}
}
// TestHermesClientHandleToolCallStartKimiContent feeds the client a
// kimi-cli style tool_call frame: the tool input arrives as a nested text
// block inside the ACP `content` array and no `rawInput` map is present.
// The emitted MessageToolUse must expose that text under Input["text"];
// without the content fallback Input would be empty.
func TestHermesClientHandleToolCallStartKimiContent(t *testing.T) {
	t.Parallel()

	const frame = `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call","toolCallId":"tc-kimi-1","title":"Shell","status":"in_progress","content":[{"type":"content","content":{"type":"text","text":"{\"command\":\"echo hello\"}"}}]}}}`

	var captured Message
	client := &hermesClient{
		pending:   make(map[int]*pendingRPC),
		onMessage: func(m Message) { captured = m },
	}
	client.handleLine(frame)

	if captured.Type != MessageToolUse {
		t.Fatalf("type: got %v, want MessageToolUse", captured.Type)
	}
	if captured.CallID != "tc-kimi-1" {
		t.Errorf("callID: got %q", captured.CallID)
	}

	inputText, isString := captured.Input["text"].(string)
	if !isString {
		t.Fatalf("Input.text missing or not a string: %v", captured.Input)
	}
	if !strings.Contains(inputText, "echo hello") {
		t.Errorf("Input.text should contain the command args, got %q", inputText)
	}
}
// TestHermesClientHandleToolCallCompleteKimiContent feeds the client a
// kimi-cli style completion frame, where the tool's output is delivered
// through `content` blocks and `rawOutput` is absent. The emitted
// MessageToolResult must carry the block texts joined by a newline.
func TestHermesClientHandleToolCallCompleteKimiContent(t *testing.T) {
	t.Parallel()

	const frame = `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc-kimi-1","status":"completed","content":[{"type":"content","content":{"type":"text","text":"hello\n"}},{"type":"content","content":{"type":"text","text":"exit 0"}}]}}}`
	// The first block already ends in "\n", and the joiner adds one more
	// between blocks, hence the blank line in the expected output.
	const want = "hello\n\nexit 0"

	var captured Message
	client := &hermesClient{
		pending:   make(map[int]*pendingRPC),
		onMessage: func(m Message) { captured = m },
	}
	client.handleLine(frame)

	if captured.Type != MessageToolResult {
		t.Fatalf("type: got %v, want MessageToolResult", captured.Type)
	}
	if captured.Output != want {
		t.Errorf("output: got %q, want %q", captured.Output, want)
	}
}
// TestHermesClientHandleToolCallRawOutputTakesPrecedence pins the hermes
// path: a completion frame that carries both `rawOutput` and a `content`
// array must report the rawOutput value and ignore the content text.
func TestHermesClientHandleToolCallRawOutputTakesPrecedence(t *testing.T) {
	t.Parallel()

	const frame = `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc","status":"completed","rawOutput":"raw wins","content":[{"type":"content","content":{"type":"text","text":"ignored"}}]}}}`
	const want = "raw wins"

	var captured Message
	client := &hermesClient{
		pending:   make(map[int]*pendingRPC),
		onMessage: func(m Message) { captured = m },
	}
	client.handleLine(frame)

	if captured.Output != want {
		t.Errorf("output: got %q, want %q", captured.Output, want)
	}
}
// TestExtractACPToolCallText is a table-driven check of the content-block
// text extractor: a single text block, several text blocks joined by a
// newline, terminal blocks being skipped, and inputs that yield no text.
func TestExtractACPToolCallText(t *testing.T) {
	t.Parallel()

	type scenario struct {
		name    string
		payload string // JSON array of content blocks
		want    string
	}
	scenarios := []scenario{
		{
			name:    "single text block",
			payload: `[{"type":"content","content":{"type":"text","text":"hello"}}]`,
			want:    "hello",
		},
		{
			name:    "multiple text blocks join with newline",
			payload: `[{"type":"content","content":{"type":"text","text":"a"}},{"type":"content","content":{"type":"text","text":"b"}}]`,
			want:    "a\nb",
		},
		{
			name:    "terminal blocks skipped",
			payload: `[{"type":"terminal","terminalId":"t1"},{"type":"content","content":{"type":"text","text":"shell out"}}]`,
			want:    "shell out",
		},
		{
			name:    "empty array returns empty",
			payload: `[]`,
			want:    "",
		},
		{
			name:    "no text content",
			payload: `[{"type":"terminal","terminalId":"t1"}]`,
			want:    "",
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			var blocks []json.RawMessage
			if err := json.Unmarshal([]byte(sc.payload), &blocks); err != nil {
				t.Fatalf("unmarshal: %v", err)
			}
			got := extractACPToolCallText(blocks)
			if got != sc.want {
				t.Errorf("got %q, want %q", got, sc.want)
			}
		})
	}
}
func TestHermesClientHandleToolCallInProgressIgnored(t *testing.T) {
t.Parallel()