multica/server/pkg/agent/hermes_test.go

package agent

import (
	"context"
	"encoding/json"
	"log/slog"
	"path/filepath"
	"strings"
	"sync"
	"testing"
	"time"
)

func TestNewReturnsHermesBackend(t *testing.T) {
	t.Parallel()
	b, err := New("hermes", Config{ExecutablePath: "/nonexistent/hermes"})
	if err != nil {
		t.Fatalf("New(hermes) error: %v", err)
	}
	if _, ok := b.(*hermesBackend); !ok {
		t.Fatalf("expected *hermesBackend, got %T", b)
	}
}

// ── extractACPSessionID ──

func TestExtractACPSessionID(t *testing.T) {
	t.Parallel()
	raw := json.RawMessage(`{"sessionId":"20260410_141145_47260c"}`)
	got := extractACPSessionID(raw)
	if got != "20260410_141145_47260c" {
		t.Errorf("got %q, want %q", got, "20260410_141145_47260c")
	}
}

func TestExtractACPSessionIDEmpty(t *testing.T) {
	t.Parallel()
	raw := json.RawMessage(`{}`)
	got := extractACPSessionID(raw)
	if got != "" {
		t.Errorf("got %q, want empty", got)
	}
}

func TestExtractACPSessionIDInvalidJSON(t *testing.T) {
	t.Parallel()
	raw := json.RawMessage(`not json`)
	got := extractACPSessionID(raw)
	if got != "" {
		t.Errorf("got %q, want empty", got)
	}
}

// ── resolveResumedSessionID ──

func TestResolveResumedSessionIDMatching(t *testing.T) {
	t.Parallel()
	// Server confirms our requested id — happy resume path. No change.
	got, changed := resolveResumedSessionID(
		"ses_alpha",
		json.RawMessage(`{"sessionId":"ses_alpha"}`),
	)
	if got != "ses_alpha" {
		t.Errorf("got %q, want ses_alpha", got)
	}
	if changed {
		t.Errorf("changed: got true, want false")
	}
}

func TestResolveResumedSessionIDDifferent(t *testing.T) {
	t.Parallel()
	// Server returned a different id — local state was lost and the
	// server silently spun up a new session. We trust the server.
	got, changed := resolveResumedSessionID(
		"ses_alpha",
		json.RawMessage(`{"sessionId":"ses_beta_new"}`),
	)
	if got != "ses_beta_new" {
		t.Errorf("got %q, want ses_beta_new", got)
	}
	if !changed {
		t.Errorf("changed: got false, want true")
	}
}

func TestResolveResumedSessionIDEmptyResponse(t *testing.T) {
	t.Parallel()
	// Older / non-conforming server returns no sessionId — defensive
	// fallback to the requested id. This preserves the legacy happy
	// path; a stale id will eventually fail downstream and be retried
	// via the daemon's session-resume fallback (daemon.go).
	for _, body := range []string{
		`{}`,
		`{"sessionId":""}`,
		`not json`,
	} {
		got, changed := resolveResumedSessionID(
			"ses_alpha",
			json.RawMessage(body),
		)
		if got != "ses_alpha" {
			t.Errorf("body=%q: got %q, want ses_alpha", body, got)
		}
		if changed {
			t.Errorf("body=%q: changed: got true, want false", body)
		}
	}
}

// ── buildHermesSessionParams ──

func TestBuildHermesSessionParamsIncludesModel(t *testing.T) {
	t.Parallel()
	params := buildHermesSessionParams("/tmp/work", "gpt-4o")
	if params["cwd"] != "/tmp/work" {
		t.Errorf("cwd: got %v, want /tmp/work", params["cwd"])
	}
	if _, ok := params["mcpServers"]; !ok {
		t.Error("mcpServers missing")
	}
	if got, ok := params["model"].(string); !ok || got != "gpt-4o" {
		t.Errorf("model: got %v, want gpt-4o", params["model"])
	}
}

func TestBuildHermesSessionParamsOmitsEmptyModel(t *testing.T) {
	t.Parallel()
	params := buildHermesSessionParams("/tmp/work", "")
	if _, present := params["model"]; present {
		t.Error("expected model key to be omitted when model is empty")
	}
}

// ── hermesToolNameFromTitle ──

func TestHermesToolNameFromTitle(t *testing.T) {
	t.Parallel()
	tests := []struct {
		title string
		kind  string
		want  string
	}{
		{"terminal: ls -la", "execute", "terminal"},
		{"read: /tmp/foo.go", "read", "read_file"},
		{"write: /tmp/bar.go", "edit", "write_file"},
		{"patch (replace): /tmp/baz.go", "edit", "patch"},
		{"search: *.go", "search", "search_files"},
		{"web search: golang acp protocol", "fetch", "web_search"},
		{"extract: https://example.com", "fetch", "web_extract"},
		{"delegate: fix the bug", "execute", "delegate_task"},
		{"analyze image: what is this?", "read", "vision_analyze"},
		{"execute code", "execute", "execute_code"},
		// Fallback to kind when no colon in title but kind is known.
		{"unknownTool", "read", "read_file"},
		{"unknownTool", "edit", "write_file"},
		{"unknownTool", "execute", "terminal"},
		{"unknownTool", "search", "search_files"},
		{"unknownTool", "fetch", "web_search"},
		{"unknownTool", "think", "thinking"},
		// Bare title (no colon, no known kind) — preserve the title
		// itself rather than falling back to an unclassified kind.
		// Matters for kimi: its ACP `tool_call` updates emit a bare
		// `title: "Shell"` with no `kind`, and we need downstream
		// normalisation (kimiToolNameFromTitle) to see "Shell" rather
		// than an empty string.
		{"Shell", "", "Shell"},
		{"Read file", "", "Read file"},
		{"unknownTool", "other", "unknownTool"},
		// Empty title falls back to kind, even when kind isn't known.
		{"", "other", "other"},
		// Tool with colon but not in known map.
		{"custom_tool: args", "other", "custom_tool"},
	}
	for _, tt := range tests {
		got := hermesToolNameFromTitle(tt.title, tt.kind)
		if got != tt.want {
			t.Errorf("hermesToolNameFromTitle(%q, %q) = %q, want %q", tt.title, tt.kind, got, tt.want)
		}
	}
}

// ── handleLine routing ──

func TestHermesClientHandleLineResponse(t *testing.T) {
	t.Parallel()

	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
	}
	pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "session/new"}
	c.pending[1] = pr

	c.handleLine(`{"jsonrpc":"2.0","id":1,"result":{"sessionId":"ses_abc"}}`)

	res := <-pr.ch
	if res.err != nil {
		t.Fatalf("unexpected error: %v", res.err)
	}
	sid := extractACPSessionID(res.result)
	if sid != "ses_abc" {
		t.Errorf("sessionId: got %q, want %q", sid, "ses_abc")
	}
}

func TestHermesClientHandleLineError(t *testing.T) {
	t.Parallel()

	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
	}
	pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "initialize"}
	c.pending[0] = pr

	c.handleLine(`{"jsonrpc":"2.0","id":0,"error":{"code":-32600,"message":"bad request"}}`)

	res := <-pr.ch
	if res.err == nil {
		t.Fatal("expected error")
	}
	if got := res.err.Error(); got != "initialize: bad request (code=-32600)" {
		t.Errorf("error: got %q", got)
	}
}

// TestHermesClientHandleLineErrorWithData guards #2192-class regressions: when
// an ACP backend returns -32603 (Internal error), the meaningful reason lives
// in the `data` field. Dropping it leaves operators with a bare "Internal
// error" and no way to tell apart "session expired", "model unavailable",
// "auth lost", etc. Kiro CLI 2.2.x emits `data` as a string; some backends use
// objects/arrays — both must round-trip into the wrapped Go error.
func TestHermesClientHandleLineErrorWithStringData(t *testing.T) {
	t.Parallel()

	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
	}
	pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "session/prompt"}
	c.pending[3] = pr

	c.handleLine(`{"jsonrpc":"2.0","id":3,"error":{"code":-32603,"message":"Internal error","data":"No session found with id"}}`)

	res := <-pr.ch
	if res.err == nil {
		t.Fatal("expected error")
	}
	want := "session/prompt: Internal error (code=-32603, data=No session found with id)"
	if got := res.err.Error(); got != want {
		t.Errorf("error: got %q, want %q", got, want)
	}
}

func TestHermesClientHandleLineErrorWithObjectData(t *testing.T) {
	t.Parallel()

	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
	}
	pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "session/prompt"}
	c.pending[5] = pr

	c.handleLine(`{"jsonrpc":"2.0","id":5,"error":{"code":-32000,"message":"quota","data":{"reason":"limit","remaining":0}}}`)

	res := <-pr.ch
	if res.err == nil {
		t.Fatal("expected error")
	}
	want := `session/prompt: quota (code=-32000, data={"reason":"limit","remaining":0})`
	if got := res.err.Error(); got != want {
		t.Errorf("error: got %q, want %q", got, want)
	}
}

func TestHermesClientHandleLineErrorWithNullData(t *testing.T) {
	t.Parallel()

	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
	}
	pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "initialize"}
	c.pending[7] = pr

	c.handleLine(`{"jsonrpc":"2.0","id":7,"error":{"code":-32600,"message":"bad request","data":null}}`)

	res := <-pr.ch
	if res.err == nil {
		t.Fatal("expected error")
	}
	if got := res.err.Error(); got != "initialize: bad request (code=-32600)" {
		t.Errorf("error: got %q", got)
	}
}

// ── agent → client request handling ──

// bufferWriter is a test stand-in for cmd.StdinPipe that captures
// writes in-memory so we can assert what handleAgentRequest emitted.
type bufferWriter struct {
	mu  sync.Mutex
	buf strings.Builder
}

func (b *bufferWriter) Write(p []byte) (int, error) {
	b.mu.Lock()
	defer b.mu.Unlock()
	return b.buf.WriteString(string(p))
}

func (b *bufferWriter) String() string {
	b.mu.Lock()
	defer b.mu.Unlock()
	return b.buf.String()
}

// TestHermesClientAutoApprovesPermissionRequest asserts that when an
// ACP agent sends us `session/request_permission` (kimi does this on
// every Shell / file-mutating tool call), the client replies with
// `approve_for_session` — without this the agent blocks 300s and the
// task hangs. The id in the reply must match the agent's request id
// so its in-flight future resolves.
func TestHermesClientAutoApprovesPermissionRequest(t *testing.T) {
	t.Parallel()

	w := &bufferWriter{}
	c := &hermesClient{
		cfg:     Config{Logger: slog.Default()},
		stdin:   w,
		pending: make(map[int]*pendingRPC),
	}

	c.handleLine(`{"jsonrpc":"2.0","id":42,"method":"session/request_permission","params":{"sessionId":"ses_1","options":[{"optionId":"approve","name":"Approve once","kind":"allow_once"},{"optionId":"approve_for_session","name":"Approve for this session","kind":"allow_always"},{"optionId":"reject","name":"Reject","kind":"reject_once"}],"toolCall":{"toolCallId":"tc_1","title":"Shell","content":[]}}}`)

	got := w.String()
	var resp struct {
		JSONRPC string `json:"jsonrpc"`
		ID      int    `json:"id"`
		Result  struct {
			Outcome struct {
				Outcome  string `json:"outcome"`
				OptionID string `json:"optionId"`
			} `json:"outcome"`
		} `json:"result"`
	}
	if err := json.Unmarshal([]byte(strings.TrimSpace(got)), &resp); err != nil {
		t.Fatalf("reply is not valid JSON: %q err=%v", got, err)
	}
	if resp.JSONRPC != "2.0" {
		t.Errorf("jsonrpc: got %q, want 2.0", resp.JSONRPC)
	}
	if resp.ID != 42 {
		t.Errorf("id: got %d, want 42 (must echo agent's request id)", resp.ID)
	}
	if resp.Result.Outcome.Outcome != "selected" {
		t.Errorf("outcome.outcome: got %q, want %q", resp.Result.Outcome.Outcome, "selected")
	}
	if resp.Result.Outcome.OptionID != "approve_for_session" {
		t.Errorf("outcome.optionId: got %q, want %q", resp.Result.Outcome.OptionID, "approve_for_session")
	}
}

// TestHermesClientReplesMethodNotFoundForUnknownAgentRequest ensures
// that any agent → client request we don't explicitly handle gets a
// proper JSON-RPC error back, not silence. Silence would block the
// agent for however long its internal timeout is, same as the
// session/request_permission hang this change fixes.
func TestHermesClientReplesMethodNotFoundForUnknownAgentRequest(t *testing.T) {
	t.Parallel()

	w := &bufferWriter{}
	c := &hermesClient{
		cfg:     Config{Logger: slog.Default()},
		stdin:   w,
		pending: make(map[int]*pendingRPC),
	}
	c.handleLine(`{"jsonrpc":"2.0","id":7,"method":"fs/read_text_file","params":{"path":"/tmp/x"}}`)

	got := w.String()
	var resp struct {
		ID    int `json:"id"`
		Error struct {
			Code    int    `json:"code"`
			Message string `json:"message"`
		} `json:"error"`
	}
	if err := json.Unmarshal([]byte(strings.TrimSpace(got)), &resp); err != nil {
		t.Fatalf("reply not valid JSON: %q err=%v", got, err)
	}
	if resp.ID != 7 {
		t.Errorf("id echo: got %d, want 7", resp.ID)
	}
	if resp.Error.Code != -32601 {
		t.Errorf("error code: got %d, want -32601 (method not found)", resp.Error.Code)
	}
	if !strings.Contains(resp.Error.Message, "fs/read_text_file") {
		t.Errorf("error message should name the unhandled method, got %q", resp.Error.Message)
	}
}

// ── session/update notification handling ──

func TestHermesClientHandleAgentMessage(t *testing.T) {
	t.Parallel()

	var got Message
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			got = msg
		},
	}

	line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"agent_message_chunk","content":{"type":"text","text":"Hello world"}}}}`
	c.handleLine(line)

	if got.Type != MessageText {
		t.Errorf("type: got %v, want MessageText", got.Type)
	}
	if got.Content != "Hello world" {
		t.Errorf("content: got %q, want %q", got.Content, "Hello world")
	}
}

func TestHermesClientHandleSessionNotificationAgentMessage(t *testing.T) {
	t.Parallel()

	var got Message
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			got = msg
		},
	}

	line := `{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"AgentMessageChunk","content":{"type":"text","text":"Hello from Kiro"}}}}`
	c.handleLine(line)

	if got.Type != MessageText {
		t.Errorf("type: got %v, want MessageText", got.Type)
	}
	if got.Content != "Hello from Kiro" {
		t.Errorf("content: got %q, want %q", got.Content, "Hello from Kiro")
	}
}

// Regression for #1997: Hermes ACP can flush queued session updates from
// the previous turn (history replay on session/resume, or chunks queued
// before our session/prompt response is sent) before the current turn
// actually starts. Until acceptNotification gates them out, those updates
// were appended to output and re-sent to the UI, making the previous
// answer appear duplicated alongside the new one. The Backend wires the
// gate to a streamingCurrentTurn flag set just before session/prompt; here
// we exercise the gate directly on hermesClient.
func TestHermesClientAcceptNotificationGate(t *testing.T) {
	t.Parallel()

	var (
		got    []Message
		accept bool
	)
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		acceptNotification: func(string) bool {
			return accept
		},
		onMessage: func(msg Message) {
			got = append(got, msg)
		},
	}

	replay := `{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"AgentMessageChunk","content":{"type":"text","text":"history should be ignored"}}}}`
	c.handleLine(replay)
	if len(got) != 0 {
		t.Fatalf("expected gate to drop replay before turn starts, got %+v", got)
	}

	accept = true
	live := `{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"AgentMessageChunk","content":{"type":"text","text":"current"}}}}`
	c.handleLine(live)
	if len(got) != 1 {
		t.Fatalf("expected current-turn update to pass the gate, got %+v", got)
	}
	if got[0].Content != "current" {
		t.Fatalf("got content %q, want \"current\"", got[0].Content)
	}
}

func TestHermesClientHandleAgentThought(t *testing.T) {
	t.Parallel()

	var got Message
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			got = msg
		},
	}

	line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"agent_thought_chunk","content":{"type":"text","text":"Let me think..."}}}}`
	c.handleLine(line)

	if got.Type != MessageThinking {
		t.Errorf("type: got %v, want MessageThinking", got.Type)
	}
	if got.Content != "Let me think..." {
		t.Errorf("content: got %q, want %q", got.Content, "Let me think...")
	}
}

func TestHermesClientHandleToolCallStart(t *testing.T) {
	t.Parallel()

	var got Message
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			got = msg
		},
	}

	line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call","toolCallId":"tc-abc123","title":"terminal: ls -la","kind":"execute","status":"pending","rawInput":{"command":"ls -la"}}}}`
	c.handleLine(line)

	if got.Type != MessageToolUse {
		t.Errorf("type: got %v, want MessageToolUse", got.Type)
	}
	if got.Tool != "terminal" {
		t.Errorf("tool: got %q, want %q", got.Tool, "terminal")
	}
	if got.CallID != "tc-abc123" {
		t.Errorf("callID: got %q, want %q", got.CallID, "tc-abc123")
	}
	if cmd, ok := got.Input["command"].(string); !ok || cmd != "ls -la" {
		t.Errorf("input.command: got %v", got.Input["command"])
	}
}

func TestHermesClientHandleSessionNotificationToolCall(t *testing.T) {
	t.Parallel()

	var got []Message
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			got = append(got, msg)
		},
	}

	c.handleLine(`{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"ToolCall","toolCallId":"tc-kiro","name":"Shell","status":"pending","parameters":{"command":"pwd"}}}}`)
	c.handleLine(`{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"ToolCallUpdate","toolCallId":"tc-kiro","status":"completed","name":"Shell","output":"/tmp/project\n"}}}`)

	if len(got) != 2 {
		t.Fatalf("expected [ToolUse, ToolResult], got %+v", got)
	}
	if got[0].Type != MessageToolUse {
		t.Errorf("first message: got %v, want MessageToolUse", got[0].Type)
	}
	if got[0].Tool != "Shell" {
		t.Errorf("first tool: got %q, want Shell", got[0].Tool)
	}
	if cmd, _ := got[0].Input["command"].(string); cmd != "pwd" {
		t.Errorf("first input.command: got %v, want pwd", got[0].Input["command"])
	}
	if got[1].Type != MessageToolResult {
		t.Errorf("second message: got %v, want MessageToolResult", got[1].Type)
	}
	if got[1].Output != "/tmp/project\n" {
		t.Errorf("second output: got %q", got[1].Output)
	}
}

func TestHermesClientHandleSessionNotificationTurnEnd(t *testing.T) {
	t.Parallel()

	var got hermesPromptResult
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onPromptDone: func(result hermesPromptResult) {
			got = result
		},
	}

	line := `{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"TurnEnd","stopReason":"end_turn","usage":{"inputTokens":3,"outputTokens":4,"cachedReadTokens":1}}}}`
	c.handleLine(line)

	if got.stopReason != "end_turn" {
		t.Errorf("stopReason: got %q, want end_turn", got.stopReason)
	}
	if got.usage.InputTokens != 3 || got.usage.OutputTokens != 4 || got.usage.CacheReadTokens != 1 {
		t.Errorf("usage: got %+v", got.usage)
	}
}

func TestHermesClientHandleToolCallComplete(t *testing.T) {
	t.Parallel()

	var got Message
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			got = msg
		},
	}

	line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc-abc123","status":"completed","kind":"execute","rawOutput":"file1.go\nfile2.go\n"}}}`
	c.handleLine(line)

	if got.Type != MessageToolResult {
		t.Errorf("type: got %v, want MessageToolResult", got.Type)
	}
	if got.CallID != "tc-abc123" {
		t.Errorf("callID: got %q, want %q", got.CallID, "tc-abc123")
	}
	if got.Output != "file1.go\nfile2.go\n" {
		t.Errorf("output: got %q", got.Output)
	}
}

// TestHermesClientKimiStreamingToolCall walks the real kimi frame
// sequence for a single Shell call:
//  1. tool_call with empty content (LLM hasn't started emitting args yet)
//  2. tool_call_update status=in_progress carrying the cumulative args
//     JSON character-by-character ("{", "{\"command", …)
//  3. tool_call_update status=completed carrying the command's stdout
//
// The client must defer MessageToolUse until we have the full args so
// the UI doesn't show a command like `{"comma` — and the MessageToolUse
// must carry the parsed args as the Input map (`{"command": "echo hi"}`
// → Input["command"] = "echo hi") rather than a raw string.
func TestHermesClientKimiStreamingToolCall(t *testing.T) {
	t.Parallel()

	var got []Message
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			got = append(got, msg)
		},
	}

	// 1. tool_call: empty content (classic kimi start frame).
	c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call","toolCallId":"tc-kimi-1","title":"Shell","status":"in_progress","content":[{"type":"content","content":{"type":"text","text":""}}]}}}`)
	if len(got) != 0 {
		t.Fatalf("expected nothing emitted yet (args empty), got %+v", got)
	}

	// 2. Streaming updates — cumulative args JSON.
	partials := []string{
		`{"`,
		`{"command`,
		`{"command":`,
		`{"command":"echo `,
		`{"command":"echo hi"}`,
	}
	for _, args := range partials {
		// JSON-encode args so embedded quotes are escaped properly.
		argsJSON, _ := json.Marshal(args)
		line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc-kimi-1","status":"in_progress","content":[{"type":"content","content":{"type":"text","text":` + string(argsJSON) + `}}]}}}`
		c.handleLine(line)
	}
	if len(got) != 0 {
		t.Fatalf("expected nothing emitted mid-stream, got %+v", got)
	}

	// 3. Completed — stdout.
	c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc-kimi-1","status":"completed","content":[{"type":"content","content":{"type":"text","text":"hi\n"}}]}}}`)

	if len(got) != 2 {
		t.Fatalf("expected [MessageToolUse, MessageToolResult], got %d: %+v", len(got), got)
	}
	if got[0].Type != MessageToolUse {
		t.Errorf("first message: got %v, want MessageToolUse", got[0].Type)
	}
	if got[0].CallID != "tc-kimi-1" {
		t.Errorf("first.callID: got %q", got[0].CallID)
	}
	if cmd, _ := got[0].Input["command"].(string); cmd != "echo hi" {
		t.Errorf("first.Input.command: got %v, want %q", got[0].Input["command"], "echo hi")
	}
	if got[1].Type != MessageToolResult {
		t.Errorf("second message: got %v, want MessageToolResult", got[1].Type)
	}
	if got[1].Output != "hi\n" {
		t.Errorf("second.output: got %q, want %q", got[1].Output, "hi\n")
	}
}

// TestHermesClientKimiMalformedArgsFallback: if the accumulated args
// aren't valid JSON (streaming glitch, tool with non-JSON args), we
// still surface the text under Input.text rather than silently
// dropping it.
func TestHermesClientKimiMalformedArgsFallback(t *testing.T) {
	t.Parallel()

	var got []Message
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			got = append(got, msg)
		},
	}

	c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call","toolCallId":"tc","title":"Shell","status":"in_progress","content":[{"type":"content","content":{"type":"text","text":"not-json"}}]}}}`)
	c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc","status":"completed","content":[{"type":"content","content":{"type":"text","text":"output"}}]}}}`)

	if len(got) < 1 {
		t.Fatalf("expected ToolUse+ToolResult, got %+v", got)
	}
	if text, _ := got[0].Input["text"].(string); text != "not-json" {
		t.Errorf("fallback Input.text: got %v", got[0].Input["text"])
	}
}

// TestHermesClientHandleToolCallCompleteOrphan: if a completion frame
// arrives without a preceding tool_call (out-of-order / missed frame),
// still emit ToolUse synthesised from the update's own title/rawInput
// before ToolResult. Keeps the UI from showing a bare result with no
// header.
func TestHermesClientHandleToolCallCompleteOrphan(t *testing.T) {
	t.Parallel()

	var got []Message
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			got = append(got, msg)
		},
	}

	c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc","status":"completed","title":"terminal: ls","kind":"execute","rawInput":{"command":"ls"},"content":[{"type":"content","content":{"type":"text","text":"file.go\n"}}]}}}`)

	if len(got) != 2 || got[0].Type != MessageToolUse || got[1].Type != MessageToolResult {
		t.Fatalf("expected [ToolUse, ToolResult], got %+v", got)
	}
	if got[0].Tool != "terminal" {
		t.Errorf("orphan ToolUse tool: got %q", got[0].Tool)
	}
	if cmd, _ := got[0].Input["command"].(string); cmd != "ls" {
		t.Errorf("orphan ToolUse input.command: got %v", got[0].Input["command"])
	}
	if got[1].Output != "file.go\n" {
		t.Errorf("ToolResult output: got %q", got[1].Output)
	}
}

// TestHermesClientHandleToolCallRawOutputTakesPrecedence keeps hermes
// behaviour unchanged: when the update has both `rawOutput` (hermes
// convention) and `content` (would be ambiguous), honour rawOutput.
func TestHermesClientHandleToolCallRawOutputTakesPrecedence(t *testing.T) {
	t.Parallel()

	var got Message
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			got = msg
		},
	}

	line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc","status":"completed","rawOutput":"raw wins","content":[{"type":"content","content":{"type":"text","text":"ignored"}}]}}}`
	c.handleLine(line)

	if got.Output != "raw wins" {
		t.Errorf("output: got %q, want %q", got.Output, "raw wins")
	}
}

func TestExtractACPToolCallText(t *testing.T) {
	t.Parallel()
	tests := []struct {
		name string
		json string
		want string
	}{
		{
			name: "single text block",
			json: `[{"type":"content","content":{"type":"text","text":"hello"}}]`,
			want: "hello",
		},
		{
			name: "multiple text blocks join with newline",
			json: `[{"type":"content","content":{"type":"text","text":"a"}},{"type":"content","content":{"type":"text","text":"b"}}]`,
			want: "a\nb",
		},
		{
			name: "terminal blocks skipped",
			json: `[{"type":"terminal","terminalId":"t1"},{"type":"content","content":{"type":"text","text":"shell out"}}]`,
			want: "shell out",
		},
		{
			name: "diff block renders as mini header",
			json: `[{"type":"diff","path":"foo.go","oldText":"abc","newText":"abcdef"}]`,
			want: "--- foo.go\n+++ foo.go\n(edited: 3 → 6 bytes)",
		},
		{
			name: "new-file diff (no oldText)",
			json: `[{"type":"diff","path":"new.go","oldText":"","newText":"hi"}]`,
			want: "--- new.go\n+++ new.go\n(new file, 2 bytes)",
		},
		{
			name: "empty array returns empty",
			json: `[]`,
			want: "",
		},
		{
			name: "no text content",
			json: `[{"type":"terminal","terminalId":"t1"}]`,
			want: "",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var blocks []json.RawMessage
			if err := json.Unmarshal([]byte(tt.json), &blocks); err != nil {
				t.Fatalf("unmarshal: %v", err)
			}
			if got := extractACPToolCallText(blocks); got != tt.want {
				t.Errorf("got %q, want %q", got, tt.want)
			}
		})
	}
}

func TestHermesClientHandleToolCallInProgressIgnored(t *testing.T) {
	t.Parallel()

	called := false
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			called = true
		},
	}

	line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc-abc123","status":"in_progress"}}}`
	c.handleLine(line)

	if called {
		t.Error("expected in_progress tool_call_update to be ignored")
	}
}

func TestHermesClientHandleUsageUpdate(t *testing.T) {
	t.Parallel()

	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
	}

	line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"usage_update","usage":{"inputTokens":500,"outputTokens":200,"cachedReadTokens":100}}}}`
	c.handleLine(line)

	c.usageMu.Lock()
	defer c.usageMu.Unlock()

	if c.usage.InputTokens != 500 {
		t.Errorf("inputTokens: got %d, want 500", c.usage.InputTokens)
	}
	if c.usage.OutputTokens != 200 {
		t.Errorf("outputTokens: got %d, want 200", c.usage.OutputTokens)
	}
	if c.usage.CacheReadTokens != 100 {
		t.Errorf("cacheReadTokens: got %d, want 100", c.usage.CacheReadTokens)
	}
}

func TestHermesClientHandleUsageUpdateCumulative(t *testing.T) {
	t.Parallel()

	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
	}

	// First usage update.
	c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"usage_update","usage":{"inputTokens":100,"outputTokens":50}}}}`)

	// Second usage update with higher values (should take the max).
	c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"usage_update","usage":{"inputTokens":300,"outputTokens":120}}}}`)

	c.usageMu.Lock()
	defer c.usageMu.Unlock()

	if c.usage.InputTokens != 300 {
		t.Errorf("inputTokens: got %d, want 300", c.usage.InputTokens)
	}
	if c.usage.OutputTokens != 120 {
		t.Errorf("outputTokens: got %d, want 120", c.usage.OutputTokens)
	}
}

// ── extractPromptResult ──

func TestHermesClientExtractPromptResult(t *testing.T) {
	t.Parallel()

	var got hermesPromptResult
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onPromptDone: func(result hermesPromptResult) {
			got = result
		},
	}

	data := json.RawMessage(`{"stopReason":"end_turn","usage":{"inputTokens":1000,"outputTokens":200,"cachedReadTokens":50}}`)
	c.extractPromptResult(data)

	if got.stopReason != "end_turn" {
		t.Errorf("stopReason: got %q, want %q", got.stopReason, "end_turn")
	}
	if got.usage.InputTokens != 1000 {
		t.Errorf("inputTokens: got %d, want 1000", got.usage.InputTokens)
	}
	if got.usage.OutputTokens != 200 {
		t.Errorf("outputTokens: got %d, want 200", got.usage.OutputTokens)
	}
	if got.usage.CacheReadTokens != 50 {
		t.Errorf("cacheReadTokens: got %d, want 50", got.usage.CacheReadTokens)
	}
}

func TestHermesClientExtractPromptResultNoUsage(t *testing.T) {
	t.Parallel()

	var got hermesPromptResult
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onPromptDone: func(result hermesPromptResult) {
			got = result
		},
	}

	data := json.RawMessage(`{"stopReason":"cancelled"}`)
	c.extractPromptResult(data)

	if got.stopReason != "cancelled" {
		t.Errorf("stopReason: got %q, want %q", got.stopReason, "cancelled")
	}
	if got.usage.InputTokens != 0 {
		t.Errorf("inputTokens: got %d, want 0", got.usage.InputTokens)
	}
}

func TestHermesClientIgnoresUnknownNotification(t *testing.T) {
	t.Parallel()

	called := false
	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
		onMessage: func(msg Message) {
			called = true
		},
	}

	// Unknown method should be silently ignored.
	c.handleLine(`{"jsonrpc":"2.0","method":"unknown/event","params":{}}`)

	if called {
		t.Error("expected unknown notification to be ignored")
	}
}

func TestHermesClientIgnoresInvalidJSON(t *testing.T) {
	t.Parallel()

	c := &hermesClient{
		pending: make(map[int]*pendingRPC),
	}

	// Should not panic.
	c.handleLine("not json at all")
	c.handleLine("")
	c.handleLine("{}")
}

func TestHermesProviderErrorSniffer(t *testing.T) {
	t.Parallel()

	// Real sample of the stderr hermes emits when the configured
	// LLM endpoint rejects the requested model. We verify the
	// sniffer extracts the `Error: ...` line so the task error
	// tells the user *why* it failed.
	s := newACPProviderErrorSniffer("hermes")
	lines := []string{
		"2026-04-20 23:41:47 [INFO] acp_adapter.server: Prompt on session abc",
		`⚠️  API call failed (attempt 1/3): BadRequestError [HTTP 400]`,
		`   🔌 Provider: openai-codex  Model: gpt-5.1-codex-mini`,
		`   📝 Error: HTTP 400: Error code: 400 - {'detail': "The 'gpt-5.1-codex-mini' model is not supported when using Codex with a ChatGPT account."}`,
		`⏱️  Elapsed: 1.17s`,
	}
	for _, line := range lines {
		if _, err := s.Write([]byte(line + "\n")); err != nil {
			t.Fatalf("Write: %v", err)
		}
	}
	msg := s.message()
	if msg == "" {
		t.Fatal("expected a non-empty error message")
	}
	if !strings.Contains(msg, "model is not supported") {
		t.Errorf("expected detail about model support, got %q", msg)
	}
}

func TestHermesProviderErrorSnifferIgnoresInfoLines(t *testing.T) {
	t.Parallel()

	s := newACPProviderErrorSniffer("hermes")
	s.Write([]byte("2026-04-20 23:41:45 [INFO] acp_adapter.entry: Loaded env\n"))
	s.Write([]byte("2026-04-20 23:41:47 [INFO] agent.auxiliary_client: Vision auto-detect...\n"))
	if msg := s.message(); msg != "" {
		t.Errorf("info lines should produce no error, got %q", msg)
	}
}

func TestHermesProviderErrorSnifferHandlesPartialLines(t *testing.T) {
	t.Parallel()

	// Writer may be called mid-line; the sniffer must buffer until
	// it sees a newline so the regex doesn't miss the header.
	s := newACPProviderErrorSniffer("hermes")
	s.Write([]byte(`⚠️  API call failed (attempt 1/3):`))
	s.Write([]byte(` BadRequestError [HTTP 400]` + "\n"))
	s.Write([]byte(`   📝 Error: something went wrong` + "\n"))
	msg := s.message()
	if !strings.Contains(msg, "something went wrong") {
		t.Errorf("expected buffered line to be captured, got %q", msg)
	}
}

func TestHermesProviderErrorSnifferBoundedBuffer(t *testing.T) {
	t.Parallel()

	s := newACPProviderErrorSniffer("hermes")
	for i := 0; i < 20; i++ {
		// Each line differs so dedup doesn't merge them.
		s.Write([]byte(`⚠️  API call failed (HTTP 400) attempt ` + string(rune('a'+i%26)) + `: Non-retryable error` + "\n"))
	}
	if len(s.lines) > acpMaxErrorLines {
		t.Errorf("sniffer kept %d lines, limit is %d", len(s.lines), acpMaxErrorLines)
	}
}

// fakeHermesACPRateLimitScript impersonates hermes for the GitHub
// multica#1952 scenario: the upstream LLM returns HTTP 429 (rate
// limited / no credit), hermes retries internally and ultimately
// emits both a sniffable stderr error block AND a synthetic agent
// text turn ("API call failed after 3 retries..."), then completes
// session/prompt with stopReason=end_turn (NOT an RPC error). The
// daemon must still treat this as a failed run, not a successful
// one — which means the hermes backend has to promote the status
// to "failed" even though `output` is non-empty.
func fakeHermesACPRateLimitScript() string {
	return `#!/bin/sh
while IFS= read -r line; do
  id=$(printf '%s' "$line" | sed -n 's/.*"id":\([0-9]*\).*/\1/p')
  case "$line" in
    *'"method":"initialize"'*)
      printf '{"jsonrpc":"2.0","id":%s,"result":{"protocolVersion":1,"agentCapabilities":{}}}\n' "$id"
      ;;
    *'"method":"session/new"'*)
      printf '{"jsonrpc":"2.0","id":%s,"result":{"sessionId":"ses_429"}}\n' "$id"
      ;;
    *'"method":"session/prompt"'*)
      # Mimic hermes' real-world stderr block on a 429.
      printf '%s\n' '⚠️  API call failed (attempt 3/3): RateLimitError [HTTP 429]' >&2
      printf '%s\n' '   📝 Error: HTTP 429: The usage limit has been reached' >&2
      # Mimic hermes injecting the failure as a synthetic agent turn so
      # the chat shows *something*; this puts text in output and used to
      # mask the failure from the daemon.
      printf '{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_429","update":{"sessionUpdate":"agent_message_chunk","content":{"type":"text","text":"API call failed after 3 retries: HTTP 429: The usage limit has been reached"}}}}\n'
      printf '{"jsonrpc":"2.0","id":%s,"result":{"stopReason":"end_turn"}}\n' "$id"
      exit 0
      ;;
  esac
done
`
}

// TestHermesProviderErrorSnifferTerminalVsTransient verifies the
// sniffer reports terminalMessage()=="" for a per-attempt warning
// that did NOT escalate to an exhausted/non-retryable failure, but
// still returns the same string from message() so callers wanting
// diagnostic text can use it. This is what prevents the
// promote-on-any-sniff false positive (a transient `attempt 1/3`
// followed by a successful retry must stay "completed").
func TestHermesProviderErrorSnifferTerminalVsTransient(t *testing.T) {
	t.Parallel()

	// Transient: the sniffer DID see something matching acpErrorHeaderRe
	// (so `message()` is non-empty for diagnostic purposes), but the
	// signal is just "attempt 1/3 against a retryable rate limit" — no
	// terminal markers at all.
	s := newACPProviderErrorSniffer("hermes")
	s.Write([]byte("⚠️  API call failed (attempt 1/3): retryable upstream blip\n"))
	if msg := s.message(); msg == "" {
		t.Fatalf("sniffer should still capture transient warnings for diagnostics")
	}
	if msg := s.terminalMessage(); msg != "" {
		t.Fatalf("transient attempt should NOT be a terminal failure, got %q", msg)
	}

	// Now feed a follow-on terminal marker. terminalMessage must turn on.
	s.Write([]byte("❌  API call failed after 3 retries: usage limit reached\n"))
	if msg := s.terminalMessage(); msg == "" {
		t.Fatalf("after-N-retries / ❌ should switch terminalMessage on")
	}
}

// TestHermesProviderErrorSnifferTerminalNonRetryable verifies that a
// non-retryable error (BadRequest / Authentication / Non-retryable)
// is treated as terminal even on attempt 1/3 — those errors don't
// retry, so the very first failure is the final disposition. Also
// covers ❌ / [ERROR] / "after N retries" markers that adapters
// emit on give-up.
func TestHermesProviderErrorSnifferTerminalNonRetryable(t *testing.T) {
	t.Parallel()

	for _, line := range []string{
		`⚠️  API call failed (attempt 1/3): BadRequestError [HTTP 400]`,
		`⚠️  API call failed (attempt 1/3): AuthenticationError [HTTP 401]`,
		`⚠️  API call failed (HTTP 400) attempt a: Non-retryable error`,
		`❌ API call failed after 3 retries: RateLimitError [HTTP 429]`,
		`[ERROR] API call failed: upstream returned HTTP 500`,
	} {
		s := newACPProviderErrorSniffer("hermes")
		s.Write([]byte(line + "\n"))
		if msg := s.terminalMessage(); msg == "" {
			t.Errorf("expected %q to be classified as terminal", line)
		}
	}
}

// TestHermesBackendPromotesProviderErrorWithNonEmptyOutput pins the
// fix for GitHub multica#1952: a hermes run that hits a 429 (or any
// upstream provider error) must surface as Status=failed even though
// hermes' synthetic "API call failed..." agent turn means the output
// buffer is non-empty. Before the fix the sniffer-promotion was
// gated on `finalOutput == ""`, so the run silently completed.
func TestHermesBackendPromotesProviderErrorWithNonEmptyOutput(t *testing.T) {
	t.Parallel()

	fakePath := filepath.Join(t.TempDir(), "hermes")
	writeTestExecutable(t, fakePath, []byte(fakeHermesACPRateLimitScript()))

	backend, err := New("hermes", Config{ExecutablePath: fakePath, Logger: slog.Default()})
	if err != nil {
		t.Fatalf("new hermes backend: %v", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	session, err := backend.Execute(ctx, "prompt-ignored", ExecOptions{
		Timeout: 5 * time.Second,
	})
	if err != nil {
		t.Fatalf("execute: %v", err)
	}
	go func() {
		for range session.Messages {
		}
	}()

	select {
	case result, ok := <-session.Result:
		if !ok {
			t.Fatal("result channel closed without a value")
		}
		if result.Status != "failed" {
			t.Fatalf("expected status=failed (sniffer should promote on 429 even with non-empty output), got %q (error=%q output=%q)", result.Status, result.Error, result.Output)
		}
		if !strings.Contains(result.Error, "429") && !strings.Contains(result.Error, "usage limit") {
			t.Errorf("expected error to surface the 429 / usage-limit message, got %q", result.Error)
		}
		if result.SessionID != "ses_429" {
			t.Errorf("expected session id to be preserved on failure, got %q", result.SessionID)
		}
	case <-time.After(10 * time.Second):
		t.Fatal("timeout waiting for result")
	}
}

// fakeHermesACPTransientRetryScript emits a single retryable per-
// attempt warning to stderr and then completes with a normal agent
// text turn — the situation where the upstream LLM blipped on
// attempt 1/3 but a subsequent attempt succeeded and produced a
// real answer. The previous (too-broad) promotion logic would have
// flipped this to status=failed; the fix must keep it as completed.
func fakeHermesACPTransientRetryScript() string {
	return `#!/bin/sh
while IFS= read -r line; do
  id=$(printf '%s' "$line" | sed -n 's/.*"id":\([0-9]*\).*/\1/p')
  case "$line" in
    *'"method":"initialize"'*)
      printf '{"jsonrpc":"2.0","id":%s,"result":{"protocolVersion":1,"agentCapabilities":{}}}\n' "$id"
      ;;
    *'"method":"session/new"'*)
      printf '{"jsonrpc":"2.0","id":%s,"result":{"sessionId":"ses_ok"}}\n' "$id"
      ;;
    *'"method":"session/prompt"'*)
      # Per-attempt rate-limit warning that hermes routinely logs on
      # transient blips — the request DOES retry and succeed below.
      printf '%s\n' '⚠️  API call failed (attempt 1/3): RateLimitError [HTTP 429]' >&2
      # Real agent answer streamed back as a normal text turn.
      printf '{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_ok","update":{"sessionUpdate":"agent_message_chunk","content":{"type":"text","text":"Here is the answer you asked for."}}}}\n'
      printf '{"jsonrpc":"2.0","id":%s,"result":{"stopReason":"end_turn"}}\n' "$id"
      exit 0
      ;;
  esac
done
`
}

// TestHermesBackendDoesNotPromoteOnTransientRetry pins the
// regression GPT-Boy flagged on the multica#1952 fix: a per-attempt
// ⚠️ warning on stderr that does NOT include any terminal marker
// ("after N retries", Non-retryable, ❌, [ERROR], BadRequest /
// Authentication errors) and is followed by a successful agent
// turn must stay status=completed. The previous "any sniffer line
// → fail" rule would have wrongly marked this run as failed.
func TestHermesBackendDoesNotPromoteOnTransientRetry(t *testing.T) {
	t.Parallel()

	fakePath := filepath.Join(t.TempDir(), "hermes")
	writeTestExecutable(t, fakePath, []byte(fakeHermesACPTransientRetryScript()))

	backend, err := New("hermes", Config{ExecutablePath: fakePath, Logger: slog.Default()})
	if err != nil {
		t.Fatalf("new hermes backend: %v", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	session, err := backend.Execute(ctx, "prompt-ignored", ExecOptions{
		Timeout: 5 * time.Second,
	})
	if err != nil {
		t.Fatalf("execute: %v", err)
	}
	go func() {
		for range session.Messages {
		}
	}()

	select {
	case result, ok := <-session.Result:
		if !ok {
			t.Fatal("result channel closed without a value")
		}
		if result.Status != "completed" {
			t.Fatalf("transient retry that ultimately succeeded must stay status=completed, got %q (error=%q output=%q)", result.Status, result.Error, result.Output)
		}
		if !strings.Contains(result.Output, "Here is the answer") {
			t.Errorf("expected the successful agent turn to be in output, got %q", result.Output)
		}
	case <-time.After(10 * time.Second):
		t.Fatal("timeout waiting for result")
	}
}