mirror of
https://github.com/multica-ai/multica.git
synced 2026-07-05 13:29:44 +02:00
ACP backends (Kiro, Hermes, Kimi) put the actionable reason for code=-32603 'Internal error' in the JSON-RPC `data` field, e.g. "No session found with id". The wrapped Go error only carried `code` and `message`, leaving operators staring at a bare "kiro session/prompt failed: session/prompt: Internal error (code=-32603)" with no way to tell apart session expiry, model unavailability, lost auth, or quota. Parse `data` too. Strings render unquoted; objects/arrays render as raw JSON; null/missing keeps the previous format unchanged. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1260 lines
42 KiB
Go
1260 lines
42 KiB
Go
package agent
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"log/slog"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func TestNewReturnsHermesBackend(t *testing.T) {
|
|
t.Parallel()
|
|
b, err := New("hermes", Config{ExecutablePath: "/nonexistent/hermes"})
|
|
if err != nil {
|
|
t.Fatalf("New(hermes) error: %v", err)
|
|
}
|
|
if _, ok := b.(*hermesBackend); !ok {
|
|
t.Fatalf("expected *hermesBackend, got %T", b)
|
|
}
|
|
}
|
|
|
|
// ── extractACPSessionID ──
|
|
|
|
func TestExtractACPSessionID(t *testing.T) {
|
|
t.Parallel()
|
|
raw := json.RawMessage(`{"sessionId":"20260410_141145_47260c"}`)
|
|
got := extractACPSessionID(raw)
|
|
if got != "20260410_141145_47260c" {
|
|
t.Errorf("got %q, want %q", got, "20260410_141145_47260c")
|
|
}
|
|
}
|
|
|
|
func TestExtractACPSessionIDEmpty(t *testing.T) {
|
|
t.Parallel()
|
|
raw := json.RawMessage(`{}`)
|
|
got := extractACPSessionID(raw)
|
|
if got != "" {
|
|
t.Errorf("got %q, want empty", got)
|
|
}
|
|
}
|
|
|
|
func TestExtractACPSessionIDInvalidJSON(t *testing.T) {
|
|
t.Parallel()
|
|
raw := json.RawMessage(`not json`)
|
|
got := extractACPSessionID(raw)
|
|
if got != "" {
|
|
t.Errorf("got %q, want empty", got)
|
|
}
|
|
}
|
|
|
|
// ── resolveResumedSessionID ──
|
|
|
|
func TestResolveResumedSessionIDMatching(t *testing.T) {
|
|
t.Parallel()
|
|
// Server confirms our requested id — happy resume path. No change.
|
|
got, changed := resolveResumedSessionID(
|
|
"ses_alpha",
|
|
json.RawMessage(`{"sessionId":"ses_alpha"}`),
|
|
)
|
|
if got != "ses_alpha" {
|
|
t.Errorf("got %q, want ses_alpha", got)
|
|
}
|
|
if changed {
|
|
t.Errorf("changed: got true, want false")
|
|
}
|
|
}
|
|
|
|
func TestResolveResumedSessionIDDifferent(t *testing.T) {
|
|
t.Parallel()
|
|
// Server returned a different id — local state was lost and the
|
|
// server silently spun up a new session. We trust the server.
|
|
got, changed := resolveResumedSessionID(
|
|
"ses_alpha",
|
|
json.RawMessage(`{"sessionId":"ses_beta_new"}`),
|
|
)
|
|
if got != "ses_beta_new" {
|
|
t.Errorf("got %q, want ses_beta_new", got)
|
|
}
|
|
if !changed {
|
|
t.Errorf("changed: got false, want true")
|
|
}
|
|
}
|
|
|
|
func TestResolveResumedSessionIDEmptyResponse(t *testing.T) {
|
|
t.Parallel()
|
|
// Older / non-conforming server returns no sessionId — defensive
|
|
// fallback to the requested id. This preserves the legacy happy
|
|
// path; a stale id will eventually fail downstream and be retried
|
|
// via the daemon's session-resume fallback (daemon.go).
|
|
for _, body := range []string{
|
|
`{}`,
|
|
`{"sessionId":""}`,
|
|
`not json`,
|
|
} {
|
|
got, changed := resolveResumedSessionID(
|
|
"ses_alpha",
|
|
json.RawMessage(body),
|
|
)
|
|
if got != "ses_alpha" {
|
|
t.Errorf("body=%q: got %q, want ses_alpha", body, got)
|
|
}
|
|
if changed {
|
|
t.Errorf("body=%q: changed: got true, want false", body)
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── buildHermesSessionParams ──
|
|
|
|
func TestBuildHermesSessionParamsIncludesModel(t *testing.T) {
|
|
t.Parallel()
|
|
params := buildHermesSessionParams("/tmp/work", "gpt-4o")
|
|
if params["cwd"] != "/tmp/work" {
|
|
t.Errorf("cwd: got %v, want /tmp/work", params["cwd"])
|
|
}
|
|
if _, ok := params["mcpServers"]; !ok {
|
|
t.Error("mcpServers missing")
|
|
}
|
|
if got, ok := params["model"].(string); !ok || got != "gpt-4o" {
|
|
t.Errorf("model: got %v, want gpt-4o", params["model"])
|
|
}
|
|
}
|
|
|
|
func TestBuildHermesSessionParamsOmitsEmptyModel(t *testing.T) {
|
|
t.Parallel()
|
|
params := buildHermesSessionParams("/tmp/work", "")
|
|
if _, present := params["model"]; present {
|
|
t.Error("expected model key to be omitted when model is empty")
|
|
}
|
|
}
|
|
|
|
// ── hermesToolNameFromTitle ──
|
|
|
|
func TestHermesToolNameFromTitle(t *testing.T) {
|
|
t.Parallel()
|
|
tests := []struct {
|
|
title string
|
|
kind string
|
|
want string
|
|
}{
|
|
{"terminal: ls -la", "execute", "terminal"},
|
|
{"read: /tmp/foo.go", "read", "read_file"},
|
|
{"write: /tmp/bar.go", "edit", "write_file"},
|
|
{"patch (replace): /tmp/baz.go", "edit", "patch"},
|
|
{"search: *.go", "search", "search_files"},
|
|
{"web search: golang acp protocol", "fetch", "web_search"},
|
|
{"extract: https://example.com", "fetch", "web_extract"},
|
|
{"delegate: fix the bug", "execute", "delegate_task"},
|
|
{"analyze image: what is this?", "read", "vision_analyze"},
|
|
{"execute code", "execute", "execute_code"},
|
|
// Fallback to kind when no colon in title but kind is known.
|
|
{"unknownTool", "read", "read_file"},
|
|
{"unknownTool", "edit", "write_file"},
|
|
{"unknownTool", "execute", "terminal"},
|
|
{"unknownTool", "search", "search_files"},
|
|
{"unknownTool", "fetch", "web_search"},
|
|
{"unknownTool", "think", "thinking"},
|
|
// Bare title (no colon, no known kind) — preserve the title
|
|
// itself rather than falling back to an unclassified kind.
|
|
// Matters for kimi: its ACP `tool_call` updates emit a bare
|
|
// `title: "Shell"` with no `kind`, and we need downstream
|
|
// normalisation (kimiToolNameFromTitle) to see "Shell" rather
|
|
// than an empty string.
|
|
{"Shell", "", "Shell"},
|
|
{"Read file", "", "Read file"},
|
|
{"unknownTool", "other", "unknownTool"},
|
|
// Empty title falls back to kind, even when kind isn't known.
|
|
{"", "other", "other"},
|
|
// Tool with colon but not in known map.
|
|
{"custom_tool: args", "other", "custom_tool"},
|
|
}
|
|
for _, tt := range tests {
|
|
got := hermesToolNameFromTitle(tt.title, tt.kind)
|
|
if got != tt.want {
|
|
t.Errorf("hermesToolNameFromTitle(%q, %q) = %q, want %q", tt.title, tt.kind, got, tt.want)
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── handleLine routing ──
|
|
|
|
func TestHermesClientHandleLineResponse(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
}
|
|
pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "session/new"}
|
|
c.pending[1] = pr
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":1,"result":{"sessionId":"ses_abc"}}`)
|
|
|
|
res := <-pr.ch
|
|
if res.err != nil {
|
|
t.Fatalf("unexpected error: %v", res.err)
|
|
}
|
|
sid := extractACPSessionID(res.result)
|
|
if sid != "ses_abc" {
|
|
t.Errorf("sessionId: got %q, want %q", sid, "ses_abc")
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleLineError(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
}
|
|
pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "initialize"}
|
|
c.pending[0] = pr
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":0,"error":{"code":-32600,"message":"bad request"}}`)
|
|
|
|
res := <-pr.ch
|
|
if res.err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if got := res.err.Error(); got != "initialize: bad request (code=-32600)" {
|
|
t.Errorf("error: got %q", got)
|
|
}
|
|
}
|
|
|
|
// TestHermesClientHandleLineErrorWithData guards #2192-class regressions: when
|
|
// an ACP backend returns -32603 (Internal error), the meaningful reason lives
|
|
// in the `data` field. Dropping it leaves operators with a bare "Internal
|
|
// error" and no way to tell apart "session expired", "model unavailable",
|
|
// "auth lost", etc. Kiro CLI 2.2.x emits `data` as a string; some backends use
|
|
// objects/arrays — both must round-trip into the wrapped Go error.
|
|
func TestHermesClientHandleLineErrorWithStringData(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
}
|
|
pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "session/prompt"}
|
|
c.pending[3] = pr
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":3,"error":{"code":-32603,"message":"Internal error","data":"No session found with id"}}`)
|
|
|
|
res := <-pr.ch
|
|
if res.err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
want := "session/prompt: Internal error (code=-32603, data=No session found with id)"
|
|
if got := res.err.Error(); got != want {
|
|
t.Errorf("error: got %q, want %q", got, want)
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleLineErrorWithObjectData(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
}
|
|
pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "session/prompt"}
|
|
c.pending[5] = pr
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":5,"error":{"code":-32000,"message":"quota","data":{"reason":"limit","remaining":0}}}`)
|
|
|
|
res := <-pr.ch
|
|
if res.err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
want := `session/prompt: quota (code=-32000, data={"reason":"limit","remaining":0})`
|
|
if got := res.err.Error(); got != want {
|
|
t.Errorf("error: got %q, want %q", got, want)
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleLineErrorWithNullData(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
}
|
|
pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "initialize"}
|
|
c.pending[7] = pr
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":7,"error":{"code":-32600,"message":"bad request","data":null}}`)
|
|
|
|
res := <-pr.ch
|
|
if res.err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if got := res.err.Error(); got != "initialize: bad request (code=-32600)" {
|
|
t.Errorf("error: got %q", got)
|
|
}
|
|
}
|
|
|
|
// ── agent → client request handling ──
|
|
|
|
// bufferWriter is a test stand-in for cmd.StdinPipe: it captures writes
// in memory so tests can assert what handleAgentRequest emitted. A mutex
// guards the underlying builder so Write and String may be called from
// different goroutines.
type bufferWriter struct {
	mu  sync.Mutex
	buf strings.Builder
}

// Write appends p to the captured output and returns the number of bytes
// written. The bytes are handed to strings.Builder.Write directly, which
// avoids the intermediate []byte -> string copy a WriteString(string(p))
// call would make.
func (b *bufferWriter) Write(p []byte) (int, error) {
	b.mu.Lock()
	defer b.mu.Unlock()
	return b.buf.Write(p)
}

// String returns everything written so far as a single string.
func (b *bufferWriter) String() string {
	b.mu.Lock()
	defer b.mu.Unlock()
	return b.buf.String()
}
|
|
|
|
// TestHermesClientAutoApprovesPermissionRequest asserts that when an
|
|
// ACP agent sends us `session/request_permission` (kimi does this on
|
|
// every Shell / file-mutating tool call), the client replies with
|
|
// `approve_for_session` — without this the agent blocks 300s and the
|
|
// task hangs. The id in the reply must match the agent's request id
|
|
// so its in-flight future resolves.
|
|
func TestHermesClientAutoApprovesPermissionRequest(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
w := &bufferWriter{}
|
|
c := &hermesClient{
|
|
cfg: Config{Logger: slog.Default()},
|
|
stdin: w,
|
|
pending: make(map[int]*pendingRPC),
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":42,"method":"session/request_permission","params":{"sessionId":"ses_1","options":[{"optionId":"approve","name":"Approve once","kind":"allow_once"},{"optionId":"approve_for_session","name":"Approve for this session","kind":"allow_always"},{"optionId":"reject","name":"Reject","kind":"reject_once"}],"toolCall":{"toolCallId":"tc_1","title":"Shell","content":[]}}}`)
|
|
|
|
got := w.String()
|
|
var resp struct {
|
|
JSONRPC string `json:"jsonrpc"`
|
|
ID int `json:"id"`
|
|
Result struct {
|
|
Outcome struct {
|
|
Outcome string `json:"outcome"`
|
|
OptionID string `json:"optionId"`
|
|
} `json:"outcome"`
|
|
} `json:"result"`
|
|
}
|
|
if err := json.Unmarshal([]byte(strings.TrimSpace(got)), &resp); err != nil {
|
|
t.Fatalf("reply is not valid JSON: %q err=%v", got, err)
|
|
}
|
|
if resp.JSONRPC != "2.0" {
|
|
t.Errorf("jsonrpc: got %q, want 2.0", resp.JSONRPC)
|
|
}
|
|
if resp.ID != 42 {
|
|
t.Errorf("id: got %d, want 42 (must echo agent's request id)", resp.ID)
|
|
}
|
|
if resp.Result.Outcome.Outcome != "selected" {
|
|
t.Errorf("outcome.outcome: got %q, want %q", resp.Result.Outcome.Outcome, "selected")
|
|
}
|
|
if resp.Result.Outcome.OptionID != "approve_for_session" {
|
|
t.Errorf("outcome.optionId: got %q, want %q", resp.Result.Outcome.OptionID, "approve_for_session")
|
|
}
|
|
}
|
|
|
|
// TestHermesClientReplesMethodNotFoundForUnknownAgentRequest ensures
|
|
// that any agent → client request we don't explicitly handle gets a
|
|
// proper JSON-RPC error back, not silence. Silence would block the
|
|
// agent for however long its internal timeout is, same as the
|
|
// session/request_permission hang this change fixes.
|
|
func TestHermesClientReplesMethodNotFoundForUnknownAgentRequest(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
w := &bufferWriter{}
|
|
c := &hermesClient{
|
|
cfg: Config{Logger: slog.Default()},
|
|
stdin: w,
|
|
pending: make(map[int]*pendingRPC),
|
|
}
|
|
c.handleLine(`{"jsonrpc":"2.0","id":7,"method":"fs/read_text_file","params":{"path":"/tmp/x"}}`)
|
|
|
|
got := w.String()
|
|
var resp struct {
|
|
ID int `json:"id"`
|
|
Error struct {
|
|
Code int `json:"code"`
|
|
Message string `json:"message"`
|
|
} `json:"error"`
|
|
}
|
|
if err := json.Unmarshal([]byte(strings.TrimSpace(got)), &resp); err != nil {
|
|
t.Fatalf("reply not valid JSON: %q err=%v", got, err)
|
|
}
|
|
if resp.ID != 7 {
|
|
t.Errorf("id echo: got %d, want 7", resp.ID)
|
|
}
|
|
if resp.Error.Code != -32601 {
|
|
t.Errorf("error code: got %d, want -32601 (method not found)", resp.Error.Code)
|
|
}
|
|
if !strings.Contains(resp.Error.Message, "fs/read_text_file") {
|
|
t.Errorf("error message should name the unhandled method, got %q", resp.Error.Message)
|
|
}
|
|
}
|
|
|
|
// ── session/update notification handling ──
|
|
|
|
func TestHermesClientHandleAgentMessage(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got Message
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
got = msg
|
|
},
|
|
}
|
|
|
|
line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"agent_message_chunk","content":{"type":"text","text":"Hello world"}}}}`
|
|
c.handleLine(line)
|
|
|
|
if got.Type != MessageText {
|
|
t.Errorf("type: got %v, want MessageText", got.Type)
|
|
}
|
|
if got.Content != "Hello world" {
|
|
t.Errorf("content: got %q, want %q", got.Content, "Hello world")
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleSessionNotificationAgentMessage(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got Message
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
got = msg
|
|
},
|
|
}
|
|
|
|
line := `{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"AgentMessageChunk","content":{"type":"text","text":"Hello from Kiro"}}}}`
|
|
c.handleLine(line)
|
|
|
|
if got.Type != MessageText {
|
|
t.Errorf("type: got %v, want MessageText", got.Type)
|
|
}
|
|
if got.Content != "Hello from Kiro" {
|
|
t.Errorf("content: got %q, want %q", got.Content, "Hello from Kiro")
|
|
}
|
|
}
|
|
|
|
// Regression for #1997: Hermes ACP can flush queued session updates from
|
|
// the previous turn (history replay on session/resume, or chunks queued
|
|
// before our session/prompt response is sent) before the current turn
|
|
// actually starts. Until acceptNotification gates them out, those updates
|
|
// were appended to output and re-sent to the UI, making the previous
|
|
// answer appear duplicated alongside the new one. The Backend wires the
|
|
// gate to a streamingCurrentTurn flag set just before session/prompt; here
|
|
// we exercise the gate directly on hermesClient.
|
|
func TestHermesClientAcceptNotificationGate(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var (
|
|
got []Message
|
|
accept bool
|
|
)
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
acceptNotification: func(string) bool {
|
|
return accept
|
|
},
|
|
onMessage: func(msg Message) {
|
|
got = append(got, msg)
|
|
},
|
|
}
|
|
|
|
replay := `{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"AgentMessageChunk","content":{"type":"text","text":"history should be ignored"}}}}`
|
|
c.handleLine(replay)
|
|
if len(got) != 0 {
|
|
t.Fatalf("expected gate to drop replay before turn starts, got %+v", got)
|
|
}
|
|
|
|
accept = true
|
|
live := `{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"AgentMessageChunk","content":{"type":"text","text":"current"}}}}`
|
|
c.handleLine(live)
|
|
if len(got) != 1 {
|
|
t.Fatalf("expected current-turn update to pass the gate, got %+v", got)
|
|
}
|
|
if got[0].Content != "current" {
|
|
t.Fatalf("got content %q, want \"current\"", got[0].Content)
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleAgentThought(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got Message
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
got = msg
|
|
},
|
|
}
|
|
|
|
line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"agent_thought_chunk","content":{"type":"text","text":"Let me think..."}}}}`
|
|
c.handleLine(line)
|
|
|
|
if got.Type != MessageThinking {
|
|
t.Errorf("type: got %v, want MessageThinking", got.Type)
|
|
}
|
|
if got.Content != "Let me think..." {
|
|
t.Errorf("content: got %q, want %q", got.Content, "Let me think...")
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleToolCallStart(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got Message
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
got = msg
|
|
},
|
|
}
|
|
|
|
line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call","toolCallId":"tc-abc123","title":"terminal: ls -la","kind":"execute","status":"pending","rawInput":{"command":"ls -la"}}}}`
|
|
c.handleLine(line)
|
|
|
|
if got.Type != MessageToolUse {
|
|
t.Errorf("type: got %v, want MessageToolUse", got.Type)
|
|
}
|
|
if got.Tool != "terminal" {
|
|
t.Errorf("tool: got %q, want %q", got.Tool, "terminal")
|
|
}
|
|
if got.CallID != "tc-abc123" {
|
|
t.Errorf("callID: got %q, want %q", got.CallID, "tc-abc123")
|
|
}
|
|
if cmd, ok := got.Input["command"].(string); !ok || cmd != "ls -la" {
|
|
t.Errorf("input.command: got %v", got.Input["command"])
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleSessionNotificationToolCall(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got []Message
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
got = append(got, msg)
|
|
},
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"ToolCall","toolCallId":"tc-kiro","name":"Shell","status":"pending","parameters":{"command":"pwd"}}}}`)
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"ToolCallUpdate","toolCallId":"tc-kiro","status":"completed","name":"Shell","output":"/tmp/project\n"}}}`)
|
|
|
|
if len(got) != 2 {
|
|
t.Fatalf("expected [ToolUse, ToolResult], got %+v", got)
|
|
}
|
|
if got[0].Type != MessageToolUse {
|
|
t.Errorf("first message: got %v, want MessageToolUse", got[0].Type)
|
|
}
|
|
if got[0].Tool != "Shell" {
|
|
t.Errorf("first tool: got %q, want Shell", got[0].Tool)
|
|
}
|
|
if cmd, _ := got[0].Input["command"].(string); cmd != "pwd" {
|
|
t.Errorf("first input.command: got %v, want pwd", got[0].Input["command"])
|
|
}
|
|
if got[1].Type != MessageToolResult {
|
|
t.Errorf("second message: got %v, want MessageToolResult", got[1].Type)
|
|
}
|
|
if got[1].Output != "/tmp/project\n" {
|
|
t.Errorf("second output: got %q", got[1].Output)
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleSessionNotificationTurnEnd(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got hermesPromptResult
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onPromptDone: func(result hermesPromptResult) {
|
|
got = result
|
|
},
|
|
}
|
|
|
|
line := `{"jsonrpc":"2.0","method":"session/notification","params":{"sessionId":"ses_1","update":{"type":"TurnEnd","stopReason":"end_turn","usage":{"inputTokens":3,"outputTokens":4,"cachedReadTokens":1}}}}`
|
|
c.handleLine(line)
|
|
|
|
if got.stopReason != "end_turn" {
|
|
t.Errorf("stopReason: got %q, want end_turn", got.stopReason)
|
|
}
|
|
if got.usage.InputTokens != 3 || got.usage.OutputTokens != 4 || got.usage.CacheReadTokens != 1 {
|
|
t.Errorf("usage: got %+v", got.usage)
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleToolCallComplete(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got Message
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
got = msg
|
|
},
|
|
}
|
|
|
|
line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc-abc123","status":"completed","kind":"execute","rawOutput":"file1.go\nfile2.go\n"}}}`
|
|
c.handleLine(line)
|
|
|
|
if got.Type != MessageToolResult {
|
|
t.Errorf("type: got %v, want MessageToolResult", got.Type)
|
|
}
|
|
if got.CallID != "tc-abc123" {
|
|
t.Errorf("callID: got %q, want %q", got.CallID, "tc-abc123")
|
|
}
|
|
if got.Output != "file1.go\nfile2.go\n" {
|
|
t.Errorf("output: got %q", got.Output)
|
|
}
|
|
}
|
|
|
|
// TestHermesClientKimiStreamingToolCall walks the real kimi frame
|
|
// sequence for a single Shell call:
|
|
// 1. tool_call with empty content (LLM hasn't started emitting args yet)
|
|
// 2. tool_call_update status=in_progress carrying the cumulative args
|
|
// JSON character-by-character ("{", "{\"command", …)
|
|
// 3. tool_call_update status=completed carrying the command's stdout
|
|
//
|
|
// The client must defer MessageToolUse until we have the full args so
|
|
// the UI doesn't show a command like `{"comma` — and the MessageToolUse
|
|
// must carry the parsed args as the Input map (`{"command": "echo hi"}`
|
|
// → Input["command"] = "echo hi") rather than a raw string.
|
|
func TestHermesClientKimiStreamingToolCall(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got []Message
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
got = append(got, msg)
|
|
},
|
|
}
|
|
|
|
// 1. tool_call: empty content (classic kimi start frame).
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call","toolCallId":"tc-kimi-1","title":"Shell","status":"in_progress","content":[{"type":"content","content":{"type":"text","text":""}}]}}}`)
|
|
if len(got) != 0 {
|
|
t.Fatalf("expected nothing emitted yet (args empty), got %+v", got)
|
|
}
|
|
|
|
// 2. Streaming updates — cumulative args JSON.
|
|
partials := []string{
|
|
`{"`,
|
|
`{"command`,
|
|
`{"command":`,
|
|
`{"command":"echo `,
|
|
`{"command":"echo hi"}`,
|
|
}
|
|
for _, args := range partials {
|
|
// JSON-encode args so embedded quotes are escaped properly.
|
|
argsJSON, _ := json.Marshal(args)
|
|
line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc-kimi-1","status":"in_progress","content":[{"type":"content","content":{"type":"text","text":` + string(argsJSON) + `}}]}}}`
|
|
c.handleLine(line)
|
|
}
|
|
if len(got) != 0 {
|
|
t.Fatalf("expected nothing emitted mid-stream, got %+v", got)
|
|
}
|
|
|
|
// 3. Completed — stdout.
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc-kimi-1","status":"completed","content":[{"type":"content","content":{"type":"text","text":"hi\n"}}]}}}`)
|
|
|
|
if len(got) != 2 {
|
|
t.Fatalf("expected [MessageToolUse, MessageToolResult], got %d: %+v", len(got), got)
|
|
}
|
|
if got[0].Type != MessageToolUse {
|
|
t.Errorf("first message: got %v, want MessageToolUse", got[0].Type)
|
|
}
|
|
if got[0].CallID != "tc-kimi-1" {
|
|
t.Errorf("first.callID: got %q", got[0].CallID)
|
|
}
|
|
if cmd, _ := got[0].Input["command"].(string); cmd != "echo hi" {
|
|
t.Errorf("first.Input.command: got %v, want %q", got[0].Input["command"], "echo hi")
|
|
}
|
|
if got[1].Type != MessageToolResult {
|
|
t.Errorf("second message: got %v, want MessageToolResult", got[1].Type)
|
|
}
|
|
if got[1].Output != "hi\n" {
|
|
t.Errorf("second.output: got %q, want %q", got[1].Output, "hi\n")
|
|
}
|
|
}
|
|
|
|
// TestHermesClientKimiMalformedArgsFallback: if the accumulated args
|
|
// aren't valid JSON (streaming glitch, tool with non-JSON args), we
|
|
// still surface the text under Input.text rather than silently
|
|
// dropping it.
|
|
func TestHermesClientKimiMalformedArgsFallback(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got []Message
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
got = append(got, msg)
|
|
},
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call","toolCallId":"tc","title":"Shell","status":"in_progress","content":[{"type":"content","content":{"type":"text","text":"not-json"}}]}}}`)
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc","status":"completed","content":[{"type":"content","content":{"type":"text","text":"output"}}]}}}`)
|
|
|
|
if len(got) < 1 {
|
|
t.Fatalf("expected ToolUse+ToolResult, got %+v", got)
|
|
}
|
|
if text, _ := got[0].Input["text"].(string); text != "not-json" {
|
|
t.Errorf("fallback Input.text: got %v", got[0].Input["text"])
|
|
}
|
|
}
|
|
|
|
// TestHermesClientHandleToolCallCompleteOrphan: if a completion frame
|
|
// arrives without a preceding tool_call (out-of-order / missed frame),
|
|
// still emit ToolUse synthesised from the update's own title/rawInput
|
|
// before ToolResult. Keeps the UI from showing a bare result with no
|
|
// header.
|
|
func TestHermesClientHandleToolCallCompleteOrphan(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got []Message
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
got = append(got, msg)
|
|
},
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc","status":"completed","title":"terminal: ls","kind":"execute","rawInput":{"command":"ls"},"content":[{"type":"content","content":{"type":"text","text":"file.go\n"}}]}}}`)
|
|
|
|
if len(got) != 2 || got[0].Type != MessageToolUse || got[1].Type != MessageToolResult {
|
|
t.Fatalf("expected [ToolUse, ToolResult], got %+v", got)
|
|
}
|
|
if got[0].Tool != "terminal" {
|
|
t.Errorf("orphan ToolUse tool: got %q", got[0].Tool)
|
|
}
|
|
if cmd, _ := got[0].Input["command"].(string); cmd != "ls" {
|
|
t.Errorf("orphan ToolUse input.command: got %v", got[0].Input["command"])
|
|
}
|
|
if got[1].Output != "file.go\n" {
|
|
t.Errorf("ToolResult output: got %q", got[1].Output)
|
|
}
|
|
}
|
|
|
|
// TestHermesClientHandleToolCallRawOutputTakesPrecedence keeps hermes
|
|
// behaviour unchanged: when the update has both `rawOutput` (hermes
|
|
// convention) and `content` (would be ambiguous), honour rawOutput.
|
|
func TestHermesClientHandleToolCallRawOutputTakesPrecedence(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got Message
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
got = msg
|
|
},
|
|
}
|
|
|
|
line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc","status":"completed","rawOutput":"raw wins","content":[{"type":"content","content":{"type":"text","text":"ignored"}}]}}}`
|
|
c.handleLine(line)
|
|
|
|
if got.Output != "raw wins" {
|
|
t.Errorf("output: got %q, want %q", got.Output, "raw wins")
|
|
}
|
|
}
|
|
|
|
func TestExtractACPToolCallText(t *testing.T) {
|
|
t.Parallel()
|
|
tests := []struct {
|
|
name string
|
|
json string
|
|
want string
|
|
}{
|
|
{
|
|
name: "single text block",
|
|
json: `[{"type":"content","content":{"type":"text","text":"hello"}}]`,
|
|
want: "hello",
|
|
},
|
|
{
|
|
name: "multiple text blocks join with newline",
|
|
json: `[{"type":"content","content":{"type":"text","text":"a"}},{"type":"content","content":{"type":"text","text":"b"}}]`,
|
|
want: "a\nb",
|
|
},
|
|
{
|
|
name: "terminal blocks skipped",
|
|
json: `[{"type":"terminal","terminalId":"t1"},{"type":"content","content":{"type":"text","text":"shell out"}}]`,
|
|
want: "shell out",
|
|
},
|
|
{
|
|
name: "diff block renders as mini header",
|
|
json: `[{"type":"diff","path":"foo.go","oldText":"abc","newText":"abcdef"}]`,
|
|
want: "--- foo.go\n+++ foo.go\n(edited: 3 → 6 bytes)",
|
|
},
|
|
{
|
|
name: "new-file diff (no oldText)",
|
|
json: `[{"type":"diff","path":"new.go","oldText":"","newText":"hi"}]`,
|
|
want: "--- new.go\n+++ new.go\n(new file, 2 bytes)",
|
|
},
|
|
{
|
|
name: "empty array returns empty",
|
|
json: `[]`,
|
|
want: "",
|
|
},
|
|
{
|
|
name: "no text content",
|
|
json: `[{"type":"terminal","terminalId":"t1"}]`,
|
|
want: "",
|
|
},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
var blocks []json.RawMessage
|
|
if err := json.Unmarshal([]byte(tt.json), &blocks); err != nil {
|
|
t.Fatalf("unmarshal: %v", err)
|
|
}
|
|
if got := extractACPToolCallText(blocks); got != tt.want {
|
|
t.Errorf("got %q, want %q", got, tt.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleToolCallInProgressIgnored(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
called := false
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
called = true
|
|
},
|
|
}
|
|
|
|
line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"tool_call_update","toolCallId":"tc-abc123","status":"in_progress"}}}`
|
|
c.handleLine(line)
|
|
|
|
if called {
|
|
t.Error("expected in_progress tool_call_update to be ignored")
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleUsageUpdate(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
}
|
|
|
|
line := `{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"usage_update","usage":{"inputTokens":500,"outputTokens":200,"cachedReadTokens":100}}}}`
|
|
c.handleLine(line)
|
|
|
|
c.usageMu.Lock()
|
|
defer c.usageMu.Unlock()
|
|
|
|
if c.usage.InputTokens != 500 {
|
|
t.Errorf("inputTokens: got %d, want 500", c.usage.InputTokens)
|
|
}
|
|
if c.usage.OutputTokens != 200 {
|
|
t.Errorf("outputTokens: got %d, want 200", c.usage.OutputTokens)
|
|
}
|
|
if c.usage.CacheReadTokens != 100 {
|
|
t.Errorf("cacheReadTokens: got %d, want 100", c.usage.CacheReadTokens)
|
|
}
|
|
}
|
|
|
|
func TestHermesClientHandleUsageUpdateCumulative(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
}
|
|
|
|
// First usage update.
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"usage_update","usage":{"inputTokens":100,"outputTokens":50}}}}`)
|
|
|
|
// Second usage update with higher values (should take the max).
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_1","update":{"sessionUpdate":"usage_update","usage":{"inputTokens":300,"outputTokens":120}}}}`)
|
|
|
|
c.usageMu.Lock()
|
|
defer c.usageMu.Unlock()
|
|
|
|
if c.usage.InputTokens != 300 {
|
|
t.Errorf("inputTokens: got %d, want 300", c.usage.InputTokens)
|
|
}
|
|
if c.usage.OutputTokens != 120 {
|
|
t.Errorf("outputTokens: got %d, want 120", c.usage.OutputTokens)
|
|
}
|
|
}
|
|
|
|
// ── extractPromptResult ──
|
|
|
|
func TestHermesClientExtractPromptResult(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got hermesPromptResult
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onPromptDone: func(result hermesPromptResult) {
|
|
got = result
|
|
},
|
|
}
|
|
|
|
data := json.RawMessage(`{"stopReason":"end_turn","usage":{"inputTokens":1000,"outputTokens":200,"cachedReadTokens":50}}`)
|
|
c.extractPromptResult(data)
|
|
|
|
if got.stopReason != "end_turn" {
|
|
t.Errorf("stopReason: got %q, want %q", got.stopReason, "end_turn")
|
|
}
|
|
if got.usage.InputTokens != 1000 {
|
|
t.Errorf("inputTokens: got %d, want 1000", got.usage.InputTokens)
|
|
}
|
|
if got.usage.OutputTokens != 200 {
|
|
t.Errorf("outputTokens: got %d, want 200", got.usage.OutputTokens)
|
|
}
|
|
if got.usage.CacheReadTokens != 50 {
|
|
t.Errorf("cacheReadTokens: got %d, want 50", got.usage.CacheReadTokens)
|
|
}
|
|
}
|
|
|
|
func TestHermesClientExtractPromptResultNoUsage(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var got hermesPromptResult
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onPromptDone: func(result hermesPromptResult) {
|
|
got = result
|
|
},
|
|
}
|
|
|
|
data := json.RawMessage(`{"stopReason":"cancelled"}`)
|
|
c.extractPromptResult(data)
|
|
|
|
if got.stopReason != "cancelled" {
|
|
t.Errorf("stopReason: got %q, want %q", got.stopReason, "cancelled")
|
|
}
|
|
if got.usage.InputTokens != 0 {
|
|
t.Errorf("inputTokens: got %d, want 0", got.usage.InputTokens)
|
|
}
|
|
}
|
|
|
|
func TestHermesClientIgnoresUnknownNotification(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
called := false
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
called = true
|
|
},
|
|
}
|
|
|
|
// Unknown method should be silently ignored.
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"unknown/event","params":{}}`)
|
|
|
|
if called {
|
|
t.Error("expected unknown notification to be ignored")
|
|
}
|
|
}
|
|
|
|
func TestHermesClientIgnoresInvalidJSON(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c := &hermesClient{
|
|
pending: make(map[int]*pendingRPC),
|
|
}
|
|
|
|
// Should not panic.
|
|
c.handleLine("not json at all")
|
|
c.handleLine("")
|
|
c.handleLine("{}")
|
|
}
|
|
|
|
func TestHermesProviderErrorSniffer(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Real sample of the stderr hermes emits when the configured
|
|
// LLM endpoint rejects the requested model. We verify the
|
|
// sniffer extracts the `Error: ...` line so the task error
|
|
// tells the user *why* it failed.
|
|
s := newACPProviderErrorSniffer("hermes")
|
|
lines := []string{
|
|
"2026-04-20 23:41:47 [INFO] acp_adapter.server: Prompt on session abc",
|
|
`⚠️ API call failed (attempt 1/3): BadRequestError [HTTP 400]`,
|
|
` 🔌 Provider: openai-codex Model: gpt-5.1-codex-mini`,
|
|
` 📝 Error: HTTP 400: Error code: 400 - {'detail': "The 'gpt-5.1-codex-mini' model is not supported when using Codex with a ChatGPT account."}`,
|
|
`⏱️ Elapsed: 1.17s`,
|
|
}
|
|
for _, line := range lines {
|
|
if _, err := s.Write([]byte(line + "\n")); err != nil {
|
|
t.Fatalf("Write: %v", err)
|
|
}
|
|
}
|
|
msg := s.message()
|
|
if msg == "" {
|
|
t.Fatal("expected a non-empty error message")
|
|
}
|
|
if !strings.Contains(msg, "model is not supported") {
|
|
t.Errorf("expected detail about model support, got %q", msg)
|
|
}
|
|
}
|
|
|
|
func TestHermesProviderErrorSnifferIgnoresInfoLines(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
s := newACPProviderErrorSniffer("hermes")
|
|
s.Write([]byte("2026-04-20 23:41:45 [INFO] acp_adapter.entry: Loaded env\n"))
|
|
s.Write([]byte("2026-04-20 23:41:47 [INFO] agent.auxiliary_client: Vision auto-detect...\n"))
|
|
if msg := s.message(); msg != "" {
|
|
t.Errorf("info lines should produce no error, got %q", msg)
|
|
}
|
|
}
|
|
|
|
func TestHermesProviderErrorSnifferHandlesPartialLines(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Writer may be called mid-line; the sniffer must buffer until
|
|
// it sees a newline so the regex doesn't miss the header.
|
|
s := newACPProviderErrorSniffer("hermes")
|
|
s.Write([]byte(`⚠️ API call failed (attempt 1/3):`))
|
|
s.Write([]byte(` BadRequestError [HTTP 400]` + "\n"))
|
|
s.Write([]byte(` 📝 Error: something went wrong` + "\n"))
|
|
msg := s.message()
|
|
if !strings.Contains(msg, "something went wrong") {
|
|
t.Errorf("expected buffered line to be captured, got %q", msg)
|
|
}
|
|
}
|
|
|
|
func TestHermesProviderErrorSnifferBoundedBuffer(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
s := newACPProviderErrorSniffer("hermes")
|
|
for i := 0; i < 20; i++ {
|
|
// Each line differs so dedup doesn't merge them.
|
|
s.Write([]byte(`⚠️ API call failed (HTTP 400) attempt ` + string(rune('a'+i%26)) + `: Non-retryable error` + "\n"))
|
|
}
|
|
if len(s.lines) > acpMaxErrorLines {
|
|
t.Errorf("sniffer kept %d lines, limit is %d", len(s.lines), acpMaxErrorLines)
|
|
}
|
|
}
|
|
|
|
// fakeHermesACPRateLimitScript returns the source of a POSIX shell script
// that stands in for the hermes binary in the GitHub multica#1952 scenario.
//
// The script reads JSON-RPC requests line by line from stdin, extracts the
// numeric "id", and answers "initialize" and "session/new" (session id
// "ses_429") normally. On "session/prompt" it:
//
//   - prints a rate-limit error block (HTTP 429) to stderr, the kind of
//     output the provider-error sniffer is meant to pick up;
//   - emits an agent_message_chunk whose text is the synthetic
//     "API call failed after 3 retries..." turn, so output is non-empty;
//   - completes the request with stopReason=end_turn (NOT an RPC error)
//     and exits 0.
//
// The daemon must still treat such a run as failed, not successful, which
// means the hermes backend has to promote the status to "failed" even
// though `output` is non-empty.
func fakeHermesACPRateLimitScript() string {
	const script = `#!/bin/sh
while IFS= read -r line; do
id=$(printf '%s' "$line" | sed -n 's/.*"id":\([0-9]*\).*/\1/p')
case "$line" in
*'"method":"initialize"'*)
printf '{"jsonrpc":"2.0","id":%s,"result":{"protocolVersion":1,"agentCapabilities":{}}}\n' "$id"
;;
*'"method":"session/new"'*)
printf '{"jsonrpc":"2.0","id":%s,"result":{"sessionId":"ses_429"}}\n' "$id"
;;
*'"method":"session/prompt"'*)
# Mimic hermes' real-world stderr block on a 429.
printf '%s\n' '⚠️ API call failed (attempt 3/3): RateLimitError [HTTP 429]' >&2
printf '%s\n' ' 📝 Error: HTTP 429: The usage limit has been reached' >&2
# Mimic hermes injecting the failure as a synthetic agent turn so
# the chat shows *something*; this puts text in output and used to
# mask the failure from the daemon.
printf '{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_429","update":{"sessionUpdate":"agent_message_chunk","content":{"type":"text","text":"API call failed after 3 retries: HTTP 429: The usage limit has been reached"}}}}\n'
printf '{"jsonrpc":"2.0","id":%s,"result":{"stopReason":"end_turn"}}\n' "$id"
exit 0
;;
esac
done
`
	return script
}
|
|
|
|
// TestHermesProviderErrorSnifferTerminalVsTransient verifies the
|
|
// sniffer reports terminalMessage()=="" for a per-attempt warning
|
|
// that did NOT escalate to an exhausted/non-retryable failure, but
|
|
// still returns the same string from message() so callers wanting
|
|
// diagnostic text can use it. This is what prevents the
|
|
// promote-on-any-sniff false positive (a transient `attempt 1/3`
|
|
// followed by a successful retry must stay "completed").
|
|
func TestHermesProviderErrorSnifferTerminalVsTransient(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Transient: the sniffer DID see something matching acpErrorHeaderRe
|
|
// (so `message()` is non-empty for diagnostic purposes), but the
|
|
// signal is just "attempt 1/3 against a retryable rate limit" — no
|
|
// terminal markers at all.
|
|
s := newACPProviderErrorSniffer("hermes")
|
|
s.Write([]byte("⚠️ API call failed (attempt 1/3): retryable upstream blip\n"))
|
|
if msg := s.message(); msg == "" {
|
|
t.Fatalf("sniffer should still capture transient warnings for diagnostics")
|
|
}
|
|
if msg := s.terminalMessage(); msg != "" {
|
|
t.Fatalf("transient attempt should NOT be a terminal failure, got %q", msg)
|
|
}
|
|
|
|
// Now feed a follow-on terminal marker. terminalMessage must turn on.
|
|
s.Write([]byte("❌ API call failed after 3 retries: usage limit reached\n"))
|
|
if msg := s.terminalMessage(); msg == "" {
|
|
t.Fatalf("after-N-retries / ❌ should switch terminalMessage on")
|
|
}
|
|
}
|
|
|
|
// TestHermesProviderErrorSnifferTerminalNonRetryable verifies that a
|
|
// non-retryable error (BadRequest / Authentication / Non-retryable)
|
|
// is treated as terminal even on attempt 1/3 — those errors don't
|
|
// retry, so the very first failure is the final disposition. Also
|
|
// covers ❌ / [ERROR] / "after N retries" markers that adapters
|
|
// emit on give-up.
|
|
func TestHermesProviderErrorSnifferTerminalNonRetryable(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
for _, line := range []string{
|
|
`⚠️ API call failed (attempt 1/3): BadRequestError [HTTP 400]`,
|
|
`⚠️ API call failed (attempt 1/3): AuthenticationError [HTTP 401]`,
|
|
`⚠️ API call failed (HTTP 400) attempt a: Non-retryable error`,
|
|
`❌ API call failed after 3 retries: RateLimitError [HTTP 429]`,
|
|
`[ERROR] API call failed: upstream returned HTTP 500`,
|
|
} {
|
|
s := newACPProviderErrorSniffer("hermes")
|
|
s.Write([]byte(line + "\n"))
|
|
if msg := s.terminalMessage(); msg == "" {
|
|
t.Errorf("expected %q to be classified as terminal", line)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestHermesBackendPromotesProviderErrorWithNonEmptyOutput pins the
|
|
// fix for GitHub multica#1952: a hermes run that hits a 429 (or any
|
|
// upstream provider error) must surface as Status=failed even though
|
|
// hermes' synthetic "API call failed..." agent turn means the output
|
|
// buffer is non-empty. Before the fix the sniffer-promotion was
|
|
// gated on `finalOutput == ""`, so the run silently completed.
|
|
func TestHermesBackendPromotesProviderErrorWithNonEmptyOutput(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
fakePath := filepath.Join(t.TempDir(), "hermes")
|
|
writeTestExecutable(t, fakePath, []byte(fakeHermesACPRateLimitScript()))
|
|
|
|
backend, err := New("hermes", Config{ExecutablePath: fakePath, Logger: slog.Default()})
|
|
if err != nil {
|
|
t.Fatalf("new hermes backend: %v", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
|
|
session, err := backend.Execute(ctx, "prompt-ignored", ExecOptions{
|
|
Timeout: 5 * time.Second,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("execute: %v", err)
|
|
}
|
|
go func() {
|
|
for range session.Messages {
|
|
}
|
|
}()
|
|
|
|
select {
|
|
case result, ok := <-session.Result:
|
|
if !ok {
|
|
t.Fatal("result channel closed without a value")
|
|
}
|
|
if result.Status != "failed" {
|
|
t.Fatalf("expected status=failed (sniffer should promote on 429 even with non-empty output), got %q (error=%q output=%q)", result.Status, result.Error, result.Output)
|
|
}
|
|
if !strings.Contains(result.Error, "429") && !strings.Contains(result.Error, "usage limit") {
|
|
t.Errorf("expected error to surface the 429 / usage-limit message, got %q", result.Error)
|
|
}
|
|
if result.SessionID != "ses_429" {
|
|
t.Errorf("expected session id to be preserved on failure, got %q", result.SessionID)
|
|
}
|
|
case <-time.After(10 * time.Second):
|
|
t.Fatal("timeout waiting for result")
|
|
}
|
|
}
|
|
|
|
// fakeHermesACPTransientRetryScript returns the source of a POSIX shell
// script that stands in for the hermes binary when the upstream LLM blips
// once and then recovers.
//
// The script answers "initialize" and "session/new" (session id "ses_ok")
// normally. On "session/prompt" it writes a single per-attempt warning
// (`attempt 1/3`, HTTP 429) to stderr, streams a real agent answer as an
// agent_message_chunk, completes with stopReason=end_turn and exits 0.
//
// The previous (too-broad) promotion logic would have flipped such a run
// to status=failed; the fix must keep it as completed.
func fakeHermesACPTransientRetryScript() string {
	const script = `#!/bin/sh
while IFS= read -r line; do
id=$(printf '%s' "$line" | sed -n 's/.*"id":\([0-9]*\).*/\1/p')
case "$line" in
*'"method":"initialize"'*)
printf '{"jsonrpc":"2.0","id":%s,"result":{"protocolVersion":1,"agentCapabilities":{}}}\n' "$id"
;;
*'"method":"session/new"'*)
printf '{"jsonrpc":"2.0","id":%s,"result":{"sessionId":"ses_ok"}}\n' "$id"
;;
*'"method":"session/prompt"'*)
# Per-attempt rate-limit warning that hermes routinely logs on
# transient blips — the request DOES retry and succeed below.
printf '%s\n' '⚠️ API call failed (attempt 1/3): RateLimitError [HTTP 429]' >&2
# Real agent answer streamed back as a normal text turn.
printf '{"jsonrpc":"2.0","method":"session/update","params":{"sessionId":"ses_ok","update":{"sessionUpdate":"agent_message_chunk","content":{"type":"text","text":"Here is the answer you asked for."}}}}\n'
printf '{"jsonrpc":"2.0","id":%s,"result":{"stopReason":"end_turn"}}\n' "$id"
exit 0
;;
esac
done
`
	return script
}
|
|
|
|
// TestHermesBackendDoesNotPromoteOnTransientRetry pins the
|
|
// regression GPT-Boy flagged on the multica#1952 fix: a per-attempt
|
|
// ⚠️ warning on stderr that does NOT include any terminal marker
|
|
// ("after N retries", Non-retryable, ❌, [ERROR], BadRequest /
|
|
// Authentication errors) and is followed by a successful agent
|
|
// turn must stay status=completed. The previous "any sniffer line
|
|
// → fail" rule would have wrongly marked this run as failed.
|
|
func TestHermesBackendDoesNotPromoteOnTransientRetry(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
fakePath := filepath.Join(t.TempDir(), "hermes")
|
|
writeTestExecutable(t, fakePath, []byte(fakeHermesACPTransientRetryScript()))
|
|
|
|
backend, err := New("hermes", Config{ExecutablePath: fakePath, Logger: slog.Default()})
|
|
if err != nil {
|
|
t.Fatalf("new hermes backend: %v", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
|
|
session, err := backend.Execute(ctx, "prompt-ignored", ExecOptions{
|
|
Timeout: 5 * time.Second,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("execute: %v", err)
|
|
}
|
|
go func() {
|
|
for range session.Messages {
|
|
}
|
|
}()
|
|
|
|
select {
|
|
case result, ok := <-session.Result:
|
|
if !ok {
|
|
t.Fatal("result channel closed without a value")
|
|
}
|
|
if result.Status != "completed" {
|
|
t.Fatalf("transient retry that ultimately succeeded must stay status=completed, got %q (error=%q output=%q)", result.Status, result.Error, result.Output)
|
|
}
|
|
if !strings.Contains(result.Output, "Here is the answer") {
|
|
t.Errorf("expected the successful agent turn to be in output, got %q", result.Output)
|
|
}
|
|
case <-time.After(10 * time.Second):
|
|
t.Fatal("timeout waiting for result")
|
|
}
|
|
}
|