mirror of
https://github.com/multica-ai/multica.git
synced 2026-06-17 11:48:42 +02:00
2050 lines
62 KiB
Go
2050 lines
62 KiB
Go
package agent
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"reflect"
|
|
"runtime"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func newTestCodexClient(t *testing.T) (*codexClient, *fakeStdin, []Message) {
|
|
t.Helper()
|
|
fs := &fakeStdin{}
|
|
var mu sync.Mutex
|
|
var messages []Message
|
|
|
|
c := &codexClient{
|
|
cfg: Config{Logger: slog.Default()},
|
|
stdin: fs,
|
|
pending: make(map[int]*pendingRPC),
|
|
onMessage: func(msg Message) {
|
|
mu.Lock()
|
|
messages = append(messages, msg)
|
|
mu.Unlock()
|
|
},
|
|
onTurnDone: func(aborted bool) {},
|
|
}
|
|
return c, fs, messages
|
|
}
|
|
|
|
// fakeStdin is an in-memory writer used in place of the client's stdin. It
// accumulates every byte written so tests can inspect outbound traffic.
type fakeStdin struct {
	mu   sync.Mutex // guards data
	data []byte     // all written bytes, in write order
}

// Write appends p to the captured buffer. It always reports len(p) bytes
// written and a nil error.
func (f *fakeStdin) Write(p []byte) (int, error) {
	written := len(p)

	f.mu.Lock()
	f.data = append(f.data, p...)
	f.mu.Unlock()

	return written, nil
}
|
|
|
|
func (f *fakeStdin) Lines() []string {
|
|
f.mu.Lock()
|
|
defer f.mu.Unlock()
|
|
var lines []string
|
|
for _, line := range splitLines(string(f.data)) {
|
|
if line != "" {
|
|
lines = append(lines, line)
|
|
}
|
|
}
|
|
return lines
|
|
}
|
|
|
|
// splitLines cuts s at every '\n' and returns the pieces without the
// separators. Empty pieces between consecutive newlines are kept, but a
// trailing newline does not produce a final empty piece. An empty input
// yields nil.
func splitLines(s string) []string {
	var pieces []string
	begin := 0
	for pos := 0; pos < len(s); pos++ {
		if s[pos] != '\n' {
			continue
		}
		pieces = append(pieces, s[begin:pos])
		begin = pos + 1
	}
	// Whatever follows the last newline is a final, unterminated line.
	if tail := s[begin:]; tail != "" {
		pieces = append(pieces, tail)
	}
	return pieces
}
|
|
|
|
func TestCodexHandleResponseSuccess(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
|
|
// Register a pending request
|
|
pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "test"}
|
|
c.mu.Lock()
|
|
c.pending[1] = pr
|
|
c.mu.Unlock()
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":1,"result":{"ok":true}}`)
|
|
|
|
res := <-pr.ch
|
|
if res.err != nil {
|
|
t.Fatalf("expected no error, got %v", res.err)
|
|
}
|
|
|
|
var parsed map[string]any
|
|
if err := json.Unmarshal(res.result, &parsed); err != nil {
|
|
t.Fatalf("unmarshal result: %v", err)
|
|
}
|
|
if parsed["ok"] != true {
|
|
t.Fatalf("expected ok=true, got %v", parsed["ok"])
|
|
}
|
|
}
|
|
|
|
func TestCodexHandleResponseError(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
|
|
pr := &pendingRPC{ch: make(chan rpcResult, 1), method: "test"}
|
|
c.mu.Lock()
|
|
c.pending[1] = pr
|
|
c.mu.Unlock()
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":1,"error":{"code":-32600,"message":"bad request"}}`)
|
|
|
|
res := <-pr.ch
|
|
if res.err == nil {
|
|
t.Fatal("expected error")
|
|
}
|
|
if res.result != nil {
|
|
t.Fatalf("expected nil result, got %v", res.result)
|
|
}
|
|
}
|
|
|
|
func TestCodexHandleServerRequestAutoApproves(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
// Command execution approval
|
|
c.handleLine(`{"jsonrpc":"2.0","id":10,"method":"item/commandExecution/requestApproval","params":{}}`)
|
|
|
|
lines := fs.Lines()
|
|
if len(lines) != 1 {
|
|
t.Fatalf("expected 1 response, got %d", len(lines))
|
|
}
|
|
|
|
var resp map[string]any
|
|
if err := json.Unmarshal([]byte(lines[0]), &resp); err != nil {
|
|
t.Fatalf("unmarshal: %v", err)
|
|
}
|
|
if resp["id"] != float64(10) {
|
|
t.Fatalf("expected id=10, got %v", resp["id"])
|
|
}
|
|
result := resp["result"].(map[string]any)
|
|
if result["decision"] != "accept" {
|
|
t.Fatalf("expected decision=accept, got %v", result["decision"])
|
|
}
|
|
}
|
|
|
|
func TestCodexHandleServerRequestFileChangeApproval(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":11,"method":"applyPatchApproval","params":{}}`)
|
|
|
|
lines := fs.Lines()
|
|
if len(lines) != 1 {
|
|
t.Fatalf("expected 1 response, got %d", len(lines))
|
|
}
|
|
|
|
var resp map[string]any
|
|
if err := json.Unmarshal([]byte(lines[0]), &resp); err != nil {
|
|
t.Fatalf("unmarshal: %v", err)
|
|
}
|
|
result := resp["result"].(map[string]any)
|
|
if result["decision"] != "accept" {
|
|
t.Fatalf("expected decision=accept, got %v", result["decision"])
|
|
}
|
|
}
|
|
|
|
func TestCodexHandleServerRequestMCPElicitation(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":12,"method":"mcpServer/elicitation/request","params":{}}`)
|
|
|
|
lines := fs.Lines()
|
|
if len(lines) != 1 {
|
|
t.Fatalf("expected 1 response, got %d", len(lines))
|
|
}
|
|
|
|
var resp map[string]any
|
|
if err := json.Unmarshal([]byte(lines[0]), &resp); err != nil {
|
|
t.Fatalf("unmarshal: %v", err)
|
|
}
|
|
if resp["id"] != float64(12) {
|
|
t.Fatalf("expected id=12, got %v", resp["id"])
|
|
}
|
|
result := resp["result"].(map[string]any)
|
|
if result["action"] != "accept" {
|
|
t.Fatalf("expected action=accept, got %v", result["action"])
|
|
}
|
|
if _, ok := result["content"]; !ok {
|
|
t.Fatal("expected content key in response")
|
|
}
|
|
if _, ok := result["_meta"]; !ok {
|
|
t.Fatal("expected _meta key in response")
|
|
}
|
|
}
|
|
|
|
func TestCodexHandleServerRequestUnknownReturnsError(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","id":13,"method":"some/unknown/method","params":{}}`)
|
|
|
|
lines := fs.Lines()
|
|
if len(lines) != 1 {
|
|
t.Fatalf("expected 1 response, got %d", len(lines))
|
|
}
|
|
|
|
var resp map[string]any
|
|
if err := json.Unmarshal([]byte(lines[0]), &resp); err != nil {
|
|
t.Fatalf("unmarshal: %v", err)
|
|
}
|
|
if resp["id"] != float64(13) {
|
|
t.Fatalf("expected id=13, got %v", resp["id"])
|
|
}
|
|
if resp["result"] != nil {
|
|
t.Fatalf("expected no result for error response, got %v", resp["result"])
|
|
}
|
|
errObj, ok := resp["error"].(map[string]any)
|
|
if !ok {
|
|
t.Fatal("expected error object in response")
|
|
}
|
|
if errObj["code"] != float64(-32601) {
|
|
t.Fatalf("expected error code -32601, got %v", errObj["code"])
|
|
}
|
|
}
|
|
|
|
func TestCodexLegacyEventTaskStarted(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
var gotStatus bool
|
|
c.onMessage = func(msg Message) {
|
|
if msg.Type == MessageStatus && msg.Status == "running" {
|
|
gotStatus = true
|
|
}
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"codex/event","params":{"msg":{"type":"task_started"}}}`)
|
|
|
|
if !gotStatus {
|
|
t.Fatal("expected status=running message")
|
|
}
|
|
if !c.turnStarted {
|
|
t.Fatal("expected turnStarted=true")
|
|
}
|
|
if c.notificationProtocol != "legacy" {
|
|
t.Fatalf("expected protocol=legacy, got %q", c.notificationProtocol)
|
|
}
|
|
}
|
|
|
|
func TestCodexLegacyEventAgentMessage(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
var gotText string
|
|
c.onMessage = func(msg Message) {
|
|
if msg.Type == MessageText {
|
|
gotText = msg.Content
|
|
}
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"codex/event","params":{"msg":{"type":"agent_message","message":"I found the bug"}}}`)
|
|
|
|
if gotText != "I found the bug" {
|
|
t.Fatalf("expected text 'I found the bug', got %q", gotText)
|
|
}
|
|
}
|
|
|
|
func TestCodexLegacyEventExecCommand(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
var messages []Message
|
|
c.onMessage = func(msg Message) {
|
|
messages = append(messages, msg)
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"codex/event","params":{"msg":{"type":"exec_command_begin","call_id":"c1","command":"ls -la"}}}`)
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"codex/event","params":{"msg":{"type":"exec_command_end","call_id":"c1","output":"total 42"}}}`)
|
|
|
|
if len(messages) != 2 {
|
|
t.Fatalf("expected 2 messages, got %d", len(messages))
|
|
}
|
|
if messages[0].Type != MessageToolUse || messages[0].Tool != "exec_command" || messages[0].CallID != "c1" {
|
|
t.Fatalf("unexpected begin message: %+v", messages[0])
|
|
}
|
|
if messages[1].Type != MessageToolResult || messages[1].CallID != "c1" || messages[1].Output != "total 42" {
|
|
t.Fatalf("unexpected end message: %+v", messages[1])
|
|
}
|
|
}
|
|
|
|
func TestCodexLegacyEventTaskComplete(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
var done bool
|
|
c.onTurnDone = func(aborted bool) {
|
|
done = true
|
|
if aborted {
|
|
t.Fatal("expected aborted=false")
|
|
}
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"codex/event","params":{"msg":{"type":"task_complete"}}}`)
|
|
|
|
if !done {
|
|
t.Fatal("expected onTurnDone to be called")
|
|
}
|
|
}
|
|
|
|
func TestCodexLegacyEventTurnAborted(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
var abortedResult bool
|
|
c.onTurnDone = func(aborted bool) {
|
|
abortedResult = aborted
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"codex/event","params":{"msg":{"type":"turn_aborted"}}}`)
|
|
|
|
if !abortedResult {
|
|
t.Fatal("expected aborted=true")
|
|
}
|
|
}
|
|
|
|
func TestCodexRawTurnStarted(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
// The zero value "" doesn't match "unknown", so protocol auto-detection
|
|
// won't trigger. Set it explicitly as production code would.
|
|
c.notificationProtocol = "unknown"
|
|
|
|
var gotStatus bool
|
|
c.onMessage = func(msg Message) {
|
|
if msg.Type == MessageStatus && msg.Status == "running" {
|
|
gotStatus = true
|
|
}
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"turn/started","params":{"turn":{"id":"turn-1"}}}`)
|
|
|
|
if !gotStatus {
|
|
t.Fatal("expected status=running message")
|
|
}
|
|
if c.notificationProtocol != "raw" {
|
|
t.Fatalf("expected protocol=raw, got %q", c.notificationProtocol)
|
|
}
|
|
if c.turnID != "turn-1" {
|
|
t.Fatalf("expected turnID=turn-1, got %q", c.turnID)
|
|
}
|
|
}
|
|
|
|
func TestCodexRawTurnCompleted(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
|
|
var doneCount int
|
|
c.onTurnDone = func(aborted bool) {
|
|
doneCount++
|
|
if aborted {
|
|
t.Fatal("expected aborted=false")
|
|
}
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"turn/completed","params":{"turn":{"id":"turn-1","status":"completed"}}}`)
|
|
|
|
if doneCount != 1 {
|
|
t.Fatalf("expected onTurnDone called once, got %d", doneCount)
|
|
}
|
|
}
|
|
|
|
func TestCodexRawTurnCompletedDeduplication(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
|
|
var doneCount int
|
|
c.onTurnDone = func(aborted bool) {
|
|
doneCount++
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"turn/completed","params":{"turn":{"id":"turn-1","status":"completed"}}}`)
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"turn/completed","params":{"turn":{"id":"turn-1","status":"completed"}}}`)
|
|
|
|
if doneCount != 1 {
|
|
t.Fatalf("expected deduplication, but onTurnDone called %d times", doneCount)
|
|
}
|
|
}
|
|
|
|
func TestCodexRawTurnCompletedAborted(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
|
|
var wasAborted bool
|
|
c.onTurnDone = func(aborted bool) {
|
|
wasAborted = aborted
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"turn/completed","params":{"turn":{"id":"turn-2","status":"cancelled"}}}`)
|
|
|
|
if !wasAborted {
|
|
t.Fatal("expected aborted=true for cancelled status")
|
|
}
|
|
}
|
|
|
|
func TestCodexRawTurnCompletedFailedCapturesError(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
|
|
var wasAborted bool
|
|
c.onTurnDone = func(aborted bool) {
|
|
wasAborted = aborted
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"turn/completed","params":{"turn":{"id":"turn-f","status":"failed","error":{"message":"unexpected status 401 Unauthorized"}}}}`)
|
|
|
|
if wasAborted {
|
|
t.Fatal("failed is distinct from aborted")
|
|
}
|
|
if got := c.getTurnError(); got != "unexpected status 401 Unauthorized" {
|
|
t.Fatalf("expected error captured from turn.error.message, got %q", got)
|
|
}
|
|
}
|
|
|
|
func TestCodexRawTurnCompletedFailedWithoutMessageFallsBack(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
c.onTurnDone = func(aborted bool) {}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"turn/completed","params":{"turn":{"id":"turn-f","status":"failed"}}}`)
|
|
|
|
if got := c.getTurnError(); got != "codex turn failed" {
|
|
t.Fatalf("expected fallback message, got %q", got)
|
|
}
|
|
}
|
|
|
|
func TestCodexRawErrorNotificationTerminal(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
done := false
|
|
var activities []string
|
|
c.onSemanticActivity = func(activity string) {
|
|
activities = append(activities, activity)
|
|
}
|
|
c.onTurnDone = func(aborted bool) {
|
|
if aborted {
|
|
t.Fatal("terminal error should not mark the turn aborted")
|
|
}
|
|
done = true
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"error","params":{"error":{"message":"boom"},"willRetry":false}}`)
|
|
|
|
if got := c.getTurnError(); got != "boom" {
|
|
t.Fatalf("expected terminal error captured, got %q", got)
|
|
}
|
|
if !done {
|
|
t.Fatal("terminal error should finish the turn")
|
|
}
|
|
if got, want := strings.Join(activities, ","), "error:terminal"; got != want {
|
|
t.Fatalf("semantic activity = %q, want %q", got, want)
|
|
}
|
|
}
|
|
|
|
func TestCodexRawErrorNotificationRetryingIgnored(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
var activities []string
|
|
c.onSemanticActivity = func(activity string) {
|
|
activities = append(activities, activity)
|
|
}
|
|
c.onTurnDone = func(aborted bool) {
|
|
t.Fatal("retrying error should not finish the turn")
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"error","params":{"error":{"message":"reconnecting"},"willRetry":true}}`)
|
|
|
|
if got := c.getTurnError(); got != "" {
|
|
t.Fatalf("retrying error should not be captured, got %q", got)
|
|
}
|
|
if got, want := strings.Join(activities, ","), "error:retry"; got != want {
|
|
t.Fatalf("semantic activity = %q, want %q", got, want)
|
|
}
|
|
}
|
|
|
|
func TestCodexFirstTurnProgressActivity(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
cases := []struct {
|
|
activity string
|
|
want bool
|
|
}{
|
|
{activity: "", want: false},
|
|
{activity: "status:running", want: false},
|
|
{activity: "error:retry", want: false},
|
|
{activity: "error", want: true},
|
|
{activity: "text", want: true},
|
|
{activity: "tool-use:exec_command", want: true},
|
|
{activity: "tool-result:exec_command", want: true},
|
|
{activity: "item/started:commandExecution:cmd-1", want: true},
|
|
{activity: "item/completed:agentMessage:msg-1", want: true},
|
|
{activity: "error:terminal", want: true},
|
|
{activity: "turn:completed", want: true},
|
|
}
|
|
|
|
for _, tc := range cases {
|
|
t.Run(tc.activity, func(t *testing.T) {
|
|
if got := isCodexFirstTurnProgressActivity(tc.activity); got != tc.want {
|
|
t.Fatalf("isCodexFirstTurnProgressActivity(%q) = %v, want %v", tc.activity, got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCodexSetTurnErrorFirstWins(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
|
|
c.setTurnError("first")
|
|
c.setTurnError("second")
|
|
|
|
if got := c.getTurnError(); got != "first" {
|
|
t.Fatalf("expected first-wins semantics, got %q", got)
|
|
}
|
|
}
|
|
|
|
func TestCodexRawItemCommandExecution(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
|
|
var messages []Message
|
|
c.onMessage = func(msg Message) {
|
|
messages = append(messages, msg)
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"item/started","params":{"item":{"type":"commandExecution","id":"item-1","command":"git status"}}}`)
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"item/completed","params":{"item":{"type":"commandExecution","id":"item-1","aggregatedOutput":"on branch main"}}}`)
|
|
|
|
if len(messages) != 2 {
|
|
t.Fatalf("expected 2 messages, got %d", len(messages))
|
|
}
|
|
if messages[0].Type != MessageToolUse || messages[0].Tool != "exec_command" || messages[0].Input["command"] != "git status" {
|
|
t.Fatalf("unexpected start message: %+v", messages[0])
|
|
}
|
|
if messages[1].Type != MessageToolResult || messages[1].Output != "on branch main" {
|
|
t.Fatalf("unexpected complete message: %+v", messages[1])
|
|
}
|
|
}
|
|
|
|
func TestCodexRawItemAgentMessageFinalAnswer(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
c.turnStarted = true
|
|
|
|
var gotText string
|
|
var turnDone bool
|
|
c.onMessage = func(msg Message) {
|
|
if msg.Type == MessageText {
|
|
gotText = msg.Content
|
|
}
|
|
}
|
|
c.onTurnDone = func(aborted bool) {
|
|
turnDone = true
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"item/completed","params":{"item":{"type":"agentMessage","id":"msg-1","text":"Done!","phase":"final_answer"}}}`)
|
|
|
|
if gotText != "Done!" {
|
|
t.Fatalf("expected text 'Done!', got %q", gotText)
|
|
}
|
|
if !turnDone {
|
|
t.Fatal("expected onTurnDone for final_answer")
|
|
}
|
|
}
|
|
|
|
func TestCodexRawThreadStatusIdle(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
c.turnStarted = true
|
|
|
|
var turnDone bool
|
|
c.onTurnDone = func(aborted bool) {
|
|
turnDone = true
|
|
if aborted {
|
|
t.Fatal("expected aborted=false for idle")
|
|
}
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"thread/status/changed","params":{"status":{"type":"idle"}}}`)
|
|
|
|
if !turnDone {
|
|
t.Fatal("expected onTurnDone for idle status")
|
|
}
|
|
}
|
|
|
|
// Regression for #1181: subagent threads (e.g. memory consolidation)
|
|
// are multiplexed on the same stdio pipe. Their turn/completed must not
|
|
// terminate the main turn.
|
|
func TestCodexRawTurnCompletedFromSubagentIgnored(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
c.threadID = "thr_main"
|
|
|
|
var doneCount int
|
|
c.onTurnDone = func(aborted bool) {
|
|
doneCount++
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"turn/completed","params":{"threadId":"thr_subagent","turn":{"id":"turn-sub","status":"completed"}}}`)
|
|
|
|
if doneCount != 0 {
|
|
t.Fatalf("subagent turn/completed must not trigger onTurnDone, got %d calls", doneCount)
|
|
}
|
|
|
|
// Sanity check: a matching threadId still drives completion.
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"turn/completed","params":{"threadId":"thr_main","turn":{"id":"turn-main","status":"completed"}}}`)
|
|
if doneCount != 1 {
|
|
t.Fatalf("matching threadId should trigger onTurnDone exactly once, got %d", doneCount)
|
|
}
|
|
}
|
|
|
|
// Regression for #1181: subagent agentMessage/final_answer must not
|
|
// trigger turn completion or leak text into the main output stream.
|
|
func TestCodexRawItemAgentMessageFinalAnswerFromSubagentIgnored(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
c.notificationProtocol = "raw"
|
|
c.threadID = "thr_main"
|
|
c.turnStarted = true
|
|
|
|
var messages []Message
|
|
var doneCount int
|
|
c.onMessage = func(msg Message) {
|
|
messages = append(messages, msg)
|
|
}
|
|
c.onTurnDone = func(aborted bool) {
|
|
doneCount++
|
|
}
|
|
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"item/completed","params":{"threadId":"thr_subagent","item":{"type":"agentMessage","id":"sub-1","text":"subagent leakage","phase":"final_answer"}}}`)
|
|
|
|
if len(messages) != 0 {
|
|
t.Fatalf("subagent text must not leak into output builder, got %+v", messages)
|
|
}
|
|
if doneCount != 0 {
|
|
t.Fatalf("subagent final_answer must not trigger onTurnDone, got %d calls", doneCount)
|
|
}
|
|
}
|
|
|
|
func TestCodexCloseAllPending(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
|
|
pr1 := &pendingRPC{ch: make(chan rpcResult, 1), method: "m1"}
|
|
pr2 := &pendingRPC{ch: make(chan rpcResult, 1), method: "m2"}
|
|
c.mu.Lock()
|
|
c.pending[1] = pr1
|
|
c.pending[2] = pr2
|
|
c.mu.Unlock()
|
|
|
|
c.closeAllPending(fmt.Errorf("test error"))
|
|
|
|
r1 := <-pr1.ch
|
|
if r1.err == nil {
|
|
t.Fatal("expected error for pending 1")
|
|
}
|
|
r2 := <-pr2.ch
|
|
if r2.err == nil {
|
|
t.Fatal("expected error for pending 2")
|
|
}
|
|
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
if len(c.pending) != 0 {
|
|
t.Fatalf("expected empty pending map, got %d", len(c.pending))
|
|
}
|
|
}
|
|
|
|
func TestCodexHandleInvalidJSON(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
// Should not panic
|
|
c.handleLine("not json at all")
|
|
c.handleLine("")
|
|
c.handleLine("{}")
|
|
}
|
|
|
|
func TestExtractThreadID(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
data := json.RawMessage(`{"thread":{"id":"t-123"}}`)
|
|
got := extractThreadID(data)
|
|
if got != "t-123" {
|
|
t.Fatalf("expected t-123, got %q", got)
|
|
}
|
|
}
|
|
|
|
func TestExtractThreadIDMissing(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
got := extractThreadID(json.RawMessage(`{}`))
|
|
if got != "" {
|
|
t.Fatalf("expected empty, got %q", got)
|
|
}
|
|
}
|
|
|
|
func TestExtractNestedString(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
m := map[string]any{
|
|
"a": map[string]any{
|
|
"b": "value",
|
|
},
|
|
}
|
|
got := extractNestedString(m, "a", "b")
|
|
if got != "value" {
|
|
t.Fatalf("expected 'value', got %q", got)
|
|
}
|
|
}
|
|
|
|
func TestExtractNestedStringMissingKey(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
m := map[string]any{"a": "flat"}
|
|
got := extractNestedString(m, "a", "b")
|
|
if got != "" {
|
|
t.Fatalf("expected empty, got %q", got)
|
|
}
|
|
}
|
|
|
|
func TestNilIfEmpty(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if nilIfEmpty("") != nil {
|
|
t.Fatal("expected nil for empty string")
|
|
}
|
|
if nilIfEmpty("hello") != "hello" {
|
|
t.Fatal("expected 'hello'")
|
|
}
|
|
}
|
|
|
|
// rpcResponse is one step of a scripted JSON-RPC exchange used by
// drainRPCScript: it names the request method expected at that position and
// describes the response to inject back into the client via handleLine.
type rpcResponse struct {
	method   string          // expected request method
	result   json.RawMessage // success result body (mutually exclusive with errMsg)
	errMsg   string          // non-empty → respond with JSON-RPC error object
	errCode  int             // JSON-RPC error code when errMsg is set
	assertFn func(t *testing.T, params map[string]any) // optional check on the decoded request params
}
|
|
|
|
// drainRPCScript spins up a goroutine that watches fs.Lines() for new outbound
|
|
// requests and, for each one, injects the scripted response via c.handleLine.
|
|
// It returns a stop function that blocks until the script is exhausted or the
|
|
// test terminates.
|
|
func drainRPCScript(t *testing.T, c *codexClient, fs *fakeStdin, script []rpcResponse) func() {
|
|
t.Helper()
|
|
|
|
done := make(chan struct{})
|
|
go func() {
|
|
defer close(done)
|
|
seen := 0
|
|
deadline := time.Now().Add(2 * time.Second)
|
|
for seen < len(script) {
|
|
lines := fs.Lines()
|
|
for seen < len(lines) && seen < len(script) {
|
|
var req struct {
|
|
ID int `json:"id"`
|
|
Method string `json:"method"`
|
|
Params json.RawMessage `json:"params"`
|
|
}
|
|
if err := json.Unmarshal([]byte(lines[seen]), &req); err != nil {
|
|
t.Errorf("drainRPCScript: unmarshal request %d: %v", seen, err)
|
|
return
|
|
}
|
|
expected := script[seen]
|
|
if req.Method != expected.method {
|
|
t.Errorf("drainRPCScript: call %d method = %q, want %q", seen, req.Method, expected.method)
|
|
return
|
|
}
|
|
if expected.assertFn != nil {
|
|
var params map[string]any
|
|
_ = json.Unmarshal(req.Params, ¶ms)
|
|
expected.assertFn(t, params)
|
|
}
|
|
var resp string
|
|
if expected.errMsg != "" {
|
|
resp = fmt.Sprintf(`{"jsonrpc":"2.0","id":%d,"error":{"code":%d,"message":%q}}`, req.ID, expected.errCode, expected.errMsg)
|
|
} else {
|
|
resp = fmt.Sprintf(`{"jsonrpc":"2.0","id":%d,"result":%s}`, req.ID, string(expected.result))
|
|
}
|
|
c.handleLine(resp)
|
|
seen++
|
|
}
|
|
if seen < len(script) {
|
|
if time.Now().After(deadline) {
|
|
t.Errorf("drainRPCScript: timed out after %d/%d responses", seen, len(script))
|
|
return
|
|
}
|
|
time.Sleep(5 * time.Millisecond)
|
|
}
|
|
}
|
|
}()
|
|
|
|
return func() {
|
|
select {
|
|
case <-done:
|
|
case <-time.After(3 * time.Second):
|
|
t.Fatal("drainRPCScript did not finish")
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestCodexStartOrResumeThreadStartsFresh(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
wait := drainRPCScript(t, c, fs, []rpcResponse{
|
|
{
|
|
method: "thread/start",
|
|
result: json.RawMessage(`{"thread":{"id":"thr_fresh"}}`),
|
|
assertFn: func(t *testing.T, params map[string]any) {
|
|
if params["cwd"] != "/work" {
|
|
t.Errorf("cwd = %v, want /work", params["cwd"])
|
|
}
|
|
if params["persistExtendedHistory"] != true {
|
|
t.Error("expected persistExtendedHistory=true on thread/start")
|
|
}
|
|
},
|
|
},
|
|
})
|
|
defer wait()
|
|
|
|
threadID, resumed, err := c.startOrResumeThread(context.Background(), ExecOptions{Cwd: "/work"}, slog.Default())
|
|
if err != nil {
|
|
t.Fatalf("startOrResumeThread: %v", err)
|
|
}
|
|
if threadID != "thr_fresh" {
|
|
t.Errorf("threadID = %q, want thr_fresh", threadID)
|
|
}
|
|
if resumed {
|
|
t.Error("resumed should be false when no prior session is provided")
|
|
}
|
|
}
|
|
|
|
func TestCodexStartOrResumeThreadSetsNameOnFreshThread(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
wait := drainRPCScript(t, c, fs, []rpcResponse{
|
|
{
|
|
method: "thread/start",
|
|
result: json.RawMessage(`{"thread":{"id":"thr_named"}}`),
|
|
},
|
|
{
|
|
method: "thread/name/set",
|
|
result: json.RawMessage(`{}`),
|
|
assertFn: func(t *testing.T, params map[string]any) {
|
|
if params["threadId"] != "thr_named" {
|
|
t.Errorf("threadId = %v, want thr_named", params["threadId"])
|
|
}
|
|
if params["name"] != "Review GitHub issue #3843" {
|
|
t.Errorf("name = %v, want semantic title", params["name"])
|
|
}
|
|
},
|
|
},
|
|
})
|
|
defer wait()
|
|
|
|
threadID, resumed, err := c.startOrResumeThread(
|
|
context.Background(),
|
|
ExecOptions{ThreadName: "Review GitHub issue #3843"},
|
|
slog.Default(),
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("startOrResumeThread: %v", err)
|
|
}
|
|
if threadID != "thr_named" {
|
|
t.Errorf("threadID = %q, want thr_named", threadID)
|
|
}
|
|
if resumed {
|
|
t.Error("resumed should be false when no prior session is provided")
|
|
}
|
|
}
|
|
|
|
func TestCodexStartOrResumeThreadNameFailureDoesNotBlock(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
wait := drainRPCScript(t, c, fs, []rpcResponse{
|
|
{
|
|
method: "thread/start",
|
|
result: json.RawMessage(`{"thread":{"id":"thr_named"}}`),
|
|
},
|
|
{
|
|
method: "thread/name/set",
|
|
errMsg: "unsupported method",
|
|
errCode: -32601,
|
|
},
|
|
})
|
|
defer wait()
|
|
|
|
threadID, resumed, err := c.startOrResumeThread(
|
|
context.Background(),
|
|
ExecOptions{ThreadName: "Semantic task title"},
|
|
slog.Default(),
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("startOrResumeThread should continue after name failure: %v", err)
|
|
}
|
|
if threadID != "thr_named" {
|
|
t.Errorf("threadID = %q, want thr_named", threadID)
|
|
}
|
|
if resumed {
|
|
t.Error("resumed should be false when no prior session is provided")
|
|
}
|
|
}
|
|
|
|
func TestCodexStartOrResumeThreadResumesPriorThread(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
wait := drainRPCScript(t, c, fs, []rpcResponse{
|
|
{
|
|
method: "thread/resume",
|
|
result: json.RawMessage(`{"thread":{"id":"thr_prior"}}`),
|
|
assertFn: func(t *testing.T, params map[string]any) {
|
|
if params["threadId"] != "thr_prior" {
|
|
t.Errorf("threadId = %v, want thr_prior", params["threadId"])
|
|
}
|
|
if params["cwd"] != "/work" {
|
|
t.Errorf("cwd = %v, want /work", params["cwd"])
|
|
}
|
|
},
|
|
},
|
|
})
|
|
defer wait()
|
|
|
|
threadID, resumed, err := c.startOrResumeThread(
|
|
context.Background(),
|
|
ExecOptions{Cwd: "/work", ResumeSessionID: "thr_prior"},
|
|
slog.Default(),
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("startOrResumeThread: %v", err)
|
|
}
|
|
if threadID != "thr_prior" {
|
|
t.Errorf("threadID = %q, want thr_prior", threadID)
|
|
}
|
|
if !resumed {
|
|
t.Error("expected resumed=true when thread/resume succeeded")
|
|
}
|
|
}
|
|
|
|
func TestCodexStartOrResumeThreadFallsBackOnResumeError(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
wait := drainRPCScript(t, c, fs, []rpcResponse{
|
|
{
|
|
method: "thread/resume",
|
|
errMsg: "unknown thread",
|
|
errCode: -32602,
|
|
},
|
|
{
|
|
method: "thread/start",
|
|
result: json.RawMessage(`{"thread":{"id":"thr_new"}}`),
|
|
},
|
|
})
|
|
defer wait()
|
|
|
|
threadID, resumed, err := c.startOrResumeThread(
|
|
context.Background(),
|
|
ExecOptions{Cwd: "/work", ResumeSessionID: "thr_stale"},
|
|
slog.Default(),
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("startOrResumeThread: %v", err)
|
|
}
|
|
if threadID != "thr_new" {
|
|
t.Errorf("threadID = %q, want thr_new (fresh thread after fallback)", threadID)
|
|
}
|
|
if resumed {
|
|
t.Error("expected resumed=false after falling back to thread/start")
|
|
}
|
|
}
|
|
|
|
func TestCodexStartOrResumeThreadFallsBackWhenResumeReturnsNoID(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
wait := drainRPCScript(t, c, fs, []rpcResponse{
|
|
{
|
|
method: "thread/resume",
|
|
result: json.RawMessage(`{"thread":{}}`),
|
|
},
|
|
{
|
|
method: "thread/start",
|
|
result: json.RawMessage(`{"thread":{"id":"thr_new"}}`),
|
|
},
|
|
})
|
|
defer wait()
|
|
|
|
threadID, resumed, err := c.startOrResumeThread(
|
|
context.Background(),
|
|
ExecOptions{ResumeSessionID: "thr_prior"},
|
|
slog.Default(),
|
|
)
|
|
if err != nil {
|
|
t.Fatalf("startOrResumeThread: %v", err)
|
|
}
|
|
if threadID != "thr_new" {
|
|
t.Errorf("threadID = %q, want thr_new", threadID)
|
|
}
|
|
if resumed {
|
|
t.Error("expected resumed=false when resume yielded no thread ID")
|
|
}
|
|
}
|
|
|
|
func TestCodexStartOrResumeThreadStartFailureSurfaces(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, fs, _ := newTestCodexClient(t)
|
|
|
|
wait := drainRPCScript(t, c, fs, []rpcResponse{
|
|
{
|
|
method: "thread/start",
|
|
errMsg: "boom",
|
|
errCode: -32000,
|
|
},
|
|
})
|
|
defer wait()
|
|
|
|
_, _, err := c.startOrResumeThread(context.Background(), ExecOptions{}, slog.Default())
|
|
if err == nil {
|
|
t.Fatal("expected error when thread/start fails")
|
|
}
|
|
if !strings.Contains(err.Error(), "thread/start") {
|
|
t.Errorf("error should mention thread/start, got %v", err)
|
|
}
|
|
}
|
|
|
|
func TestCodexProtocolDetectionLegacyBlocksRaw(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
c, _, _ := newTestCodexClient(t)
|
|
|
|
var messages []Message
|
|
c.onMessage = func(msg Message) {
|
|
messages = append(messages, msg)
|
|
}
|
|
|
|
// First: receive a legacy event -> locks to "legacy"
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"codex/event","params":{"msg":{"type":"task_started"}}}`)
|
|
|
|
if c.notificationProtocol != "legacy" {
|
|
t.Fatalf("expected legacy, got %q", c.notificationProtocol)
|
|
}
|
|
|
|
// Now send a raw notification -> should be ignored
|
|
messagesBefore := len(messages)
|
|
c.handleLine(`{"jsonrpc":"2.0","method":"turn/started","params":{"turn":{"id":"turn-1"}}}`)
|
|
|
|
if len(messages) != messagesBefore {
|
|
t.Fatal("raw notification should be ignored in legacy mode")
|
|
}
|
|
}
|
|
|
|
func TestStderrTailForwardsAndCapturesTail(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var sink strings.Builder
|
|
s := newStderrTail(&sink, 16)
|
|
|
|
if _, err := s.Write([]byte("first line\n")); err != nil {
|
|
t.Fatalf("write: %v", err)
|
|
}
|
|
if _, err := s.Write([]byte("error: unexpected argument '-m' found\n")); err != nil {
|
|
t.Fatalf("write: %v", err)
|
|
}
|
|
|
|
// Inner writer sees every byte verbatim.
|
|
want := "first line\nerror: unexpected argument '-m' found\n"
|
|
if sink.String() != want {
|
|
t.Errorf("inner sink: got %q, want %q", sink.String(), want)
|
|
}
|
|
|
|
// Tail is bounded by max; earlier bytes get dropped.
|
|
tail := s.Tail()
|
|
if len(tail) > 16 {
|
|
t.Errorf("tail exceeds bound: got %d bytes (%q)", len(tail), tail)
|
|
}
|
|
if tail == "" {
|
|
t.Fatal("expected non-empty tail")
|
|
}
|
|
// Tail must be a suffix of what was written (whitespace-trimmed).
|
|
if !strings.HasSuffix(strings.TrimSpace(want), tail) {
|
|
t.Errorf("tail %q is not a suffix of %q", tail, want)
|
|
}
|
|
}
|
|
|
|
func TestStderrTailEmptyWhenNothingWritten(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var sink strings.Builder
|
|
s := newStderrTail(&sink, 16)
|
|
if tail := s.Tail(); tail != "" {
|
|
t.Errorf("expected empty tail, got %q", tail)
|
|
}
|
|
}
|
|
|
|
func TestCodexExecuteSurfacesStderrWhenChildExitsEarly(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("shell-script fixture is POSIX-only")
|
|
}
|
|
|
|
// Fake codex binary: writes a canonical CLI rejection line to stderr and
|
|
// exits before ever responding to `initialize`, mimicking what real codex
|
|
// does when `app-server` gets a flag it doesn't accept. This exercises the
|
|
// real os/exec stderr pipe-copy goroutine — without drainAndWait joining
|
|
// cmd.Wait() before sampling stderrBuf.Tail(), Result.Error would come
|
|
// back empty or truncated here.
|
|
fakePath := filepath.Join(t.TempDir(), "codex")
|
|
script := "#!/bin/sh\n" +
|
|
"echo \"error: unexpected argument '-m' found\" >&2\n" +
|
|
"exit 2\n"
|
|
writeTestExecutable(t, fakePath, []byte(script))
|
|
|
|
backend, err := New("codex", Config{ExecutablePath: fakePath, Logger: slog.Default()})
|
|
if err != nil {
|
|
t.Fatalf("new codex backend: %v", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
session, err := backend.Execute(ctx, "prompt-ignored", ExecOptions{Timeout: 5 * time.Second})
|
|
if err != nil {
|
|
t.Fatalf("execute: %v", err)
|
|
}
|
|
// Drain message stream so the lifecycle goroutine can progress.
|
|
go func() {
|
|
for range session.Messages {
|
|
}
|
|
}()
|
|
|
|
select {
|
|
case result, ok := <-session.Result:
|
|
if !ok {
|
|
t.Fatal("result channel closed without a value")
|
|
}
|
|
if result.Status != "failed" {
|
|
t.Fatalf("expected status=failed, got %q (error=%q)", result.Status, result.Error)
|
|
}
|
|
if !strings.Contains(result.Error, "codex initialize failed") {
|
|
t.Fatalf("expected error to mention initialize failure, got %q", result.Error)
|
|
}
|
|
if !strings.Contains(result.Error, "unexpected argument '-m' found") {
|
|
t.Fatalf("expected error to include stderr hint, got %q", result.Error)
|
|
}
|
|
case <-time.After(10 * time.Second):
|
|
t.Fatal("timeout waiting for result")
|
|
}
|
|
}
|
|
|
|
func TestCodexExecuteTimesOutWhenTurnStopsAfterToolResult(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("shell-script fixture is POSIX-only")
|
|
}
|
|
|
|
fakePath := writeFakeCodexAppServer(t, ""+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":1,"result":{}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":2,"result":{"thread":{"id":"thr-stale"}}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":3,"result":{}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"turn/started","params":{"threadId":"thr-stale","turn":{"id":"turn-stale"}}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"item/started","params":{"threadId":"thr-stale","item":{"type":"commandExecution","id":"cmd-1","command":"git status"}}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"item/completed","params":{"threadId":"thr-stale","item":{"type":"commandExecution","id":"cmd-1","aggregatedOutput":"clean"}}}'`+"\n"+
|
|
`sleep 5`+"\n")
|
|
|
|
result := executeFakeCodex(t, fakePath, ExecOptions{
|
|
Timeout: 5 * time.Second,
|
|
SemanticInactivityTimeout: 100 * time.Millisecond,
|
|
})
|
|
if result.Status != "timeout" {
|
|
t.Fatalf("expected timeout, got status=%q error=%q", result.Status, result.Error)
|
|
}
|
|
if !strings.Contains(result.Error, "semantic inactivity") {
|
|
t.Fatalf("expected semantic inactivity error, got %q", result.Error)
|
|
}
|
|
if result.SessionID != "thr-stale" {
|
|
t.Fatalf("expected session id to be preserved, got %q", result.SessionID)
|
|
}
|
|
}
|
|
|
|
func TestCodexExecuteFirstTurnNoProgressSurfacesDiagnostics(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("shell-script fixture is POSIX-only")
|
|
}
|
|
|
|
fakePath := writeFakeCodexAppServer(t, ""+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":1,"result":{}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":2,"result":{"thread":{"id":"thr-stuck"}}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":3,"result":{}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"turn/started","params":{"threadId":"thr-stuck","turn":{"id":"turn-stuck"}}}'`+"\n"+
|
|
`echo 'ERROR codex_models_manager::manager: failed to refresh available models: timeout waiting for child process to exit' >&2`+"\n"+
|
|
`sleep 5`+"\n")
|
|
|
|
result := executeFakeCodex(t, fakePath, ExecOptions{
|
|
Timeout: 5 * time.Second,
|
|
SemanticInactivityTimeout: 100 * time.Millisecond,
|
|
})
|
|
if result.Status != "timeout" {
|
|
t.Fatalf("expected timeout, got status=%q error=%q", result.Status, result.Error)
|
|
}
|
|
for _, want := range []string{
|
|
CodexFirstTurnNoProgressMarker,
|
|
"thr-stuck",
|
|
"turn-stuck",
|
|
`model="default(empty)"`,
|
|
`codex_version="codex-cli 0.0.0-test"`,
|
|
"model catalog refresh timed out",
|
|
"codex stderr:",
|
|
codexModelCatalogRefreshTimeoutSignal,
|
|
} {
|
|
if !strings.Contains(result.Error, want) {
|
|
t.Fatalf("expected error to contain %q, got %q", want, result.Error)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestCodexExecuteFirstTurnRetryErrorDoesNotSatisfyProgress(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("shell-script fixture is POSIX-only")
|
|
}
|
|
|
|
fakePath := writeFakeCodexAppServer(t, ""+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":1,"result":{}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":2,"result":{"thread":{"id":"thr-retry"}}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":3,"result":{}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"turn/started","params":{"threadId":"thr-retry","turn":{"id":"turn-retry"}}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"error","params":{"threadId":"thr-retry","error":{"message":"temporary reconnect"},"willRetry":true}}'`+"\n"+
|
|
`sleep 5`+"\n")
|
|
|
|
result := executeFakeCodex(t, fakePath, ExecOptions{
|
|
Timeout: 5 * time.Second,
|
|
SemanticInactivityTimeout: 200 * time.Millisecond,
|
|
})
|
|
if result.Status != "timeout" {
|
|
t.Fatalf("expected timeout, got status=%q error=%q", result.Status, result.Error)
|
|
}
|
|
if !strings.Contains(result.Error, CodexFirstTurnNoProgressMarker) {
|
|
t.Fatalf("expected first-turn no-progress error, got %q", result.Error)
|
|
}
|
|
if strings.Contains(result.Error, CodexSemanticInactivityMarker) {
|
|
t.Fatalf("retrying error should not demote first-turn timeout to semantic inactivity, got %q", result.Error)
|
|
}
|
|
}
|
|
|
|
func TestCodexExecuteLegacyFirstTurnMessageSatisfiesProgress(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("shell-script fixture is POSIX-only")
|
|
}
|
|
|
|
fakePath := writeFakeCodexAppServer(t, ""+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":1,"result":{}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":2,"result":{"thread":{"id":"thr-legacy"}}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":3,"result":{}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"codex/event","params":{"msg":{"type":"task_started"}}}'`+"\n"+
|
|
`sleep 0.05`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"codex/event","params":{"msg":{"type":"agent_message","message":"legacy alive"}}}'`+"\n"+
|
|
`sleep 0.07`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"codex/event","params":{"msg":{"type":"task_complete"}}}'`+"\n")
|
|
|
|
result := executeFakeCodex(t, fakePath, ExecOptions{
|
|
Timeout: 5 * time.Second,
|
|
SemanticInactivityTimeout: 100 * time.Millisecond,
|
|
})
|
|
if result.Status != "completed" {
|
|
t.Fatalf("expected completed, got status=%q error=%q", result.Status, result.Error)
|
|
}
|
|
if result.Output != "legacy alive" {
|
|
t.Fatalf("expected legacy output, got %q", result.Output)
|
|
}
|
|
}
|
|
|
|
func TestCodexExecuteSemanticInactivityAllowsContinuousMessages(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("shell-script fixture is POSIX-only")
|
|
}
|
|
|
|
fakePath := writeFakeCodexAppServer(t, ""+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":1,"result":{}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":2,"result":{"thread":{"id":"thr-progress"}}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":3,"result":{}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"turn/started","params":{"threadId":"thr-progress","turn":{"id":"turn-progress"}}}'`+"\n"+
|
|
`sleep 0.05`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"item/completed","params":{"threadId":"thr-progress","item":{"type":"agentMessage","id":"msg-1","text":"still working"}}}'`+"\n"+
|
|
`sleep 0.05`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"item/completed","params":{"threadId":"thr-progress","item":{"type":"commandExecution","id":"cmd-1","aggregatedOutput":"ok"}}}'`+"\n"+
|
|
`sleep 0.05`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"turn/completed","params":{"threadId":"thr-progress","turn":{"id":"turn-progress","status":"completed"}}}'`+"\n")
|
|
|
|
result := executeFakeCodex(t, fakePath, ExecOptions{
|
|
Timeout: 5 * time.Second,
|
|
SemanticInactivityTimeout: 90 * time.Millisecond,
|
|
})
|
|
if result.Status != "completed" {
|
|
t.Fatalf("expected completed, got status=%q error=%q", result.Status, result.Error)
|
|
}
|
|
if !strings.Contains(result.Output, "still working") {
|
|
t.Fatalf("expected streamed text in output, got %q", result.Output)
|
|
}
|
|
}
|
|
|
|
func TestCodexExecuteSemanticInactivityAllowsContinuousDeltaProgress(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("shell-script fixture is POSIX-only")
|
|
}
|
|
|
|
fakePath := writeFakeCodexAppServer(t, ""+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":1,"result":{}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":2,"result":{"thread":{"id":"thr-delta"}}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":3,"result":{}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"turn/started","params":{"threadId":"thr-delta","turn":{"id":"turn-delta"}}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"item/commandExecution/outputDelta","params":{"threadId":"thr-delta","item":{"type":"commandExecution","id":"cmd-1"},"delta":"line 1\n"}}'`+"\n"+
|
|
`sleep 0.05`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"item/agentMessage/delta","params":{"threadId":"thr-delta","item":{"type":"agentMessage","id":"msg-1"},"delta":"thinking"}}'`+"\n"+
|
|
`sleep 0.05`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"item/fileChange/outputDelta","params":{"threadId":"thr-delta","item":{"type":"fileChange","id":"patch-1"},"delta":"patched"}}'`+"\n"+
|
|
`sleep 0.05`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"item/mcpToolCall/progress","params":{"threadId":"thr-delta","item":{"type":"mcpToolCall","id":"mcp-1"},"progress":{"message":"still running"}}}'`+"\n"+
|
|
`sleep 0.05`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"turn/completed","params":{"threadId":"thr-delta","turn":{"id":"turn-delta","status":"completed"}}}'`+"\n")
|
|
|
|
result := executeFakeCodex(t, fakePath, ExecOptions{
|
|
Timeout: 5 * time.Second,
|
|
SemanticInactivityTimeout: 150 * time.Millisecond,
|
|
})
|
|
if result.Status != "completed" {
|
|
t.Fatalf("expected completed, got status=%q error=%q", result.Status, result.Error)
|
|
}
|
|
}
|
|
|
|
func TestCodexExecuteSemanticInactivityDoesNotAffectNormalTurnCompletion(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("shell-script fixture is POSIX-only")
|
|
}
|
|
|
|
fakePath := writeFakeCodexAppServer(t, ""+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":1,"result":{}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":2,"result":{"thread":{"id":"thr-normal"}}}'`+"\n"+
|
|
`read line`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","id":3,"result":{}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"turn/started","params":{"threadId":"thr-normal","turn":{"id":"turn-normal"}}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"item/completed","params":{"threadId":"thr-normal","item":{"type":"agentMessage","id":"msg-1","text":"Done"}}}'`+"\n"+
|
|
`echo '{"jsonrpc":"2.0","method":"turn/completed","params":{"threadId":"thr-normal","turn":{"id":"turn-normal","status":"completed"}}}'`+"\n")
|
|
|
|
result := executeFakeCodex(t, fakePath, ExecOptions{
|
|
Timeout: 5 * time.Second,
|
|
SemanticInactivityTimeout: 100 * time.Millisecond,
|
|
})
|
|
if result.Status != "completed" {
|
|
t.Fatalf("expected completed, got status=%q error=%q", result.Status, result.Error)
|
|
}
|
|
if result.Output != "Done" {
|
|
t.Fatalf("expected output Done, got %q", result.Output)
|
|
}
|
|
}
|
|
|
|
func writeFakeCodexAppServer(t *testing.T, body string) string {
|
|
t.Helper()
|
|
fakePath := filepath.Join(t.TempDir(), "codex")
|
|
script := "#!/bin/sh\n" +
|
|
`if [ "$1" = "--version" ]; then echo "codex-cli 0.0.0-test"; exit 0; fi` + "\n" +
|
|
body
|
|
writeTestExecutable(t, fakePath, []byte(script))
|
|
return fakePath
|
|
}
|
|
|
|
func executeFakeCodex(t *testing.T, fakePath string, opts ExecOptions) Result {
|
|
t.Helper()
|
|
backend, err := New("codex", Config{ExecutablePath: fakePath, Logger: slog.Default()})
|
|
if err != nil {
|
|
t.Fatalf("new codex backend: %v", err)
|
|
}
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
defer cancel()
|
|
session, err := backend.Execute(ctx, "prompt", opts)
|
|
if err != nil {
|
|
t.Fatalf("execute: %v", err)
|
|
}
|
|
go func() {
|
|
for range session.Messages {
|
|
}
|
|
}()
|
|
select {
|
|
case result, ok := <-session.Result:
|
|
if !ok {
|
|
t.Fatal("result channel closed without a value")
|
|
}
|
|
return result
|
|
case <-time.After(10 * time.Second):
|
|
t.Fatal("timeout waiting for result")
|
|
return Result{}
|
|
}
|
|
}
|
|
|
|
func TestWithAgentStderrAppendsHint(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if got := withAgentStderr("codex initialize failed: process exited", "codex", ""); got != "codex initialize failed: process exited" {
|
|
t.Errorf("empty tail should not modify msg, got %q", got)
|
|
}
|
|
msg := withAgentStderr("codex initialize failed: process exited", "codex", "unexpected argument '-m' found")
|
|
want := "codex initialize failed: process exited; codex stderr: unexpected argument '-m' found"
|
|
if msg != want {
|
|
t.Errorf("got %q, want %q", msg, want)
|
|
}
|
|
}
|
|
|
|
func TestBuildCodexArgsExtraArgsBeforeCustomArgsAndFiltersBoth(t *testing.T) {
|
|
args := buildCodexArgs(ExecOptions{
|
|
ExtraArgs: []string{"--listen", "tcp://evil", "--sandbox", "read-only"},
|
|
CustomArgs: []string{"--sandbox", "workspace-write", "--listen=bad"},
|
|
}, slog.Default())
|
|
joined := strings.Join(args, " ")
|
|
if strings.Contains(joined, "tcp://evil") || strings.Contains(joined, "--listen=bad") {
|
|
t.Fatalf("blocked args should be filtered from both layers: %v", args)
|
|
}
|
|
extraIdx, customIdx := -1, -1
|
|
for i := 0; i+1 < len(args); i++ {
|
|
if args[i] == "--sandbox" && args[i+1] == "read-only" {
|
|
extraIdx = i
|
|
}
|
|
if args[i] == "--sandbox" && args[i+1] == "workspace-write" {
|
|
customIdx = i
|
|
}
|
|
}
|
|
if extraIdx == -1 || customIdx == -1 || extraIdx > customIdx {
|
|
t.Fatalf("expected extra args before custom args, got %v", args)
|
|
}
|
|
}
|
|
|
|
func TestBuildCodexArgsDoesNotLeakMcpToArgv(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// MCP config is materialised into $CODEX_HOME/config.toml, never into
|
|
// argv — otherwise `mcp_servers.<id>.env` secrets would land in
|
|
// `ps aux` output and in the daemon's `agent command` log line. This
|
|
// test pins the contract: even with a non-empty mcp_config, no -c /
|
|
// --config / mcp_servers.* entry shows up in buildCodexArgs output.
|
|
raw := json.RawMessage(`{"mcpServers":{"fetch":{"command":"uvx","env":{"SECRET":"hunter2"}}}}`)
|
|
args := buildCodexArgs(ExecOptions{
|
|
McpConfig: raw,
|
|
CustomArgs: []string{"-c", `model="o3"`},
|
|
}, slog.Default())
|
|
|
|
joined := strings.Join(args, " ")
|
|
if strings.Contains(joined, "mcp_servers") {
|
|
t.Fatalf("argv must not mention mcp_servers (now lives in config.toml), got %v", args)
|
|
}
|
|
if strings.Contains(joined, "hunter2") {
|
|
t.Fatalf("argv must not leak secret env values, got %v", args)
|
|
}
|
|
for i := 0; i+1 < len(args); i++ {
|
|
if (args[i] == "-c" || args[i] == "--config") && strings.HasPrefix(args[i+1], "mcp_servers.") {
|
|
t.Fatalf("expected no -c mcp_servers.* in argv, got %v", args)
|
|
}
|
|
}
|
|
// Legitimate non-mcp `-c model=…` from custom_args must still survive.
|
|
foundModel := false
|
|
for i := 0; i+1 < len(args); i++ {
|
|
if args[i] == "-c" && args[i+1] == `model="o3"` {
|
|
foundModel = true
|
|
}
|
|
}
|
|
if !foundModel {
|
|
t.Fatalf("expected non-mcp -c override to be preserved, got %v", args)
|
|
}
|
|
}
|
|
|
|
func TestCodexExecuteFailsClosedWhenMcpConfigInvalid(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("shell-script fixture is POSIX-only")
|
|
}
|
|
|
|
// When the admin has a managed mcp_config but the JSON is malformed
|
|
// (or any other reason ensureCodexMcpConfig fails), fail closed
|
|
// instead of silently launching with the user's global MCP — that
|
|
// would look indistinguishable from "the saved config was applied"
|
|
// and is exactly the surprise the MCP Tab is supposed to remove.
|
|
fakePath := writeFakeCodexAppServer(t, "exit 0\n")
|
|
|
|
codexHome := t.TempDir()
|
|
backend, err := New("codex", Config{
|
|
ExecutablePath: fakePath,
|
|
Logger: slog.Default(),
|
|
Env: map[string]string{"CODEX_HOME": codexHome},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("new codex backend: %v", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
_, err = backend.Execute(ctx, "prompt", ExecOptions{
|
|
Timeout: 2 * time.Second,
|
|
McpConfig: json.RawMessage(`not json`),
|
|
})
|
|
if err == nil {
|
|
t.Fatal("expected Execute to fail closed on malformed mcp_config, got nil error")
|
|
}
|
|
if !strings.Contains(err.Error(), "mcp_config") {
|
|
t.Fatalf("expected error to mention mcp_config, got %q", err)
|
|
}
|
|
}
|
|
|
|
func TestCodexExecuteFailsClosedWhenManagedMcpButNoCodexHome(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("shell-script fixture is POSIX-only")
|
|
}
|
|
|
|
// Managed mcp_config saved but no CODEX_HOME to anchor it — same
|
|
// fail-closed reasoning: silently launching would inherit whatever
|
|
// MCP setup the host user has, which is the wrong shape of failure.
|
|
fakePath := writeFakeCodexAppServer(t, "exit 0\n")
|
|
|
|
backend, err := New("codex", Config{
|
|
ExecutablePath: fakePath,
|
|
Logger: slog.Default(),
|
|
Env: map[string]string{}, // no CODEX_HOME
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("new codex backend: %v", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
_, err = backend.Execute(ctx, "prompt", ExecOptions{
|
|
Timeout: 2 * time.Second,
|
|
McpConfig: json.RawMessage(`{"mcpServers":{"fetch":{"command":"uvx"}}}`),
|
|
})
|
|
if err == nil {
|
|
t.Fatal("expected Execute to fail closed when managed mcp_config but no CODEX_HOME, got nil error")
|
|
}
|
|
if !strings.Contains(err.Error(), "CODEX_HOME") {
|
|
t.Fatalf("expected error to mention CODEX_HOME, got %q", err)
|
|
}
|
|
}
|
|
|
|
func TestBuildCodexArgsPreservesCustomMcpOverridesWhenUnmanaged(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Existing Codex agents may rely on `custom_args: ["-c", "mcp_servers.…"]`
|
|
// because before MUL-2764 there was no MCP Tab. When the agent has
|
|
// no managed mcp_config saved, the daemon must leave those entries
|
|
// alone — silently dropping them would break the only way those
|
|
// users had to configure MCP. We only claim the `mcp_servers`
|
|
// namespace once an admin opts in via the MCP Tab.
|
|
args := buildCodexArgs(ExecOptions{
|
|
CustomArgs: []string{"-c", `mcp_servers.fetch={ command = "uvx" }`, "-c", `model="o3"`},
|
|
}, slog.Default())
|
|
foundMcp := false
|
|
for i := 0; i+1 < len(args); i++ {
|
|
if args[i] == "-c" && strings.HasPrefix(args[i+1], "mcp_servers.") {
|
|
foundMcp = true
|
|
}
|
|
}
|
|
if !foundMcp {
|
|
t.Fatalf("custom_args mcp_servers entry must survive when agent has no managed mcp_config, got %v", args)
|
|
}
|
|
}
|
|
|
|
func TestBuildCodexArgsDropsCustomMcpOverridesWhenManaged(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Once an admin saves a managed mcp_config, the daemon owns
|
|
// the `mcp_servers` namespace via $CODEX_HOME/config.toml. Codex's
|
|
// `-c` is last-wins, so any `-c mcp_servers.…` left in custom_args
|
|
// would silently shadow the saved managed entries.
|
|
raw := json.RawMessage(`{"mcpServers":{"managed":{"command":"managed-cmd"}}}`)
|
|
args := buildCodexArgs(ExecOptions{
|
|
McpConfig: raw,
|
|
CustomArgs: []string{"-c", `mcp_servers.fetch={ command = "evil" }`, "-c", `model="o3"`},
|
|
}, slog.Default())
|
|
for i := 0; i+1 < len(args); i++ {
|
|
if args[i] == "-c" && strings.HasPrefix(args[i+1], "mcp_servers.") {
|
|
t.Fatalf("custom_args mcp_servers must be filtered when managed mcp_config is present, got %v", args)
|
|
}
|
|
}
|
|
// Unrelated -c key still passes through.
|
|
foundModel := false
|
|
for i := 0; i+1 < len(args); i++ {
|
|
if args[i] == "-c" && args[i+1] == `model="o3"` {
|
|
foundModel = true
|
|
}
|
|
}
|
|
if !foundModel {
|
|
t.Fatalf("unrelated -c override must still survive, got %v", args)
|
|
}
|
|
}
|
|
|
|
func TestFilterCodexCustomConfigOverridesDropsMcpServers(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Codex `-c` is last-wins, so a user-supplied `-c mcp_servers.…` in
|
|
// custom_args would silently shadow whatever the MCP Tab wrote into
|
|
// CODEX_HOME/config.toml. Verify that all spellings of the override
|
|
// get dropped, while unrelated `-c` keys pass through.
|
|
cases := []struct {
|
|
name string
|
|
in []string
|
|
want []string
|
|
}{
|
|
{
|
|
name: "separated -c mcp_servers.fetch=…",
|
|
in: []string{"-c", `mcp_servers.fetch={ command = "evil" }`, "-c", `model="o3"`},
|
|
want: []string{"-c", `model="o3"`},
|
|
},
|
|
{
|
|
name: "inline -c=mcp_servers.fetch=…",
|
|
in: []string{`-c=mcp_servers.fetch={ command = "evil" }`, "--listen=keep"},
|
|
want: []string{"--listen=keep"},
|
|
},
|
|
{
|
|
name: "long form --config mcp_servers.x.env.KEY=val",
|
|
in: []string{"--config", `mcp_servers.x.env.KEY="leak"`, "--config", `sandbox="workspace-write"`},
|
|
want: []string{"--config", `sandbox="workspace-write"`},
|
|
},
|
|
{
|
|
name: "passes through unrelated -c overrides",
|
|
in: []string{"-c", `model="o3"`, "-c", `sandbox.network_access=true`},
|
|
want: []string{"-c", `model="o3"`, "-c", `sandbox.network_access=true`},
|
|
},
|
|
{
|
|
name: "matches mcp_servers root assignment",
|
|
in: []string{"-c", `mcp_servers={fetch={command="evil"}}`, "-c", `model="o3"`},
|
|
want: []string{"-c", `model="o3"`},
|
|
},
|
|
}
|
|
for _, tc := range cases {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
t.Parallel()
|
|
got := filterCodexCustomConfigOverrides(tc.in, slog.Default())
|
|
if !reflect.DeepEqual(got, tc.want) {
|
|
t.Fatalf("filterCodexCustomConfigOverrides(%v) = %v, want %v", tc.in, got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestEnsureCodexMcpConfigEmptyClearsBlock(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// When agent.mcp_config is null/empty the managed block is removed
|
|
// from config.toml, but unrelated content (sandbox block, user-level
|
|
// `[mcp_servers.user]`) is left untouched.
|
|
tmp := filepath.Join(t.TempDir(), "config.toml")
|
|
initial := "sandbox_mode = \"workspace-write\"\n\n" +
|
|
multicaCodexMcpBeginMarker + "\n" +
|
|
"[mcp_servers.fetch]\ncommand = \"uvx\"\n" +
|
|
multicaCodexMcpEndMarker + "\n\n" +
|
|
"[mcp_servers.user_global]\ncommand = \"keep\"\n"
|
|
if err := os.WriteFile(tmp, []byte(initial), 0o600); err != nil {
|
|
t.Fatalf("seed config: %v", err)
|
|
}
|
|
|
|
if err := ensureCodexMcpConfig(tmp, nil, slog.Default()); err != nil {
|
|
t.Fatalf("ensure: %v", err)
|
|
}
|
|
data, err := os.ReadFile(tmp)
|
|
if err != nil {
|
|
t.Fatalf("read after: %v", err)
|
|
}
|
|
got := string(data)
|
|
if strings.Contains(got, multicaCodexMcpBeginMarker) {
|
|
t.Fatalf("managed block should be cleared, got:\n%s", got)
|
|
}
|
|
if !strings.Contains(got, "[mcp_servers.user_global]") {
|
|
t.Fatalf("user-defined mcp_servers should be left alone when agent has no mcp_config, got:\n%s", got)
|
|
}
|
|
if !strings.Contains(got, `sandbox_mode = "workspace-write"`) {
|
|
t.Fatalf("unrelated config preserved, got:\n%s", got)
|
|
}
|
|
}
|
|
|
|
func TestEnsureCodexMcpConfigWritesManagedBlock(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// A non-empty mcp_config writes one `[mcp_servers.<name>]` table per
|
|
// server, in stable alphabetical order, into the managed block. The
|
|
// file mode is 0o600 because env values may carry secrets.
|
|
tmp := filepath.Join(t.TempDir(), "config.toml")
|
|
if err := os.WriteFile(tmp, []byte("sandbox_mode = \"workspace-write\"\n"), 0o600); err != nil {
|
|
t.Fatalf("seed: %v", err)
|
|
}
|
|
|
|
raw := json.RawMessage(`{"mcpServers":{"zeta":{"command":"b"},"alpha":{"command":"a","env":{"K":"v"}}}}`)
|
|
if err := ensureCodexMcpConfig(tmp, raw, slog.Default()); err != nil {
|
|
t.Fatalf("ensure: %v", err)
|
|
}
|
|
data, err := os.ReadFile(tmp)
|
|
if err != nil {
|
|
t.Fatalf("read after: %v", err)
|
|
}
|
|
got := string(data)
|
|
|
|
if !strings.Contains(got, multicaCodexMcpBeginMarker) || !strings.Contains(got, multicaCodexMcpEndMarker) {
|
|
t.Fatalf("expected managed block markers, got:\n%s", got)
|
|
}
|
|
alphaIdx := strings.Index(got, "[mcp_servers.alpha]")
|
|
zetaIdx := strings.Index(got, "[mcp_servers.zeta]")
|
|
if alphaIdx == -1 || zetaIdx == -1 {
|
|
t.Fatalf("expected both server tables, got:\n%s", got)
|
|
}
|
|
if alphaIdx > zetaIdx {
|
|
t.Fatalf("expected alpha before zeta (alphabetical), got:\n%s", got)
|
|
}
|
|
for _, want := range []string{
|
|
`command = "a"`,
|
|
`env = { K = "v" }`,
|
|
`command = "b"`,
|
|
`sandbox_mode = "workspace-write"`, // unrelated user content preserved
|
|
} {
|
|
if !strings.Contains(got, want) {
|
|
t.Fatalf("expected %q in:\n%s", want, got)
|
|
}
|
|
}
|
|
|
|
fi, err := os.Stat(tmp)
|
|
if err != nil {
|
|
t.Fatalf("stat: %v", err)
|
|
}
|
|
if mode := fi.Mode().Perm(); mode != 0o600 {
|
|
t.Fatalf("expected mode 0o600 for secret-bearing config, got %o", mode)
|
|
}
|
|
}
|
|
|
|
func TestEnsureCodexMcpConfigForces0600OnPreexistingFile(t *testing.T) {
|
|
t.Parallel()
|
|
if runtime.GOOS == "windows" {
|
|
t.Skip("POSIX permissions only")
|
|
}
|
|
|
|
// `execenv.copyFile` seeds the per-task config.toml at 0o644. Once we
|
|
// add secret-bearing mcp_servers tables to it, the mode must drop to
|
|
// 0o600 — `os.WriteFile` alone keeps the existing mode, so the chmod
|
|
// is the part we need to pin.
|
|
tmp := filepath.Join(t.TempDir(), "config.toml")
|
|
if err := os.WriteFile(tmp, []byte("sandbox_mode = \"workspace-write\"\n"), 0o644); err != nil {
|
|
t.Fatalf("seed: %v", err)
|
|
}
|
|
|
|
raw := json.RawMessage(`{"mcpServers":{"fetch":{"command":"uvx","env":{"API_KEY":"secret"}}}}`)
|
|
if err := ensureCodexMcpConfig(tmp, raw, slog.Default()); err != nil {
|
|
t.Fatalf("ensure: %v", err)
|
|
}
|
|
fi, err := os.Stat(tmp)
|
|
if err != nil {
|
|
t.Fatalf("stat: %v", err)
|
|
}
|
|
if mode := fi.Mode().Perm(); mode != 0o600 {
|
|
t.Fatalf("expected 0o600 after overwrite of pre-existing 0o644 file, got %o", mode)
|
|
}
|
|
}
|
|
|
|
func TestEnsureCodexMcpConfigStripsUserMcpServersWhenManaged(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// When agent.mcp_config is non-empty, ALL user-defined `[mcp_servers.*]`
|
|
// tables (inherited from ~/.codex/config.toml) are stripped to avoid
|
|
// (a) TOML "table already exists" errors when names collide and (b) the
|
|
// user's global servers silently being mixed in with the strict
|
|
// agent-managed list. Sub-tables like `[mcp_servers.x.env]` are also
|
|
// dropped as part of their parent.
|
|
tmp := filepath.Join(t.TempDir(), "config.toml")
|
|
initial := "sandbox_mode = \"workspace-write\"\n\n" +
|
|
"[mcp_servers.global_fetch]\ncommand = \"uvx-old\"\n\n" +
|
|
"[mcp_servers.global_fetch.env]\nOLD_KEY = \"old\"\n\n" +
|
|
"[other_section]\nkeep_me = true\n"
|
|
if err := os.WriteFile(tmp, []byte(initial), 0o600); err != nil {
|
|
t.Fatalf("seed: %v", err)
|
|
}
|
|
|
|
raw := json.RawMessage(`{"mcpServers":{"new_server":{"command":"new"}}}`)
|
|
if err := ensureCodexMcpConfig(tmp, raw, slog.Default()); err != nil {
|
|
t.Fatalf("ensure: %v", err)
|
|
}
|
|
data, _ := os.ReadFile(tmp)
|
|
got := string(data)
|
|
|
|
if strings.Contains(got, "global_fetch") {
|
|
t.Fatalf("user mcp_servers tables must be stripped when agent has its own mcp_config, got:\n%s", got)
|
|
}
|
|
if strings.Contains(got, "OLD_KEY") {
|
|
t.Fatalf("user mcp_servers sub-tables must be stripped too, got:\n%s", got)
|
|
}
|
|
if !strings.Contains(got, "[other_section]") || !strings.Contains(got, "keep_me = true") {
|
|
t.Fatalf("unrelated tables must survive, got:\n%s", got)
|
|
}
|
|
if !strings.Contains(got, "[mcp_servers.new_server]") {
|
|
t.Fatalf("managed server should be written, got:\n%s", got)
|
|
}
|
|
}
|
|
|
|
func TestEnsureCodexMcpConfigIdempotent(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Running ensure twice with the same input must produce byte-identical
|
|
// output — needed because Prepare and Reuse may both call into this on
|
|
// the same per-task config.toml across a task's lifetime.
|
|
tmp := filepath.Join(t.TempDir(), "config.toml")
|
|
raw := json.RawMessage(`{"mcpServers":{"fetch":{"command":"uvx","args":["a","b"]}}}`)
|
|
|
|
if err := ensureCodexMcpConfig(tmp, raw, slog.Default()); err != nil {
|
|
t.Fatalf("first ensure: %v", err)
|
|
}
|
|
first, _ := os.ReadFile(tmp)
|
|
|
|
if err := ensureCodexMcpConfig(tmp, raw, slog.Default()); err != nil {
|
|
t.Fatalf("second ensure: %v", err)
|
|
}
|
|
second, _ := os.ReadFile(tmp)
|
|
|
|
if string(first) != string(second) {
|
|
t.Fatalf("non-idempotent write:\nfirst:\n%s\nsecond:\n%s", first, second)
|
|
}
|
|
}
|
|
|
|
func TestEnsureCodexMcpConfigRejectsBadShapes(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
cases := []struct {
|
|
name string
|
|
raw string
|
|
}{
|
|
{"non-json", `not json`},
|
|
{"server is array", `{"mcpServers":{"x":[1,2]}}`},
|
|
{"server is string", `{"mcpServers":{"x":"oops"}}`},
|
|
{"null value inside server", `{"mcpServers":{"x":{"command":null}}}`},
|
|
{"bad server name", `{"mcpServers":{"has space":{"command":"a"}}}`},
|
|
}
|
|
for _, tc := range cases {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
t.Parallel()
|
|
tmp := filepath.Join(t.TempDir(), "config.toml")
|
|
if err := ensureCodexMcpConfig(tmp, json.RawMessage(tc.raw), slog.Default()); err == nil {
|
|
t.Fatalf("expected error for %s, got nil", tc.name)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestEnsureCodexMcpConfigAbsentLeavesUserTablesAlone(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// nil / `null` map to the API's "absent" state: the agent has no
|
|
// managed mcp_config, so the daemon must not touch the user's
|
|
// inherited `[mcp_servers.*]` tables — the run falls back to the
|
|
// user's global CLI config.
|
|
for _, raw := range []json.RawMessage{nil, json.RawMessage(`null`)} {
|
|
tmp := filepath.Join(t.TempDir(), "config.toml")
|
|
initial := "sandbox_mode = \"workspace-write\"\n\n" +
|
|
"[mcp_servers.user_global]\ncommand = \"keep\"\n"
|
|
if err := os.WriteFile(tmp, []byte(initial), 0o600); err != nil {
|
|
t.Fatalf("seed: %v", err)
|
|
}
|
|
if err := ensureCodexMcpConfig(tmp, raw, slog.Default()); err != nil {
|
|
t.Fatalf("ensure (%q): %v", string(raw), err)
|
|
}
|
|
data, _ := os.ReadFile(tmp)
|
|
got := string(data)
|
|
if !strings.Contains(got, "[mcp_servers.user_global]") {
|
|
t.Fatalf("absent mcp_config (%q) must leave user MCP tables alone, got:\n%s", string(raw), got)
|
|
}
|
|
if strings.Contains(got, multicaCodexMcpBeginMarker) {
|
|
t.Fatalf("absent mcp_config (%q) must not write managed markers, got:\n%s", string(raw), got)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestEnsureCodexMcpConfigEmptyManagedSetStripsUserMcp(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// `{}` / `{"mcpServers":{}}` map to the API's "present, empty" state.
|
|
// The admin saved an explicit (empty) MCP list, so the daemon must
|
|
// strip inherited user `[mcp_servers.*]` tables and pin the managed
|
|
// markers — equivalent to Claude's --strict-mcp-config with an empty
|
|
// servers map. Falling back to the user's global MCP would defeat
|
|
// the affordance.
|
|
for _, raw := range []json.RawMessage{
|
|
json.RawMessage(`{}`),
|
|
json.RawMessage(`{"mcpServers":{}}`),
|
|
} {
|
|
tmp := filepath.Join(t.TempDir(), "config.toml")
|
|
initial := "sandbox_mode = \"workspace-write\"\n\n" +
|
|
"[mcp_servers.user_global]\ncommand = \"keep\"\n"
|
|
if err := os.WriteFile(tmp, []byte(initial), 0o600); err != nil {
|
|
t.Fatalf("seed: %v", err)
|
|
}
|
|
if err := ensureCodexMcpConfig(tmp, raw, slog.Default()); err != nil {
|
|
t.Fatalf("ensure (%q): %v", string(raw), err)
|
|
}
|
|
data, _ := os.ReadFile(tmp)
|
|
got := string(data)
|
|
if strings.Contains(got, "user_global") {
|
|
t.Fatalf("managed empty set (%q) must strip user MCP tables, got:\n%s", string(raw), got)
|
|
}
|
|
if !strings.Contains(got, multicaCodexMcpBeginMarker) || !strings.Contains(got, multicaCodexMcpEndMarker) {
|
|
t.Fatalf("managed empty set (%q) must still write markers so future runs find them, got:\n%s", string(raw), got)
|
|
}
|
|
if !strings.Contains(got, `sandbox_mode = "workspace-write"`) {
|
|
t.Fatalf("unrelated content must survive (%q), got:\n%s", string(raw), got)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestEnsureCodexMcpConfigEmptyManagedSetIdempotent(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Running ensure twice with the same `{}` input must produce
|
|
// byte-identical output — guards against the empty-marker block
|
|
// accreting blank lines or duplicate markers across reruns.
|
|
tmp := filepath.Join(t.TempDir(), "config.toml")
|
|
if err := os.WriteFile(tmp, []byte("sandbox_mode = \"workspace-write\"\n"), 0o600); err != nil {
|
|
t.Fatalf("seed: %v", err)
|
|
}
|
|
raw := json.RawMessage(`{}`)
|
|
if err := ensureCodexMcpConfig(tmp, raw, slog.Default()); err != nil {
|
|
t.Fatalf("first ensure: %v", err)
|
|
}
|
|
first, _ := os.ReadFile(tmp)
|
|
if err := ensureCodexMcpConfig(tmp, raw, slog.Default()); err != nil {
|
|
t.Fatalf("second ensure: %v", err)
|
|
}
|
|
second, _ := os.ReadFile(tmp)
|
|
if string(first) != string(second) {
|
|
t.Fatalf("non-idempotent write:\nfirst:\n%s\nsecond:\n%s", first, second)
|
|
}
|
|
}
|
|
|
|
func TestHasManagedCodexMcpConfig(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
cases := []struct {
|
|
name string
|
|
raw json.RawMessage
|
|
want bool
|
|
}{
|
|
{"nil", nil, false},
|
|
{"empty bytes", json.RawMessage(""), false},
|
|
{"whitespace only", json.RawMessage(" \n\t"), false},
|
|
{"json null", json.RawMessage(`null`), false},
|
|
{"json null with whitespace", json.RawMessage(" null \n"), false},
|
|
{"empty object", json.RawMessage(`{}`), true},
|
|
{"empty mcp servers map", json.RawMessage(`{"mcpServers":{}}`), true},
|
|
{"populated", json.RawMessage(`{"mcpServers":{"x":{"command":"a"}}}`), true},
|
|
}
|
|
for _, tc := range cases {
|
|
tc := tc
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
t.Parallel()
|
|
if got := hasManagedCodexMcpConfig(tc.raw); got != tc.want {
|
|
t.Fatalf("hasManagedCodexMcpConfig(%q) = %v, want %v", string(tc.raw), got, tc.want)
|
|
}
|
|
})
|
|
}
|
|
}
|