Compare commits

...

1 Commits

Author SHA1 Message Date
Jiang Bohan
e2532bab3a test(agent): regression tests for codex subagent threadId filter
Follow-up to #1192. Document the v2 protocol contract that the
dispatch-level threadId guard relies on, and lock down the two leakage
paths the guard closes:

- turn/completed from a subagent thread must not call onTurnDone
- item/completed (agentMessage, final_answer) from a subagent thread
  must neither leak text into the output builder nor terminate the turn

Without these tests, a future refactor that drops or relocates the guard
would not be caught by CI, since the existing notification tests omit the
top-level threadId field and therefore pass through the guard unfiltered.
2026-04-17 14:47:01 +08:00
2 changed files with 63 additions and 0 deletions

View File

@@ -618,6 +618,12 @@ func (c *codexClient) handleRawNotification(method string, params map[string]any
// Ignore notifications from threads other than the one we are tracking.
// Codex multiplexes subagent threads (e.g. memory consolidation) on the
// same stdio pipe; only our thread should drive turn lifecycle and output.
//
// The v2 app-server-protocol schema guarantees a top-level threadId on
// every notification, so this dispatch-level guard transparently covers
// every handler below. If a future codex revision introduces notifications
// without threadId, they fall through (ok=false) — re-audit this guard
// when bumping codex.
if threadID, ok := params["threadId"].(string); ok && c.threadID != "" && threadID != c.threadID {
return
}

View File

@@ -501,6 +501,63 @@ func TestCodexRawThreadStatusIdle(t *testing.T) {
}
}
// TestCodexRawTurnCompletedFromSubagentIgnored is a regression test for #1181.
//
// Subagent threads (e.g. memory consolidation) share the stdio pipe with the
// main thread. A turn/completed notification carrying a foreign threadId must
// leave the main turn running, while one carrying the tracked threadId must
// still complete it.
func TestCodexRawTurnCompletedFromSubagentIgnored(t *testing.T) {
	t.Parallel()

	// Notification payloads: one from a foreign (subagent) thread and one from
	// the thread the client is tracking.
	const (
		subagentCompleted = `{"jsonrpc":"2.0","method":"turn/completed","params":{"threadId":"thr_subagent","turn":{"id":"turn-sub","status":"completed"}}}`
		mainCompleted     = `{"jsonrpc":"2.0","method":"turn/completed","params":{"threadId":"thr_main","turn":{"id":"turn-main","status":"completed"}}}`
	)

	client, _, _ := newTestCodexClient(t)
	client.notificationProtocol = "raw"
	client.threadID = "thr_main"

	// Count every onTurnDone invocation; the aborted flag is irrelevant here.
	completions := 0
	client.onTurnDone = func(_ bool) { completions++ }

	// The foreign thread's completion must be dropped.
	client.handleLine(subagentCompleted)
	if completions != 0 {
		t.Fatalf("subagent turn/completed must not trigger onTurnDone, got %d calls", completions)
	}

	// Positive control: the tracked thread's completion is still delivered.
	client.handleLine(mainCompleted)
	if completions != 1 {
		t.Fatalf("matching threadId should trigger onTurnDone exactly once, got %d", completions)
	}
}
// TestCodexRawItemAgentMessageFinalAnswerFromSubagentIgnored is a regression
// test for #1181.
//
// An item/completed notification of type agentMessage in the final_answer
// phase that originates from a subagent thread must neither surface a message
// through onMessage nor end the main turn through onTurnDone.
func TestCodexRawItemAgentMessageFinalAnswerFromSubagentIgnored(t *testing.T) {
	t.Parallel()

	// Final-answer agent message tagged with a threadId other than the tracked one.
	const subagentFinalAnswer = `{"jsonrpc":"2.0","method":"item/completed","params":{"threadId":"thr_subagent","item":{"type":"agentMessage","id":"sub-1","text":"subagent leakage","phase":"final_answer"}}}`

	client, _, _ := newTestCodexClient(t)
	client.notificationProtocol = "raw"
	client.threadID = "thr_main"
	client.turnStarted = true

	// Record everything the client emits so any leakage is observable.
	var (
		received    []Message
		completions int
	)
	client.onMessage = func(m Message) { received = append(received, m) }
	client.onTurnDone = func(_ bool) { completions++ }

	client.handleLine(subagentFinalAnswer)

	// Checks run in order; the first failing case aborts the test.
	switch {
	case len(received) != 0:
		t.Fatalf("subagent text must not leak into output builder, got %+v", received)
	case completions != 0:
		t.Fatalf("subagent final_answer must not trigger onTurnDone, got %d calls", completions)
	}
}
func TestCodexCloseAllPending(t *testing.T) {
t.Parallel()